Make things compile --without-xslt
[yaz-moved-to-github.git] / src / record_conv.c
1 /*
2  * Copyright (C) 2005-2006, Index Data ApS
3  * See the file LICENSE for details.
4  *
5  * $Id: record_conv.c,v 1.3 2006-05-04 15:31:04 adam Exp $
6  */
7 /**
8  * \file record_conv.c
9  * \brief Record Conversions utility
10  */
11
12 #if HAVE_CONFIG_H
13 #include <config.h>
14 #endif
15
16 #include <string.h>
17 #include <yaz/yaz-iconv.h>
18 #include <yaz/marcdisp.h>
19 #include <yaz/record_conv.h>
20 #include <yaz/wrbuf.h>
21 #include <yaz/xmalloc.h>
22 #include <yaz/nmem.h>
23 #include <yaz/tpath.h>
24
25 #if HAVE_XSLT
26 #include <libxml/parser.h>
27 #include <libxml/tree.h>
28 #include <libxml/xinclude.h>
29 #include <libxslt/xsltutils.h>
30 #include <libxslt/transform.h>
31
32 /** \brief The internal structure for yaz_record_conv_t */
33 struct yaz_record_conv_struct {
34     /** memory for configuration */
35     NMEM nmem;
36
37     /** conversion rules (allocated using NMEM) */
38     struct yaz_record_conv_rule *rules;
39
40     /** pointer to last conversion rule pointer in chain */
41     struct yaz_record_conv_rule **rules_p;
42
43     /** string buffer for error messages */
44     WRBUF wr_error;
45
46     /** path for opening files  */
47     char *path;
48 };
49
/** \brief transformation types (rule types) */
enum YAZ_RECORD_CONV_RULE
{
    YAZ_RECORD_CONV_RULE_XSLT = 0,  /**< rule applies an XSLT stylesheet */
    YAZ_RECORD_CONV_RULE_MARC = 1   /**< rule decodes and re-encodes MARC */
};
56
57
58 /** \brief tranformation info (rule info) */
59 struct yaz_record_conv_rule {
60     enum YAZ_RECORD_CONV_RULE which;
61     union {
62         struct {
63             xsltStylesheetPtr xsp;
64             int dummy;
65         } xslt;
66         struct {
67             yaz_iconv_t iconv_t;
68             int input_format;
69             int output_format;
70         } marc;
71     } u;
72     struct yaz_record_conv_rule *next;
73 };
74
75 /** reset rules+configuration */
76 static void yaz_record_conv_reset(yaz_record_conv_t p)
77 {
78     struct yaz_record_conv_rule *r;
79     for (r = p->rules; r; r = r->next)
80     {
81         if (r->which == YAZ_RECORD_CONV_RULE_MARC)
82         {
83             if (r->u.marc.iconv_t)
84                 yaz_iconv_close(r->u.marc.iconv_t);
85         }
86         else if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
87         {
88             xsltFreeStylesheet(r->u.xslt.xsp);
89         }
90     }
91     wrbuf_rewind(p->wr_error);
92     nmem_reset(p->nmem);
93
94     p->rules = 0;
95
96     p->rules_p = &p->rules;
97 }
98
99 yaz_record_conv_t yaz_record_conv_create()
100 {
101     yaz_record_conv_t p = xmalloc(sizeof(*p));
102     p->nmem = nmem_create();
103     p->wr_error = wrbuf_alloc();
104     p->rules = 0;
105     p->path = 0;
106
107     yaz_record_conv_reset(p);
108     return p;
109 }
110
111 void yaz_record_conv_destroy(yaz_record_conv_t p)
112 {
113     if (p)
114     {
115         yaz_record_conv_reset(p);
116         nmem_destroy(p->nmem);
117         wrbuf_free(p->wr_error, 1);
118         xfree(p->path);
119         xfree(p);
120     }
121 }
122
123 static struct yaz_record_conv_rule *add_rule(yaz_record_conv_t p,
124                                              enum YAZ_RECORD_CONV_RULE type)
125 {
126     struct yaz_record_conv_rule *r = nmem_malloc(p->nmem, sizeof(*r));
127     r->which = type;
128     r->next = 0;
129     *p->rules_p = r;
130     p->rules_p = &r->next;
131     return r;
132 }
133
134 static int conv_xslt(yaz_record_conv_t p, const xmlNode *ptr)
135 {
136     struct _xmlAttr *attr;
137     const char *stylesheet = 0;
138
139     for (attr = ptr->properties; attr; attr = attr->next)
140     {
141         if (!xmlStrcmp(attr->name, BAD_CAST "stylesheet") &&
142             attr->children && attr->children->type == XML_TEXT_NODE)
143             stylesheet = (const char *) attr->children->content;
144         else
145         {
146             wrbuf_printf(p->wr_error, "Bad attribute '%s'."
147                          "Expected stylesheet.", attr->name);
148             return -1;
149         }
150     }
151     if (!stylesheet)
152     {
153         wrbuf_printf(p->wr_error, "Missing attribute 'stylesheet'");
154         return -1;
155     }
156     else
157     {
158         char fullpath[1024];
159         xsltStylesheetPtr xsp;
160         if (!yaz_filepath_resolve(stylesheet, p->path, 0, fullpath))
161         {
162             wrbuf_printf(p->wr_error, "could not locate '%s'. Path=%s",
163                          stylesheet, p->path);
164             return -1;
165         }
166         xsp = xsltParseStylesheetFile((xmlChar*) fullpath);
167         if (!xsp)
168         {
169             wrbuf_printf(p->wr_error, "xsltParseStylesheetFile failed'");
170             return -1;
171         }
172         else
173         {
174             struct yaz_record_conv_rule *r = 
175                 add_rule(p, YAZ_RECORD_CONV_RULE_XSLT);
176             r->u.xslt.xsp = xsp;
177         }
178     }
179     return 0;
180 }
181
182 static int conv_marc(yaz_record_conv_t p, const xmlNode *ptr)
183 {
184     struct _xmlAttr *attr;
185     const char *input_charset = 0;
186     const char *output_charset = 0;
187     const char *input_format = 0;
188     const char *output_format = 0;
189     int input_format_mode = 0;
190     int output_format_mode = 0;
191     struct yaz_record_conv_rule *r;
192     yaz_iconv_t cd = 0;
193
194     for (attr = ptr->properties; attr; attr = attr->next)
195     {
196         if (!xmlStrcmp(attr->name, BAD_CAST "inputcharset") &&
197             attr->children && attr->children->type == XML_TEXT_NODE)
198             input_charset = (const char *) attr->children->content;
199         else if (!xmlStrcmp(attr->name, BAD_CAST "outputcharset") &&
200             attr->children && attr->children->type == XML_TEXT_NODE)
201             output_charset = (const char *) attr->children->content;
202         else if (!xmlStrcmp(attr->name, BAD_CAST "inputformat") &&
203             attr->children && attr->children->type == XML_TEXT_NODE)
204             input_format = (const char *) attr->children->content;
205         else if (!xmlStrcmp(attr->name, BAD_CAST "outputformat") &&
206             attr->children && attr->children->type == XML_TEXT_NODE)
207             output_format = (const char *) attr->children->content;
208         else
209         {
210             wrbuf_printf(p->wr_error, "Bad attribute '%s'.", attr->name);
211             return -1;
212         }
213     }
214     if (!input_format)
215     {
216         wrbuf_printf(p->wr_error, "Attribute 'inputformat' required");
217         return -1;
218     }
219     else if (!strcmp(input_format, "marc"))
220     {
221         input_format_mode = YAZ_MARC_ISO2709;
222     }
223     else if (!strcmp(input_format, "xml"))
224     {
225         input_format_mode = YAZ_MARC_MARCXML;
226         /** Libxml2 generates UTF-8 encoding by default .
227             So we convert from UTF-8 to outputcharset (if defined) 
228         */
229         if (!input_charset && output_charset)
230             input_charset = "utf-8";
231     }
232     else
233     {
234         wrbuf_printf(p->wr_error, "Bad inputformat: '%s'", input_format);
235         return -1;
236     }
237     
238     if (!output_format)
239     {
240         wrbuf_printf(p->wr_error, "Attribute 'outputformat' required");
241         return -1;
242     }
243     else if (!strcmp(output_format, "line"))
244     {
245         output_format_mode = YAZ_MARC_LINE;
246     }
247     else if (!strcmp(output_format, "marcxml"))
248     {
249         output_format_mode = YAZ_MARC_MARCXML;
250         if (input_charset && !output_charset)
251             output_charset = "utf-8";
252     }
253     else if (!strcmp(output_format, "marc"))
254     {
255         output_format_mode = YAZ_MARC_ISO2709;
256     }
257     else if (!strcmp(output_format, "marcxchange"))
258     {
259         output_format_mode = YAZ_MARC_XCHANGE;
260         if (input_charset && !output_charset)
261             output_charset = "utf-8";
262     }
263     else
264     {
265         wrbuf_printf(p->wr_error, "Bad outputformat: '%s'", input_format);
266         return -1;
267     }
268     if (input_charset && output_charset)
269     {
270         cd = yaz_iconv_open(output_charset, input_charset);
271         if (!cd)
272         {
273             wrbuf_printf(p->wr_error, "Unsupported character set mamping"
274                          " inputcharset=%s outputcharset=%s",
275                          input_charset, output_charset);
276             return -1;
277         }
278     }
279     else if (input_charset)
280     {
281         wrbuf_printf(p->wr_error, "Attribute 'outputcharset' missing");
282         return -1;
283     }
284     else if (output_charset)
285     {
286         wrbuf_printf(p->wr_error, "Attribute 'inputcharset' missing");
287         return -1;
288     }
289     r = add_rule(p, YAZ_RECORD_CONV_RULE_MARC);
290     r->u.marc.iconv_t = cd;
291
292     r->u.marc.input_format = input_format_mode;
293     r->u.marc.output_format = output_format_mode;
294     return 0;
295 }
296
297 int yaz_record_conv_configure(yaz_record_conv_t p, const void *ptr_v)
298 {
299     const xmlNode *ptr = ptr_v; 
300
301     yaz_record_conv_reset(p);
302
303     if (ptr && ptr->type == XML_ELEMENT_NODE &&
304         !strcmp((const char *) ptr->name, "convert"))
305     {
306         for (ptr = ptr->children; ptr; ptr = ptr->next)
307         {
308             if (ptr->type != XML_ELEMENT_NODE)
309                 continue;
310             if (!strcmp((const char *) ptr->name, "xslt"))
311             {
312                 if (conv_xslt(p, ptr))
313                     return -1;
314             }
315             else if (!strcmp((const char *) ptr->name, "marc"))
316             {
317                 if (conv_marc(p, ptr))
318                     return -1;
319             }
320             else
321             {
322                 wrbuf_printf(p->wr_error, "Bad element '%s'."
323                              "Expected marc, xslt, ..", ptr->name);
324                 return -1;
325             }
326         }
327     }
328     else
329     {
330         wrbuf_printf(p->wr_error, "Missing 'convert' element");
331         return -1;
332     }
333     return 0;
334 }
335
336 int yaz_record_conv_record(yaz_record_conv_t p, const char *input_record,
337                            WRBUF output_record)
338 {
339     int ret = 0;
340     WRBUF record = output_record; /* pointer transfer */
341     struct yaz_record_conv_rule *r = p->rules;
342     wrbuf_rewind(p->wr_error);
343     
344     wrbuf_puts(record, input_record);
345     for (; ret == 0 && r; r = r->next)
346     {
347         if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
348         {
349             xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
350                                            wrbuf_len(record));
351             if (!doc)
352             {
353                 wrbuf_printf(p->wr_error, "xmlParseMemory failed");
354                 ret = -1;
355             }
356             else
357             {
358                 xmlDocPtr res = xsltApplyStylesheet(r->u.xslt.xsp, doc, 0);
359                 if (res)
360                 {
361                     xmlChar *out_buf;
362                     int out_len;
363                     xmlDocDumpFormatMemory (res, &out_buf, &out_len, 1);
364
365                     wrbuf_rewind(record);
366                     wrbuf_write(record, (const char *) out_buf, out_len);
367
368                     xmlFree(out_buf);
369                     xmlFreeDoc(res);
370                 }
371                 else
372                 {
373                     wrbuf_printf(p->wr_error, "xsltApplyStylesheet faailed");
374                     ret = -1;
375                 }
376                 xmlFreeDoc(doc);
377             }
378         }
379         else if (r->which == YAZ_RECORD_CONV_RULE_MARC)
380         {
381             yaz_marc_t mt = yaz_marc_create();
382
383             yaz_marc_xml(mt, r->u.marc.output_format);
384
385             if (r->u.marc.iconv_t)
386                 yaz_marc_iconv(mt, r->u.marc.iconv_t);
387             if (r->u.marc.input_format == YAZ_MARC_ISO2709)
388             {
389                 int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
390                                                wrbuf_len(record));
391                 if (sz > 0)
392                     ret = 0;
393                 else
394                     ret = -1;
395             }
396             else if (r->u.marc.input_format == YAZ_MARC_MARCXML)
397             {
398                 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
399                                                wrbuf_len(record));
400                 if (!doc)
401                 {
402                     wrbuf_printf(p->wr_error, "xmlParseMemory failed");
403                     ret = -1;
404                 }
405                 else
406                 {
407                     ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc));
408                     if (ret)
409                         wrbuf_printf(p->wr_error, "yaz_marc_read_xml failed");
410                 }
411                 xmlFreeDoc(doc);
412             }
413             else
414             {
415                 wrbuf_printf(p->wr_error, "unsupported input format");
416                 ret = -1;
417             }
418             if (ret == 0)
419             {
420                 wrbuf_rewind(record);
421                 ret = yaz_marc_write_mode(mt, record);
422                 if (ret)
423                     wrbuf_printf(p->wr_error, "yaz_marc_write_mode failed");
424             }
425             yaz_marc_destroy(mt);
426         }
427     }
428     return ret;
429 }
430
431 const char *yaz_record_conv_get_error(yaz_record_conv_t p)
432 {
433     return wrbuf_buf(p->wr_error);
434 }
435
436 void yaz_record_conv_set_path(yaz_record_conv_t p, const char *path)
437 {
438     if (p)
439     {
440         xfree(p->path);
441         p->path = 0;
442         if (path)
443             p->path = xstrdup(path);
444     }
445 }
446 #endif
447
448 /*
449  * Local variables:
450  * c-basic-offset: 4
451  * indent-tabs-mode: nil
452  * End:
453  * vim: shiftwidth=4 tabstop=8 expandtab
454  */
455