Calling exsltRegisterAll instead of exsltDynRegister
[yaz-moved-to-github.git] / src / record_conv.c
1 /*
2  * Copyright (C) 2005-2006, Index Data ApS
3  * See the file LICENSE for details.
4  *
5  * $Id: record_conv.c,v 1.8 2006-05-08 16:58:25 quinn Exp $
6  */
7 /**
8  * \file record_conv.c
9  * \brief Record Conversions utility
10  */
11
12 #if HAVE_CONFIG_H
13 #include <config.h>
14 #endif
15
16 #include <string.h>
17 #include <yaz/yaz-iconv.h>
18 #include <yaz/marcdisp.h>
19 #include <yaz/record_conv.h>
20 #include <yaz/wrbuf.h>
21 #include <yaz/xmalloc.h>
22 #include <yaz/nmem.h>
23 #include <yaz/tpath.h>
24
25 #if HAVE_XML2
26 #include <libxml/parser.h>
27 #include <libxml/tree.h>
28 #include <libxml/xinclude.h>
29 #if HAVE_XSLT
30 #include <libxslt/xsltutils.h>
31 #include <libxslt/transform.h>
32 #endif
33 #if HAVE_EXSLT
34 #include <libexslt/exslt.h>
35 #endif
36
37 /** \brief The internal structure for yaz_record_conv_t */
38 struct yaz_record_conv_struct {
39     /** \brief memory for configuration */
40     NMEM nmem;
41
42     /** \brief conversion rules (allocated using NMEM) */
43     struct yaz_record_conv_rule *rules;
44
45     /** \brief pointer to last conversion rule pointer in chain */
46     struct yaz_record_conv_rule **rules_p;
47
48     /** \brief string buffer for error messages */
49     WRBUF wr_error;
50
51     /** \brief path for opening files  */
52     char *path;
53 };
54
/** \brief transformation types (rule types) */
enum YAZ_RECORD_CONV_RULE {
    /** \brief rule applies an XSLT stylesheet */
    YAZ_RECORD_CONV_RULE_XSLT = 0,
    /** \brief rule decodes a MARC record and writes it in another format */
    YAZ_RECORD_CONV_RULE_MARC = 1
};
61
62
63 /** \brief tranformation info (rule info) */
64 struct yaz_record_conv_rule {
65     enum YAZ_RECORD_CONV_RULE which;
66     union {
67 #if HAVE_XSLT
68         struct {
69             xsltStylesheetPtr xsp;
70         } xslt;
71 #endif
72         struct {
73             yaz_iconv_t iconv_t;
74             int input_format;
75             int output_format;
76         } marc;
77     } u;
78     struct yaz_record_conv_rule *next;
79 };
80
81 /** \brief reset rules+configuration */
82 static void yaz_record_conv_reset(yaz_record_conv_t p)
83 {
84     struct yaz_record_conv_rule *r;
85     for (r = p->rules; r; r = r->next)
86     {
87         if (r->which == YAZ_RECORD_CONV_RULE_MARC)
88         {
89             if (r->u.marc.iconv_t)
90                 yaz_iconv_close(r->u.marc.iconv_t);
91         }
92 #if HAVE_XSLT
93         else if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
94         {
95             xsltFreeStylesheet(r->u.xslt.xsp);
96         }
97 #endif
98     }
99     wrbuf_rewind(p->wr_error);
100     nmem_reset(p->nmem);
101
102     p->rules = 0;
103
104     p->rules_p = &p->rules;
105 }
106
107 yaz_record_conv_t yaz_record_conv_create()
108 {
109     yaz_record_conv_t p = xmalloc(sizeof(*p));
110     p->nmem = nmem_create();
111     p->wr_error = wrbuf_alloc();
112     p->rules = 0;
113     p->path = 0;
114
115 #if HAVE_EXSLT
116     exsltRegisterAll(); 
117 #endif
118     yaz_record_conv_reset(p);
119     return p;
120 }
121
122 void yaz_record_conv_destroy(yaz_record_conv_t p)
123 {
124     if (p)
125     {
126         yaz_record_conv_reset(p);
127         nmem_destroy(p->nmem);
128         wrbuf_free(p->wr_error, 1);
129         xfree(p->path);
130         xfree(p);
131     }
132 }
133
134 /** \brief adds a rule */
135 static struct yaz_record_conv_rule *add_rule(yaz_record_conv_t p,
136                                              enum YAZ_RECORD_CONV_RULE type)
137 {
138     struct yaz_record_conv_rule *r = nmem_malloc(p->nmem, sizeof(*r));
139     r->which = type;
140     r->next = 0;
141     *p->rules_p = r;
142     p->rules_p = &r->next;
143     return r;
144 }
145
146 /** \brief parse 'xslt' conversion node */
147 static int conv_xslt(yaz_record_conv_t p, const xmlNode *ptr)
148 {
149 #if HAVE_XSLT
150     struct _xmlAttr *attr;
151     const char *stylesheet = 0;
152
153     for (attr = ptr->properties; attr; attr = attr->next)
154     {
155         if (!xmlStrcmp(attr->name, BAD_CAST "stylesheet") &&
156             attr->children && attr->children->type == XML_TEXT_NODE)
157             stylesheet = (const char *) attr->children->content;
158         else
159         {
160             wrbuf_printf(p->wr_error, "Bad attribute '%s'"
161                          "Expected stylesheet.", attr->name);
162             return -1;
163         }
164     }
165     if (!stylesheet)
166     {
167         wrbuf_printf(p->wr_error, "Missing attribute 'stylesheet'");
168         return -1;
169     }
170     else
171     {
172         char fullpath[1024];
173         xsltStylesheetPtr xsp;
174         if (!yaz_filepath_resolve(stylesheet, p->path, 0, fullpath))
175         {
176             wrbuf_printf(p->wr_error, "could not locate '%s'. Path=%s",
177                          stylesheet, p->path);
178             return -1;
179         }
180         xsp = xsltParseStylesheetFile((xmlChar*) fullpath);
181         if (!xsp)
182         {
183             wrbuf_printf(p->wr_error, "xsltParseStylesheetFile failed'");
184             return -1;
185         }
186         else
187         {
188             struct yaz_record_conv_rule *r = 
189                 add_rule(p, YAZ_RECORD_CONV_RULE_XSLT);
190             r->u.xslt.xsp = xsp;
191         }
192     }
193     return 0;
194 #else
195     wrbuf_printf(p->wr_error, "xslt unsupported."
196                  " YAZ compiled without XSLT support");
197     return -1;
198 #endif
199 }
200
201 /** \brief parse 'marc' conversion node */
202 static int conv_marc(yaz_record_conv_t p, const xmlNode *ptr)
203 {
204     struct _xmlAttr *attr;
205     const char *input_charset = 0;
206     const char *output_charset = 0;
207     const char *input_format = 0;
208     const char *output_format = 0;
209     int input_format_mode = 0;
210     int output_format_mode = 0;
211     struct yaz_record_conv_rule *r;
212     yaz_iconv_t cd = 0;
213
214     for (attr = ptr->properties; attr; attr = attr->next)
215     {
216         if (!xmlStrcmp(attr->name, BAD_CAST "inputcharset") &&
217             attr->children && attr->children->type == XML_TEXT_NODE)
218             input_charset = (const char *) attr->children->content;
219         else if (!xmlStrcmp(attr->name, BAD_CAST "outputcharset") &&
220             attr->children && attr->children->type == XML_TEXT_NODE)
221             output_charset = (const char *) attr->children->content;
222         else if (!xmlStrcmp(attr->name, BAD_CAST "inputformat") &&
223             attr->children && attr->children->type == XML_TEXT_NODE)
224             input_format = (const char *) attr->children->content;
225         else if (!xmlStrcmp(attr->name, BAD_CAST "outputformat") &&
226             attr->children && attr->children->type == XML_TEXT_NODE)
227             output_format = (const char *) attr->children->content;
228         else
229         {
230             wrbuf_printf(p->wr_error, "Bad attribute '%s'", attr->name);
231             return -1;
232         }
233     }
234     if (!input_format)
235     {
236         wrbuf_printf(p->wr_error, "Attribute 'inputformat' required");
237         return -1;
238     }
239     else if (!strcmp(input_format, "marc"))
240     {
241         input_format_mode = YAZ_MARC_ISO2709;
242     }
243     else if (!strcmp(input_format, "xml"))
244     {
245         input_format_mode = YAZ_MARC_MARCXML;
246         /** Libxml2 generates UTF-8 encoding by default .
247             So we convert from UTF-8 to outputcharset (if defined) 
248         */
249         if (!input_charset && output_charset)
250             input_charset = "utf-8";
251     }
252     else
253     {
254         wrbuf_printf(p->wr_error, "Bad inputformat: '%s'", input_format);
255         return -1;
256     }
257     
258     if (!output_format)
259     {
260         wrbuf_printf(p->wr_error, "Attribute 'outputformat' required");
261         return -1;
262     }
263     else if (!strcmp(output_format, "line"))
264     {
265         output_format_mode = YAZ_MARC_LINE;
266     }
267     else if (!strcmp(output_format, "marcxml"))
268     {
269         output_format_mode = YAZ_MARC_MARCXML;
270         if (input_charset && !output_charset)
271             output_charset = "utf-8";
272     }
273     else if (!strcmp(output_format, "marc"))
274     {
275         output_format_mode = YAZ_MARC_ISO2709;
276     }
277     else if (!strcmp(output_format, "marcxchange"))
278     {
279         output_format_mode = YAZ_MARC_XCHANGE;
280         if (input_charset && !output_charset)
281             output_charset = "utf-8";
282     }
283     else
284     {
285         wrbuf_printf(p->wr_error, "Bad outputformat: '%s'", input_format);
286         return -1;
287     }
288     if (input_charset && output_charset)
289     {
290         cd = yaz_iconv_open(output_charset, input_charset);
291         if (!cd)
292         {
293             wrbuf_printf(p->wr_error, "Unsupported character set mamping"
294                          " inputcharset=%s outputcharset=%s",
295                          input_charset, output_charset);
296             return -1;
297         }
298     }
299     else if (input_charset)
300     {
301         wrbuf_printf(p->wr_error, "Attribute 'outputcharset' missing");
302         return -1;
303     }
304     else if (output_charset)
305     {
306         wrbuf_printf(p->wr_error, "Attribute 'inputcharset' missing");
307         return -1;
308     }
309     r = add_rule(p, YAZ_RECORD_CONV_RULE_MARC);
310     r->u.marc.iconv_t = cd;
311
312     r->u.marc.input_format = input_format_mode;
313     r->u.marc.output_format = output_format_mode;
314     return 0;
315 }
316
317 int yaz_record_conv_configure(yaz_record_conv_t p, const void *ptr_v)
318 {
319     const xmlNode *ptr = ptr_v; 
320
321     yaz_record_conv_reset(p);
322
323     if (ptr && ptr->type == XML_ELEMENT_NODE &&
324         !strcmp((const char *) ptr->name, "convert"))
325     {
326         for (ptr = ptr->children; ptr; ptr = ptr->next)
327         {
328             if (ptr->type != XML_ELEMENT_NODE)
329                 continue;
330             if (!strcmp((const char *) ptr->name, "xslt"))
331             {
332                 if (conv_xslt(p, ptr))
333                     return -1;
334             }
335             else if (!strcmp((const char *) ptr->name, "exslt"))
336             {
337 #if HAVE_EXSLT
338                 if (conv_xslt(p, ptr))
339                     return -1;
340 #else
341                 wrbuf_printf(p->wr_error, "exslt unsupported."
342                              " YAZ compiled without EXSLT support");
343                 return -1;
344 #endif
345             }
346             else if (!strcmp((const char *) ptr->name, "marc"))
347             {
348                 if (conv_marc(p, ptr))
349                     return -1;
350             }
351             else
352             {
353                 wrbuf_printf(p->wr_error, "Bad element '%s'."
354                               "Expected marc, xslt, ..", ptr->name);
355                 return -1;
356             }
357         }
358     }
359     else
360     {
361         wrbuf_printf(p->wr_error, "Missing 'convert' element");
362         return -1;
363     }
364     return 0;
365 }
366
367 int yaz_record_conv_record(yaz_record_conv_t p,
368                            const char *input_record_buf,
369                            size_t input_record_len,
370                            WRBUF output_record)
371 {
372     int ret = 0;
373     WRBUF record = output_record; /* pointer transfer */
374     struct yaz_record_conv_rule *r = p->rules;
375     wrbuf_rewind(p->wr_error);
376     
377     wrbuf_write(record, input_record_buf, input_record_len);
378     for (; ret == 0 && r; r = r->next)
379     {
380         if (r->which == YAZ_RECORD_CONV_RULE_MARC)
381         {
382             yaz_marc_t mt = yaz_marc_create();
383
384             yaz_marc_xml(mt, r->u.marc.output_format);
385
386             if (r->u.marc.iconv_t)
387                 yaz_marc_iconv(mt, r->u.marc.iconv_t);
388             if (r->u.marc.input_format == YAZ_MARC_ISO2709)
389             {
390                 int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
391                                                wrbuf_len(record));
392                 if (sz > 0)
393                     ret = 0;
394                 else
395                     ret = -1;
396             }
397             else if (r->u.marc.input_format == YAZ_MARC_MARCXML)
398             {
399                 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
400                                                wrbuf_len(record));
401                 if (!doc)
402                 {
403                     wrbuf_printf(p->wr_error, "xmlParseMemory failed");
404                     ret = -1;
405                 }
406                 else
407                 {
408                     ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc));
409                     if (ret)
410                         wrbuf_printf(p->wr_error, "yaz_marc_read_xml failed");
411                 }
412                 xmlFreeDoc(doc);
413             }
414             else
415             {
416                 wrbuf_printf(p->wr_error, "unsupported input format");
417                 ret = -1;
418             }
419             if (ret == 0)
420             {
421                 wrbuf_rewind(record);
422                 ret = yaz_marc_write_mode(mt, record);
423                 if (ret)
424                     wrbuf_printf(p->wr_error, "yaz_marc_write_mode failed");
425             }
426             yaz_marc_destroy(mt);
427         }
428 #if HAVE_XSLT
429         else if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
430         {
431             xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
432                                            wrbuf_len(record));
433             if (!doc)
434             {
435                 wrbuf_printf(p->wr_error, "xmlParseMemory failed");
436                 ret = -1;
437             }
438             else
439             {
440                 xmlDocPtr res = xsltApplyStylesheet(r->u.xslt.xsp, doc, 0);
441                 if (res)
442                 {
443                     xmlChar *out_buf;
444                     int out_len;
445                     xmlDocDumpFormatMemory (res, &out_buf, &out_len, 1);
446
447                     wrbuf_rewind(record);
448                     wrbuf_write(record, (const char *) out_buf, out_len);
449
450                     xmlFree(out_buf);
451                     xmlFreeDoc(res);
452                 }
453                 else
454                 {
455                     wrbuf_printf(p->wr_error, "xsltApplyStylesheet faailed");
456                     ret = -1;
457                 }
458                 xmlFreeDoc(doc);
459             }
460         }
461 #endif
462     }
463     return ret;
464 }
465
466 const char *yaz_record_conv_get_error(yaz_record_conv_t p)
467 {
468     return wrbuf_buf(p->wr_error);
469 }
470
471 void yaz_record_conv_set_path(yaz_record_conv_t p, const char *path)
472 {
473     xfree(p->path);
474     p->path = 0;
475     if (path)
476         p->path = xstrdup(path);
477 }
478 #endif
479
480 /*
481  * Local variables:
482  * c-basic-offset: 4
483  * indent-tabs-mode: nil
484  * End:
485  * vim: shiftwidth=4 tabstop=8 expandtab
486  */
487