<retrielvalinfo> config file XML syntax changed to a more intuitive and well-structur...
[yaz-moved-to-github.git] / src / record_conv.c
1 /*
2  * Copyright (C) 2005-2006, Index Data ApS
3  * See the file LICENSE for details.
4  *
5  * $Id: record_conv.c,v 1.12 2006-12-12 10:41:38 marc Exp $
6  */
7 /**
8  * \file record_conv.c
9  * \brief Record Conversions utility
10  */
11
12 #if HAVE_CONFIG_H
13 #include <config.h>
14 #endif
15
16 #include <string.h>
17 #include <yaz/yaz-iconv.h>
18 #include <yaz/marcdisp.h>
19 #include <yaz/record_conv.h>
20 #include <yaz/wrbuf.h>
21 #include <yaz/xmalloc.h>
22 #include <yaz/nmem.h>
23 #include <yaz/tpath.h>
24
25 #if YAZ_HAVE_XML2
26 #include <libxml/parser.h>
27 #include <libxml/tree.h>
28 #include <libxml/xinclude.h>
29 #if YAZ_HAVE_XSLT
30 #include <libxslt/xsltutils.h>
31 #include <libxslt/transform.h>
32 #endif
33 #if YAZ_HAVE_EXSLT
34 #include <libexslt/exslt.h>
35 #endif
36
37 /** \brief The internal structure for yaz_record_conv_t */
38 struct yaz_record_conv_struct {
39     /** \brief memory for configuration */
40     NMEM nmem;
41
42     /** \brief conversion rules (allocated using NMEM) */
43     struct yaz_record_conv_rule *rules;
44
45     /** \brief pointer to last conversion rule pointer in chain */
46     struct yaz_record_conv_rule **rules_p;
47
48     /** \brief string buffer for error messages */
49     WRBUF wr_error;
50
51     /** \brief path for opening files  */
52     char *path;
53 };
54
/** \brief transformation types (rule types) */
enum YAZ_RECORD_CONV_RULE
{
    /** rule applies a parsed XSLT stylesheet */
    YAZ_RECORD_CONV_RULE_XSLT,
    /** rule converts MARC format and, optionally, character set */
    YAZ_RECORD_CONV_RULE_MARC
};
61
62
63 /** \brief tranformation info (rule info) */
64 struct yaz_record_conv_rule {
65     enum YAZ_RECORD_CONV_RULE which;
66     union {
67 #if YAZ_HAVE_XSLT
68         struct {
69             xsltStylesheetPtr xsp;
70         } xslt;
71 #endif
72         struct {
73             yaz_iconv_t iconv_t;
74             int input_format;
75             int output_format;
76         } marc;
77     } u;
78     struct yaz_record_conv_rule *next;
79 };
80
81 /** \brief reset rules+configuration */
82 static void yaz_record_conv_reset(yaz_record_conv_t p)
83 {
84
85     struct yaz_record_conv_rule *r;
86     for (r = p->rules; r; r = r->next)
87     {
88         if (r->which == YAZ_RECORD_CONV_RULE_MARC)
89         {
90             if (r->u.marc.iconv_t)
91                 yaz_iconv_close(r->u.marc.iconv_t);
92         }
93 #if YAZ_HAVE_XSLT
94         else if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
95         {
96             xsltFreeStylesheet(r->u.xslt.xsp);
97         }
98 #endif
99     }
100     wrbuf_rewind(p->wr_error);
101     nmem_reset(p->nmem);
102
103     p->rules = 0;
104
105     p->rules_p = &p->rules;
106 }
107
108 yaz_record_conv_t yaz_record_conv_create()
109 {
110     yaz_record_conv_t p = xmalloc(sizeof(*p));
111     p->nmem = nmem_create();
112     p->wr_error = wrbuf_alloc();
113     p->rules = 0;
114     p->path = 0;
115
116 #if YAZ_HAVE_EXSLT
117     exsltRegisterAll(); 
118 #endif
119     yaz_record_conv_reset(p);
120     return p;
121 }
122
123 void yaz_record_conv_destroy(yaz_record_conv_t p)
124 {
125     if (p)
126     {
127         yaz_record_conv_reset(p);
128         nmem_destroy(p->nmem);
129         wrbuf_free(p->wr_error, 1);
130         xfree(p->path);
131         xfree(p);
132     }
133 }
134
135 /** \brief adds a rule */
136 static struct yaz_record_conv_rule *add_rule(yaz_record_conv_t p,
137                                              enum YAZ_RECORD_CONV_RULE type)
138 {
139     struct yaz_record_conv_rule *r = nmem_malloc(p->nmem, sizeof(*r));
140     r->which = type;
141     r->next = 0;
142     *p->rules_p = r;
143     p->rules_p = &r->next;
144     return r;
145 }
146
147 /** \brief parse 'xslt' conversion node */
148 static int conv_xslt(yaz_record_conv_t p, const xmlNode *ptr)
149 {
150 #if YAZ_HAVE_XSLT
151     struct _xmlAttr *attr;
152     const char *stylesheet = 0;
153
154     for (attr = ptr->properties; attr; attr = attr->next)
155     {
156         if (!xmlStrcmp(attr->name, BAD_CAST "stylesheet") &&
157             attr->children && attr->children->type == XML_TEXT_NODE)
158             stylesheet = (const char *) attr->children->content;
159         else
160         {
161             wrbuf_printf(p->wr_error, "Bad attribute '%s'"
162                          "Expected stylesheet.", attr->name);
163             return -1;
164         }
165     }
166     if (!stylesheet)
167     {
168         wrbuf_printf(p->wr_error, "Element <xslt>: "
169                      "attribute 'stylesheet' expected");
170         return -1;
171     }
172     else
173     {
174         char fullpath[1024];
175         xsltStylesheetPtr xsp;
176         if (!yaz_filepath_resolve(stylesheet, p->path, 0, fullpath))
177         {
178             wrbuf_printf(p->wr_error, "Element <xslt stylesheet=\"%s\"/>:"
179                          " could not locate stylesheet '%s' with path '%s'",
180                          stylesheet, fullpath, p->path);
181             return -1;
182         }
183         xsp = xsltParseStylesheetFile((xmlChar*) fullpath);
184         if (!xsp)
185         {
186             wrbuf_printf(p->wr_error, "Element <xslt stylesheet=\"%s\"/>:"
187                          " parsing stylesheet '%s' with path '%s' failed,"
188 #if YAZ_HAVE_EXSLT
189                          " EXSLT enabled",
190 #else
191                          " EXSLT not supported",
192 #endif
193                          stylesheet, fullpath, p->path);
194             return -1;
195         }
196         else
197         {
198             struct yaz_record_conv_rule *r = 
199                 add_rule(p, YAZ_RECORD_CONV_RULE_XSLT);
200             r->u.xslt.xsp = xsp;
201         }
202     }
203     return 0;
204 #else
205     wrbuf_printf(p->wr_error, "xslt unsupported."
206                  " YAZ compiled without XSLT support");
207     return -1;
208 #endif
209 }
210
211 /** \brief parse 'marc' conversion node */
212 static int conv_marc(yaz_record_conv_t p, const xmlNode *ptr)
213 {
214     struct _xmlAttr *attr;
215     const char *input_charset = 0;
216     const char *output_charset = 0;
217     const char *input_format = 0;
218     const char *output_format = 0;
219     int input_format_mode = 0;
220     int output_format_mode = 0;
221     struct yaz_record_conv_rule *r;
222     yaz_iconv_t cd = 0;
223
224     for (attr = ptr->properties; attr; attr = attr->next)
225     {
226         if (!xmlStrcmp(attr->name, BAD_CAST "inputcharset") &&
227             attr->children && attr->children->type == XML_TEXT_NODE)
228             input_charset = (const char *) attr->children->content;
229         else if (!xmlStrcmp(attr->name, BAD_CAST "outputcharset") &&
230             attr->children && attr->children->type == XML_TEXT_NODE)
231             output_charset = (const char *) attr->children->content;
232         else if (!xmlStrcmp(attr->name, BAD_CAST "inputformat") &&
233             attr->children && attr->children->type == XML_TEXT_NODE)
234             input_format = (const char *) attr->children->content;
235         else if (!xmlStrcmp(attr->name, BAD_CAST "outputformat") &&
236             attr->children && attr->children->type == XML_TEXT_NODE)
237             output_format = (const char *) attr->children->content;
238         else
239         {
240             wrbuf_printf(p->wr_error, "Element <marc>: expected attributes"
241                          "'inputformat', 'inputcharset', 'outputformat' or"
242                          " 'outputcharset', got attribute '%s'", 
243                          attr->name);
244             return -1;
245         }
246     }
247     if (!input_format)
248     {
249         wrbuf_printf(p->wr_error, "Element <marc>: "
250                      "attribute 'inputformat' required");
251         return -1;
252     }
253     else if (!strcmp(input_format, "marc"))
254     {
255         input_format_mode = YAZ_MARC_ISO2709;
256     }
257     else if (!strcmp(input_format, "xml"))
258     {
259         input_format_mode = YAZ_MARC_MARCXML;
260         /** Libxml2 generates UTF-8 encoding by default .
261             So we convert from UTF-8 to outputcharset (if defined) 
262         */
263         if (!input_charset && output_charset)
264             input_charset = "utf-8";
265     }
266     else
267     {
268         wrbuf_printf(p->wr_error, "Element <marc inputformat='%s'>: "
269                      " Unsupported input format"
270                      " defined by attribute value", 
271                      input_format);
272         return -1;
273     }
274     
275     if (!output_format)
276     {
277         wrbuf_printf(p->wr_error, 
278                      "Element <marc>: attribute 'outputformat' required");
279         return -1;
280     }
281     else if (!strcmp(output_format, "line"))
282     {
283         output_format_mode = YAZ_MARC_LINE;
284     }
285     else if (!strcmp(output_format, "marcxml"))
286     {
287         output_format_mode = YAZ_MARC_MARCXML;
288         if (input_charset && !output_charset)
289             output_charset = "utf-8";
290     }
291     else if (!strcmp(output_format, "marc"))
292     {
293         output_format_mode = YAZ_MARC_ISO2709;
294     }
295     else if (!strcmp(output_format, "marcxchange"))
296     {
297         output_format_mode = YAZ_MARC_XCHANGE;
298         if (input_charset && !output_charset)
299             output_charset = "utf-8";
300     }
301     else
302     {
303         wrbuf_printf(p->wr_error, "Element <marc outputformat='%s'>: "
304                      " Unsupported output format"
305                      " defined by attribute value", 
306                      output_format);
307         return -1;
308     }
309     if (input_charset && output_charset)
310     {
311         cd = yaz_iconv_open(output_charset, input_charset);
312         if (!cd)
313         {
314             wrbuf_printf(p->wr_error, 
315                          "Element <marc inputcharset='%s' outputcharset='%s'>:"
316                          " Unsupported character set mapping"
317                          " defined by attribute values",
318                          input_charset, output_charset);
319             return -1;
320         }
321     }
322     else if (input_charset)
323     {
324         wrbuf_printf(p->wr_error, "Element <marc>: "
325                      "attribute 'outputcharset' missing");
326         return -1;
327     }
328     else if (output_charset)
329     {
330         wrbuf_printf(p->wr_error, "Element <marc>: "
331                      "attribute 'inputcharset' missing");
332         return -1;
333     }
334     r = add_rule(p, YAZ_RECORD_CONV_RULE_MARC);
335     r->u.marc.iconv_t = cd;
336
337     r->u.marc.input_format = input_format_mode;
338     r->u.marc.output_format = output_format_mode;
339     return 0;
340 }
341
342 int yaz_record_conv_configure(yaz_record_conv_t p, const void *ptr_v)
343 {
344     const xmlNode *ptr = ptr_v; 
345
346     yaz_record_conv_reset(p);
347
348     /* parsing element children */
349     for (ptr = ptr->children; ptr; ptr = ptr->next)
350         {
351             if (ptr->type != XML_ELEMENT_NODE)
352                 continue;
353             if (!strcmp((const char *) ptr->name, "xslt"))
354                 {
355                     if (conv_xslt(p, ptr))
356                         return -1;
357                 }
358             else if (!strcmp((const char *) ptr->name, "marc"))
359                 {
360                     if (conv_marc(p, ptr))
361                         return -1;
362                 }
363             else
364                 {
365                     wrbuf_printf(p->wr_error, "Element <backend>: expected "
366                                  "<marc> or <xslt> element, got <%s>"
367                                  , ptr->name);
368                     return -1;
369                 }
370         }
371     return 0;
372 }
373
374 int yaz_record_conv_record(yaz_record_conv_t p,
375                            const char *input_record_buf,
376                            size_t input_record_len,
377                            WRBUF output_record)
378 {
379     int ret = 0;
380     WRBUF record = output_record; /* pointer transfer */
381     struct yaz_record_conv_rule *r = p->rules;
382     wrbuf_rewind(p->wr_error);
383     
384     wrbuf_write(record, input_record_buf, input_record_len);
385     for (; ret == 0 && r; r = r->next)
386     {
387         if (r->which == YAZ_RECORD_CONV_RULE_MARC)
388         {
389             yaz_marc_t mt = yaz_marc_create();
390
391             yaz_marc_xml(mt, r->u.marc.output_format);
392
393             if (r->u.marc.iconv_t)
394                 yaz_marc_iconv(mt, r->u.marc.iconv_t);
395             if (r->u.marc.input_format == YAZ_MARC_ISO2709)
396             {
397                 int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
398                                                wrbuf_len(record));
399                 if (sz > 0)
400                     ret = 0;
401                 else
402                     ret = -1;
403             }
404             else if (r->u.marc.input_format == YAZ_MARC_MARCXML)
405             {
406                 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
407                                                wrbuf_len(record));
408                 if (!doc)
409                 {
410                     wrbuf_printf(p->wr_error, "xmlParseMemory failed");
411                     ret = -1;
412                 }
413                 else
414                 {
415                     ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc));
416                     if (ret)
417                         wrbuf_printf(p->wr_error, "yaz_marc_read_xml failed");
418                 }
419                 xmlFreeDoc(doc);
420             }
421             else
422             {
423                 wrbuf_printf(p->wr_error, "unsupported input format");
424                 ret = -1;
425             }
426             if (ret == 0)
427             {
428                 wrbuf_rewind(record);
429                 ret = yaz_marc_write_mode(mt, record);
430                 if (ret)
431                     wrbuf_printf(p->wr_error, "yaz_marc_write_mode failed");
432             }
433             yaz_marc_destroy(mt);
434         }
435 #if YAZ_HAVE_XSLT
436         else if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
437         {
438             xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
439                                            wrbuf_len(record));
440             if (!doc)
441             {
442                 wrbuf_printf(p->wr_error, "xmlParseMemory failed");
443                 ret = -1;
444             }
445             else
446             {
447                 xmlDocPtr res = xsltApplyStylesheet(r->u.xslt.xsp, doc, 0);
448                 if (res)
449                 {
450                     xmlChar *out_buf = 0;
451                     int out_len;
452
453 #if YAZ_HAVE_XSLTSAVERESULTTOSTRING
454                     xsltSaveResultToString(&out_buf, &out_len, res,
455                                            r->u.xslt.xsp); 
456 #else
457                     xmlDocDumpFormatMemory (res, &out_buf, &out_len, 1);
458 #endif
459                     if (!out_buf)
460                     {
461                         wrbuf_printf(p->wr_error,
462                                      "xsltSaveResultToString failed");
463                         ret = -1;
464                     }
465                     else
466                     {
467                         wrbuf_rewind(record);
468                         wrbuf_write(record, (const char *) out_buf, out_len);
469                         
470                         xmlFree(out_buf);
471                     }
472                     xmlFreeDoc(res);
473                 }
474                 else
475                 {
476                     wrbuf_printf(p->wr_error, "xsltApplyStylesheet failed");
477                     ret = -1;
478                 }
479                 xmlFreeDoc(doc);
480             }
481         }
482 #endif
483     }
484     return ret;
485 }
486
487 const char *yaz_record_conv_get_error(yaz_record_conv_t p)
488 {
489     return wrbuf_buf(p->wr_error);
490 }
491
492 void yaz_record_conv_set_path(yaz_record_conv_t p, const char *path)
493 {
494     xfree(p->path);
495     p->path = 0;
496     if (path)
497         p->path = xstrdup(path);
498 }
499 #endif
500
501 /*
502  * Local variables:
503  * c-basic-offset: 4
504  * indent-tabs-mode: nil
505  * End:
506  * vim: shiftwidth=4 tabstop=8 expandtab
507  */
508