Merge branch 'master' of ssh://git.indexdata.com/home/git/pub/yaz
[yaz-moved-to-github.git] / src / record_conv.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2010 Index Data
3  * See the file LICENSE for details.
4  */
5 /**
6  * \file record_conv.c
7  * \brief Record Conversions utility
8  */
9
10 #if HAVE_CONFIG_H
11 #include <config.h>
12 #endif
13
14 #include <string.h>
15 #include <yaz/yaz-iconv.h>
16 #include <yaz/marcdisp.h>
17 #include <yaz/record_conv.h>
18 #include <yaz/wrbuf.h>
19 #include <yaz/xmalloc.h>
20 #include <yaz/nmem.h>
21 #include <yaz/tpath.h>
22 #include <yaz/z-opac.h>
23
24 #if YAZ_HAVE_XML2
25 #include <libxml/parser.h>
26 #include <libxml/tree.h>
27 #include <libxml/xinclude.h>
28 #if YAZ_HAVE_XSLT
29 #include <libxslt/xsltutils.h>
30 #include <libxslt/transform.h>
31 #endif
32 #if YAZ_HAVE_EXSLT
33 #include <libexslt/exslt.h>
34 #endif
35
36 /** \brief The internal structure for yaz_record_conv_t */
37 struct yaz_record_conv_struct {
38     /** \brief memory for configuration */
39     NMEM nmem;
40
41     /** \brief conversion rules (allocated using NMEM) */
42     struct yaz_record_conv_rule *rules;
43
44     /** \brief pointer to last conversion rule pointer in chain */
45     struct yaz_record_conv_rule **rules_p;
46
47     /** \brief string buffer for error messages */
48     WRBUF wr_error;
49
50     /** \brief path for opening files  */
51     char *path;
52 };
53
/** \brief transformation types (rule types) */
enum YAZ_RECORD_CONV_RULE {
    YAZ_RECORD_CONV_RULE_XSLT, /**< rule configured by an <xslt> element */
    YAZ_RECORD_CONV_RULE_MARC  /**< rule configured by a <marc> element */
};
60
61 /** \brief tranformation info (rule info) */
62 struct yaz_record_conv_rule {
63     enum YAZ_RECORD_CONV_RULE which;
64     union {
65 #if YAZ_HAVE_XSLT
66         struct {
67             xmlDocPtr xsp_doc;
68         } xslt;
69 #endif
70         struct {
71             const char *input_charset;
72             const char *output_charset;
73             int input_format;
74             int output_format;
75         } marc;
76     } u;
77     struct yaz_record_conv_rule *next;
78 };
79
80 /** \brief reset rules+configuration */
81 static void yaz_record_conv_reset(yaz_record_conv_t p)
82 {
83
84     struct yaz_record_conv_rule *r;
85     for (r = p->rules; r; r = r->next)
86     {
87         if (r->which == YAZ_RECORD_CONV_RULE_MARC)
88         {
89             ;
90         }
91 #if YAZ_HAVE_XSLT
92         else if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
93         {
94             xmlFreeDoc(r->u.xslt.xsp_doc);
95         }
96 #endif
97     }
98     wrbuf_rewind(p->wr_error);
99     nmem_reset(p->nmem);
100
101     p->rules = 0;
102
103     p->rules_p = &p->rules;
104 }
105
106 yaz_record_conv_t yaz_record_conv_create()
107 {
108     yaz_record_conv_t p = (yaz_record_conv_t) xmalloc(sizeof(*p));
109     p->nmem = nmem_create();
110     p->wr_error = wrbuf_alloc();
111     p->rules = 0;
112     p->path = 0;
113
114 #if YAZ_HAVE_EXSLT
115     exsltRegisterAll(); 
116 #endif
117     yaz_record_conv_reset(p);
118     return p;
119 }
120
121 void yaz_record_conv_destroy(yaz_record_conv_t p)
122 {
123     if (p)
124     {
125         yaz_record_conv_reset(p);
126         nmem_destroy(p->nmem);
127         wrbuf_destroy(p->wr_error);
128         xfree(p->path);
129         xfree(p);
130     }
131 }
132
133 /** \brief adds a rule */
134 static struct yaz_record_conv_rule *add_rule(yaz_record_conv_t p,
135                                              enum YAZ_RECORD_CONV_RULE type)
136 {
137     struct yaz_record_conv_rule *r = (struct yaz_record_conv_rule *)
138         nmem_malloc(p->nmem, sizeof(*r));
139     r->which = type;
140     r->next = 0;
141     *p->rules_p = r;
142     p->rules_p = &r->next;
143     return r;
144 }
145
146 /** \brief parse 'xslt' conversion node */
147 static int conv_xslt(yaz_record_conv_t p, const xmlNode *ptr)
148 {
149 #if YAZ_HAVE_XSLT
150     struct _xmlAttr *attr;
151     const char *stylesheet = 0;
152
153     for (attr = ptr->properties; attr; attr = attr->next)
154     {
155         if (!xmlStrcmp(attr->name, BAD_CAST "stylesheet") &&
156             attr->children && attr->children->type == XML_TEXT_NODE)
157             stylesheet = (const char *) attr->children->content;
158         else
159         {
160             wrbuf_printf(p->wr_error, "Bad attribute '%s'"
161                          "Expected stylesheet.", attr->name);
162             return -1;
163         }
164     }
165     if (!stylesheet)
166     {
167         wrbuf_printf(p->wr_error, "Element <xslt>: "
168                      "attribute 'stylesheet' expected");
169         return -1;
170     }
171     else
172     {
173         char fullpath[1024];
174         xsltStylesheetPtr xsp;
175         xmlDocPtr xsp_doc;
176         if (!yaz_filepath_resolve(stylesheet, p->path, 0, fullpath))
177         {
178             wrbuf_printf(p->wr_error, "Element <xslt stylesheet=\"%s\"/>:"
179                          " could not locate stylesheet '%s'",
180                          stylesheet, fullpath);
181             if (p->path)
182                 wrbuf_printf(p->wr_error, " with path '%s'", p->path);
183                 
184             return -1;
185         }
186         xsp_doc = xmlParseFile(fullpath);
187         if (!xsp_doc)
188         {
189             wrbuf_printf(p->wr_error, "Element: <xslt stylesheet=\"%s\"/>:"
190                          " xml parse failed: %s", stylesheet, fullpath);
191             if (p->path)
192                 wrbuf_printf(p->wr_error, " with path '%s'", p->path);
193             return -1;
194         }
195         /* need to copy this before passing it to the processor. It will
196            be encapsulated in the xsp and destroyed by xsltFreeStylesheet */
197         xsp = xsltParseStylesheetDoc(xmlCopyDoc(xsp_doc, 1));
198         if (!xsp)
199         {
200             wrbuf_printf(p->wr_error, "Element: <xslt stylesheet=\"%s\"/>:"
201                          " xslt parse failed: %s", stylesheet, fullpath);
202             if (p->path)
203                 wrbuf_printf(p->wr_error, " with path '%s'", p->path);
204             wrbuf_printf(p->wr_error, " ("
205 #if YAZ_HAVE_EXSLT
206                          
207                          "EXSLT enabled"
208 #else
209                          "EXSLT not supported"
210 #endif
211                          ")");
212             xmlFreeDoc(xsp_doc);
213             return -1;
214         }
215         else
216         {
217             struct yaz_record_conv_rule *r = 
218                 add_rule(p, YAZ_RECORD_CONV_RULE_XSLT);
219             r->u.xslt.xsp_doc = xsp_doc;
220             xsltFreeStylesheet(xsp);
221         }
222     }
223     return 0;
224 #else
225     wrbuf_printf(p->wr_error, "xslt unsupported."
226                  " YAZ compiled without XSLT support");
227     return -1;
228 #endif
229 }
230
231 /** \brief parse 'marc' conversion node */
232 static int conv_marc(yaz_record_conv_t p, const xmlNode *ptr)
233 {
234     struct _xmlAttr *attr;
235     const char *input_charset = 0;
236     const char *output_charset = 0;
237     const char *input_format = 0;
238     const char *output_format = 0;
239     int input_format_mode = 0;
240     int output_format_mode = 0;
241     struct yaz_record_conv_rule *r;
242
243     for (attr = ptr->properties; attr; attr = attr->next)
244     {
245         if (!xmlStrcmp(attr->name, BAD_CAST "inputcharset") &&
246             attr->children && attr->children->type == XML_TEXT_NODE)
247             input_charset = (const char *) attr->children->content;
248         else if (!xmlStrcmp(attr->name, BAD_CAST "outputcharset") &&
249             attr->children && attr->children->type == XML_TEXT_NODE)
250             output_charset = (const char *) attr->children->content;
251         else if (!xmlStrcmp(attr->name, BAD_CAST "inputformat") &&
252             attr->children && attr->children->type == XML_TEXT_NODE)
253             input_format = (const char *) attr->children->content;
254         else if (!xmlStrcmp(attr->name, BAD_CAST "outputformat") &&
255             attr->children && attr->children->type == XML_TEXT_NODE)
256             output_format = (const char *) attr->children->content;
257         else
258         {
259             wrbuf_printf(p->wr_error, "Element <marc>: expected attributes"
260                          "'inputformat', 'inputcharset', 'outputformat' or"
261                          " 'outputcharset', got attribute '%s'", 
262                          attr->name);
263             return -1;
264         }
265     }
266     if (!input_format)
267     {
268         wrbuf_printf(p->wr_error, "Element <marc>: "
269                      "attribute 'inputformat' required");
270         return -1;
271     }
272     else if (!strcmp(input_format, "marc"))
273     {
274         input_format_mode = YAZ_MARC_ISO2709;
275     }
276     else if (!strcmp(input_format, "xml"))
277     {
278         input_format_mode = YAZ_MARC_MARCXML;
279         /** Libxml2 generates UTF-8 encoding by default .
280             So we convert from UTF-8 to outputcharset (if defined) 
281         */
282         if (!input_charset && output_charset)
283             input_charset = "utf-8";
284     }
285     else
286     {
287         wrbuf_printf(p->wr_error, "Element <marc inputformat='%s'>: "
288                      " Unsupported input format"
289                      " defined by attribute value", 
290                      input_format);
291         return -1;
292     }
293     
294     if (!output_format)
295     {
296         wrbuf_printf(p->wr_error, 
297                      "Element <marc>: attribute 'outputformat' required");
298         return -1;
299     }
300     else if (!strcmp(output_format, "line"))
301     {
302         output_format_mode = YAZ_MARC_LINE;
303     }
304     else if (!strcmp(output_format, "marcxml"))
305     {
306         output_format_mode = YAZ_MARC_MARCXML;
307         if (input_charset && !output_charset)
308             output_charset = "utf-8";
309     }
310     else if (!strcmp(output_format, "turbomarc"))
311     {
312         output_format_mode = YAZ_MARC_TURBOMARC;
313         if (input_charset && !output_charset)
314             output_charset = "utf-8";
315     }
316     else if (!strcmp(output_format, "marc"))
317     {
318         output_format_mode = YAZ_MARC_ISO2709;
319     }
320     else if (!strcmp(output_format, "marcxchange"))
321     {
322         output_format_mode = YAZ_MARC_XCHANGE;
323         if (input_charset && !output_charset)
324             output_charset = "utf-8";
325     }
326     else
327     {
328         wrbuf_printf(p->wr_error, "Element <marc outputformat='%s'>: "
329                      " Unsupported output format"
330                      " defined by attribute value", 
331                      output_format);
332         return -1;
333     }
334     if (input_charset && output_charset)
335     {
336         yaz_iconv_t cd = yaz_iconv_open(output_charset, input_charset);
337         if (!cd)
338         {
339             wrbuf_printf(p->wr_error, 
340                          "Element <marc inputcharset='%s' outputcharset='%s'>:"
341                          " Unsupported character set mapping"
342                          " defined by attribute values",
343                          input_charset, output_charset);
344             return -1;
345         }
346         yaz_iconv_close(cd);
347     }
348     else if (input_charset)
349     {
350         wrbuf_printf(p->wr_error, "Element <marc>: "
351                      "attribute 'outputcharset' missing");
352         return -1;
353     }
354     else if (output_charset)
355     {
356         wrbuf_printf(p->wr_error, "Element <marc>: "
357                      "attribute 'inputcharset' missing");
358         return -1;
359     }
360     r = add_rule(p, YAZ_RECORD_CONV_RULE_MARC);
361
362     r->u.marc.input_charset = nmem_strdup(p->nmem, input_charset);
363     r->u.marc.output_charset = nmem_strdup(p->nmem, output_charset);
364     r->u.marc.input_format = input_format_mode;
365     r->u.marc.output_format = output_format_mode;
366     return 0;
367 }
368
369 int yaz_record_conv_configure(yaz_record_conv_t p, const xmlNode *ptr)
370 {
371     yaz_record_conv_reset(p);
372
373     /* parsing element children */
374     for (ptr = ptr->children; ptr; ptr = ptr->next)
375         {
376             if (ptr->type != XML_ELEMENT_NODE)
377                 continue;
378             if (!strcmp((const char *) ptr->name, "xslt"))
379                 {
380                     if (conv_xslt(p, ptr))
381                         return -1;
382                 }
383             else if (!strcmp((const char *) ptr->name, "marc"))
384                 {
385                     if (conv_marc(p, ptr))
386                         return -1;
387                 }
388             else
389                 {
390                     wrbuf_printf(p->wr_error, "Element <backend>: expected "
391                                  "<marc> or <xslt> element, got <%s>"
392                                  , ptr->name);
393                     return -1;
394                 }
395         }
396     return 0;
397 }
398
399 static int yaz_record_conv_record_rule(yaz_record_conv_t p,
400                                        struct yaz_record_conv_rule *r,
401                                        const char *input_record_buf,
402                                        size_t input_record_len,
403                                        WRBUF output_record);
404
405 int yaz_record_conv_opac_record(yaz_record_conv_t p,
406                                 Z_OPACRecord *input_record,
407                                 WRBUF output_record)
408 {
409     int ret = 0;
410     struct yaz_record_conv_rule *r = p->rules;
411     if (!r || r->which != YAZ_RECORD_CONV_RULE_MARC)
412         ret = -1; /* no marc rule so we can't do OPAC */
413     else
414     {
415         WRBUF res = wrbuf_alloc();
416         yaz_marc_t mt = yaz_marc_create();
417         yaz_iconv_t cd = yaz_iconv_open(r->u.marc.output_charset,
418                                         r->u.marc.input_charset);
419         
420         wrbuf_rewind(p->wr_error);
421         yaz_marc_xml(mt, r->u.marc.output_format);
422         
423         yaz_marc_iconv(mt, cd);
424         
425         yaz_opac_decode_wrbuf(mt, input_record, res);
426         if (ret != -1)
427         {
428             ret = yaz_record_conv_record_rule(p, 
429                                               r->next,
430                                               wrbuf_buf(res), wrbuf_len(res),
431                                               output_record);
432         }
433         yaz_marc_destroy(mt);
434         if (cd)
435             yaz_iconv_close(cd);
436         wrbuf_destroy(res);
437     }
438     return ret;
439 }
440
441 int yaz_record_conv_record(yaz_record_conv_t p,
442                            const char *input_record_buf,
443                            size_t input_record_len,
444                            WRBUF output_record)
445 {
446     return yaz_record_conv_record_rule(p, p->rules,
447                                        input_record_buf,
448                                        input_record_len, output_record);
449 }
450
451 static int yaz_record_conv_record_rule(yaz_record_conv_t p,
452                                        struct yaz_record_conv_rule *r,
453                                        const char *input_record_buf,
454                                        size_t input_record_len,
455                                        WRBUF output_record)
456 {
457     int ret = 0;
458     WRBUF record = output_record; /* pointer transfer */
459     wrbuf_rewind(p->wr_error);
460     
461     wrbuf_write(record, input_record_buf, input_record_len);
462     for (; ret == 0 && r; r = r->next)
463     {
464         if (r->which == YAZ_RECORD_CONV_RULE_MARC)
465         {
466             yaz_iconv_t cd = 
467                 yaz_iconv_open(r->u.marc.output_charset,
468                                r->u.marc.input_charset);
469             yaz_marc_t mt = yaz_marc_create();
470
471             yaz_marc_xml(mt, r->u.marc.output_format);
472
473             if (cd)
474                 yaz_marc_iconv(mt, cd);
475             if (r->u.marc.input_format == YAZ_MARC_ISO2709)
476             {
477                 int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
478                                                wrbuf_len(record));
479                 if (sz > 0)
480                     ret = 0;
481                 else
482                     ret = -1;
483             }
484             else if (r->u.marc.input_format == YAZ_MARC_MARCXML ||
485                      r->u.marc.input_format == YAZ_MARC_TURBOMARC)
486             {
487                 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
488                                                wrbuf_len(record));
489                 if (!doc)
490                 {
491                     wrbuf_printf(p->wr_error, "xmlParseMemory failed");
492                     ret = -1;
493                 }
494                 else
495                 {
496                     ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc));
497                     if (ret)
498                         wrbuf_printf(p->wr_error, "yaz_marc_read_xml failed");
499                 }
500                 xmlFreeDoc(doc);
501             }
502             else
503             {
504                 wrbuf_printf(p->wr_error, "unsupported input format");
505                 ret = -1;
506             }
507             if (ret == 0)
508             {
509                 wrbuf_rewind(record);
510                 ret = yaz_marc_write_mode(mt, record);
511                 if (ret)
512                     wrbuf_printf(p->wr_error, "yaz_marc_write_mode failed");
513             }
514             if (cd)
515                 yaz_iconv_close(cd);
516             yaz_marc_destroy(mt);
517         }
518 #if YAZ_HAVE_XSLT
519         else if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
520         {
521             xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
522                                            wrbuf_len(record));
523             if (!doc)
524             {
525                 wrbuf_printf(p->wr_error, "xmlParseMemory failed");
526                 ret = -1;
527             }
528             else
529             {
530                 xmlDocPtr xsp_doc = xmlCopyDoc(r->u.xslt.xsp_doc, 1);
531                 xsltStylesheetPtr xsp = xsltParseStylesheetDoc(xsp_doc);
532                 xmlDocPtr res = xsltApplyStylesheet(xsp, doc, 0);
533                 if (res)
534                 {
535                     xmlChar *out_buf = 0;
536                     int out_len;
537
538 #if HAVE_XSLTSAVERESULTTOSTRING
539                     xsltSaveResultToString(&out_buf, &out_len, res, xsp);
540 #else
541                     xmlDocDumpFormatMemory (res, &out_buf, &out_len, 1);
542 #endif
543                     if (!out_buf)
544                     {
545                         wrbuf_printf(p->wr_error,
546                                      "xsltSaveResultToString failed");
547                         ret = -1;
548                     }
549                     else
550                     {
551                         wrbuf_rewind(record);
552                         wrbuf_write(record, (const char *) out_buf, out_len);
553                         
554                         xmlFree(out_buf);
555                     }
556                     xmlFreeDoc(res);
557                 }
558                 else
559                 {
560                     wrbuf_printf(p->wr_error, "xsltApplyStylesheet failed");
561                     ret = -1;
562                 }
563                 xmlFreeDoc(doc);
564                 xsltFreeStylesheet(xsp); /* frees xsp_doc too */
565             }
566         }
567 #endif
568     }
569     return ret;
570 }
571
572 const char *yaz_record_conv_get_error(yaz_record_conv_t p)
573 {
574     return wrbuf_cstr(p->wr_error);
575 }
576
577 void yaz_record_conv_set_path(yaz_record_conv_t p, const char *path)
578 {
579     xfree(p->path);
580     p->path = 0;
581     if (path)
582         p->path = xstrdup(path);
583 }
584 #endif
585
586 /*
587  * Local variables:
588  * c-basic-offset: 4
589  * c-file-style: "Stroustrup"
590  * indent-tabs-mode: nil
591  * End:
592  * vim: shiftwidth=4 tabstop=8 expandtab
593  */
594