18097244387df11b3355d033714bab7c7762e874
[yaz-moved-to-github.git] / src / record_conv.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2012 Index Data
3  * See the file LICENSE for details.
4  */
5 /**
6  * \file record_conv.c
7  * \brief Record Conversions utility
8  */
9
10 #if HAVE_CONFIG_H
11 #include <config.h>
12 #endif
13
14 #include <string.h>
15 #include <yaz/yaz-iconv.h>
16 #include <yaz/marcdisp.h>
17 #include <yaz/record_conv.h>
18 #include <yaz/wrbuf.h>
19 #include <yaz/xmalloc.h>
20 #include <yaz/nmem.h>
21 #include <yaz/tpath.h>
22 #include <yaz/z-opac.h>
23
24 #if YAZ_HAVE_XML2
25 #include <libxml/parser.h>
26 #include <libxml/tree.h>
27 #include <libxml/xinclude.h>
28 #if YAZ_HAVE_XSLT
29 #include <libxslt/xsltutils.h>
30 #include <libxslt/transform.h>
31 #endif
32 #if YAZ_HAVE_EXSLT
33 #include <libexslt/exslt.h>
34 #endif
35
36 /** \brief The internal structure for yaz_record_conv_t */
37 struct yaz_record_conv_struct {
38     /** \brief memory for configuration */
39     NMEM nmem;
40
41     /** \brief conversion rules (allocated using NMEM) */
42     struct yaz_record_conv_rule *rules;
43
44     /** \brief pointer to last conversion rule pointer in chain */
45     struct yaz_record_conv_rule **rules_p;
46
47     /** \brief string buffer for error messages */
48     WRBUF wr_error;
49
50     /** \brief path for opening files  */
51     char *path;
52 };
53
54 struct marc_info {
55     NMEM nmem;
56     const char *input_charset;
57     const char *output_charset;
58     int input_format_mode;
59     int output_format_mode;
60     const char *leader_spec;
61 };
62
/** \brief transformation info (rule info)

    One node in the singly linked chain of conversion steps held by
    yaz_record_conv_struct.
*/
struct yaz_record_conv_rule {
    struct yaz_record_conv_type *type;  /**< handlers for this step */
    void *info;                         /**< handler-private data, as
                                             returned by type->construct */
    struct yaz_record_conv_rule *next;  /**< following step or 0 */
};
69
70 /** \brief reset rules+configuration */
71 static void yaz_record_conv_reset(yaz_record_conv_t p)
72 {
73
74     struct yaz_record_conv_rule *r;
75     for (r = p->rules; r; r = r->next)
76     {
77         r->type->destroy(r->info);
78     }
79     wrbuf_rewind(p->wr_error);
80     nmem_reset(p->nmem);
81
82     p->rules = 0;
83
84     p->rules_p = &p->rules;
85 }
86
87 void yaz_record_conv_destroy(yaz_record_conv_t p)
88 {
89     if (p)
90     {
91         yaz_record_conv_reset(p);
92         nmem_destroy(p->nmem);
93         wrbuf_destroy(p->wr_error);
94
95         xfree(p->path);
96         xfree(p);
97     }
98 }
99
100 #if YAZ_HAVE_XSLT
101 struct xslt_info {
102     NMEM nmem;
103     xmlDocPtr xsp_doc;
104     const char **xsl_parms;
105 };
106
107 static void *construct_xslt(const xmlNode *ptr,
108                             const char *path, WRBUF wr_error)
109 {
110     struct _xmlAttr *attr;
111     const char *stylesheet = 0;
112     struct xslt_info *info = 0;
113     NMEM nmem = 0;
114     int max_parms = 10;
115     int no_parms = 0;
116
117     if (strcmp((const char *) ptr->name, "xslt"))
118         return 0;
119
120     for (attr = ptr->properties; attr; attr = attr->next)
121     {
122         if (!xmlStrcmp(attr->name, BAD_CAST "stylesheet") &&
123             attr->children && attr->children->type == XML_TEXT_NODE)
124             stylesheet = (const char *) attr->children->content;
125         else
126         {
127             wrbuf_printf(wr_error, "Bad attribute '%s'"
128                          "Expected stylesheet.", attr->name);
129             return 0;
130         }
131     }
132     nmem = nmem_create();
133     info = nmem_malloc(nmem, sizeof(*info));
134     info->nmem = nmem;
135     info->xsl_parms = nmem_malloc(
136         nmem, (2 * max_parms + 1) * sizeof(*info->xsl_parms));
137
138     for (ptr = ptr->children; ptr; ptr = ptr->next)
139     {
140         const char *name = 0;
141         const char *value = 0;
142         char *qvalue = 0;
143         if (ptr->type != XML_ELEMENT_NODE)
144             continue;
145         if (strcmp((const char *) ptr->name, "param"))
146         {
147             wrbuf_printf(wr_error, "Bad element '%s'"
148                          "Expected param.", ptr->name);
149             nmem_destroy(nmem);
150             return 0;
151         }
152         for (attr = ptr->properties; attr; attr = attr->next)
153         {
154             if (!xmlStrcmp(attr->name, BAD_CAST "name") &&
155                 attr->children && attr->children->type == XML_TEXT_NODE)
156                 name = (const char *) attr->children->content;
157             else if (!xmlStrcmp(attr->name, BAD_CAST "value") &&
158                 attr->children && attr->children->type == XML_TEXT_NODE)
159                 value = (const char *) attr->children->content;
160             else
161             {
162                 wrbuf_printf(wr_error, "Bad attribute '%s'"
163                              "Expected name or value.", attr->name);
164                 nmem_destroy(nmem);
165                 return 0;
166             }
167         }
168         if (!name || !value)
169         {
170             wrbuf_printf(wr_error, "Missing attributes name or value");
171             nmem_destroy(nmem);
172             return 0;
173         }
174         if (no_parms >= max_parms)
175         {
176             wrbuf_printf(wr_error, "Too many parameters given");
177             nmem_destroy(nmem);
178             return 0;
179         }
180
181         qvalue = nmem_malloc(nmem, strlen(value) + 3);
182         strcpy(qvalue, "\'");
183         strcat(qvalue, value);
184         strcat(qvalue, "\'");
185
186         info->xsl_parms[2 * no_parms] = nmem_strdup(nmem, name);
187         info->xsl_parms[2 * no_parms + 1] = qvalue;
188         no_parms++;
189     }
190
191     info->xsl_parms[2 * no_parms] = '\0';
192
193     if (!stylesheet)
194     {
195         wrbuf_printf(wr_error, "Element <xslt>: "
196                      "attribute 'stylesheet' expected");
197         return 0;
198     }
199     else
200     {
201         char fullpath[1024];
202         xsltStylesheetPtr xsp;
203         if (!yaz_filepath_resolve(stylesheet, path, 0, fullpath))
204         {
205             wrbuf_printf(wr_error, "Element <xslt stylesheet=\"%s\"/>:"
206                          " could not locate stylesheet '%s'",
207                          stylesheet, stylesheet);
208             if (path)
209                 wrbuf_printf(wr_error, " with path '%s'", path);
210                 
211             return 0;
212         }
213         info->xsp_doc = xmlParseFile(fullpath);
214         if (!info->xsp_doc)
215         {
216             wrbuf_printf(wr_error, "Element: <xslt stylesheet=\"%s\"/>:"
217                          " xml parse failed: %s", stylesheet, fullpath);
218             if (path)
219                 wrbuf_printf(wr_error, " with path '%s'", path);
220             return 0;
221         }
222         /* need to copy this before passing it to the processor. It will
223            be encapsulated in the xsp and destroyed by xsltFreeStylesheet */
224         xsp = xsltParseStylesheetDoc(xmlCopyDoc(info->xsp_doc, 1));
225         if (!xsp)
226         {
227             wrbuf_printf(wr_error, "Element: <xslt stylesheet=\"%s\"/>:"
228                          " xslt parse failed: %s", stylesheet, fullpath);
229             if (path)
230                 wrbuf_printf(wr_error, " with path '%s'", path);
231             wrbuf_printf(wr_error, " ("
232 #if YAZ_HAVE_EXSLT
233                          
234                          "EXSLT enabled"
235 #else
236                          "EXSLT not supported"
237 #endif
238                          ")");
239             xmlFreeDoc(info->xsp_doc);
240             nmem_destroy(info->nmem);
241             return 0;
242         }
243         else
244         {
245             xsltFreeStylesheet(xsp);
246             return info;
247         }
248     }
249     return 0;
250 }
251
252 static int convert_xslt(void *vinfo, WRBUF record, WRBUF wr_error)
253 {
254     int ret = 0;
255     struct xslt_info *info = vinfo;
256
257     xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
258                                    wrbuf_len(record));
259     if (!doc)
260     {
261         wrbuf_printf(wr_error, "xmlParseMemory failed");
262         ret = -1;
263     }
264     else
265     {
266         xmlDocPtr xsp_doc = xmlCopyDoc(info->xsp_doc, 1);
267         xsltStylesheetPtr xsp = xsltParseStylesheetDoc(xsp_doc);
268         xmlDocPtr res = xsltApplyStylesheet(xsp, doc, info->xsl_parms);
269         if (res)
270         {
271             xmlChar *out_buf = 0;
272             int out_len;
273             
274 #if HAVE_XSLTSAVERESULTTOSTRING
275             xsltSaveResultToString(&out_buf, &out_len, res, xsp);
276 #else
277             xmlDocDumpFormatMemory (res, &out_buf, &out_len, 1);
278 #endif
279             if (!out_buf)
280             {
281                 wrbuf_printf(wr_error,
282                              "xsltSaveResultToString failed");
283                 ret = -1;
284             }
285             else
286             {
287                 wrbuf_rewind(record);
288                 wrbuf_write(record, (const char *) out_buf, out_len);
289                 
290                 xmlFree(out_buf);
291             }
292             xmlFreeDoc(res);
293         }
294         else
295         {
296             wrbuf_printf(wr_error, "xsltApplyStylesheet failed");
297             ret = -1;
298         }
299         xmlFreeDoc(doc);
300         xsltFreeStylesheet(xsp); /* frees xsp_doc too */
301     }
302     return ret;
303 }
304
305 static void destroy_xslt(void *vinfo)
306 {
307     struct xslt_info *info = vinfo;
308
309     if (info)
310     {
311         xmlFreeDoc(info->xsp_doc);
312         nmem_destroy(info->nmem);
313     }
314 }
315
316 /* YAZ_HAVE_XSLT */
317 #endif
318
319
320 static void *construct_marc(const xmlNode *ptr,
321                             const char *path, WRBUF wr_error)
322 {
323     NMEM nmem = nmem_create();
324     struct marc_info *info = nmem_malloc(nmem, sizeof(*info));
325     struct _xmlAttr *attr;
326     const char *input_format = 0;
327     const char *output_format = 0;
328
329     if (strcmp((const char *) ptr->name, "marc"))
330     {
331         nmem_destroy(nmem);
332         return 0;
333     }
334
335     info->nmem = nmem;
336     info->input_charset = 0;
337     info->output_charset = 0;
338     info->input_format_mode = 0;
339     info->output_format_mode = 0;
340     info->leader_spec = 0;
341
342     for (attr = ptr->properties; attr; attr = attr->next)
343     {
344         if (!xmlStrcmp(attr->name, BAD_CAST "inputcharset") &&
345             attr->children && attr->children->type == XML_TEXT_NODE)
346             info->input_charset = (const char *) attr->children->content;
347         else if (!xmlStrcmp(attr->name, BAD_CAST "outputcharset") &&
348             attr->children && attr->children->type == XML_TEXT_NODE)
349             info->output_charset = (const char *) attr->children->content;
350         else if (!xmlStrcmp(attr->name, BAD_CAST "inputformat") &&
351             attr->children && attr->children->type == XML_TEXT_NODE)
352             input_format = (const char *) attr->children->content;
353         else if (!xmlStrcmp(attr->name, BAD_CAST "outputformat") &&
354             attr->children && attr->children->type == XML_TEXT_NODE)
355             output_format = (const char *) attr->children->content;
356         else if (!xmlStrcmp(attr->name, BAD_CAST "leaderspec") &&
357                  attr->children && attr->children->type == XML_TEXT_NODE)
358             info->leader_spec =
359                 nmem_strdup(info->nmem,(const char *) attr->children->content);
360         else
361         {
362             wrbuf_printf(wr_error, "Element <marc>: expected attributes"
363                          "'inputformat', 'inputcharset', 'outputformat' or"
364                          " 'outputcharset', got attribute '%s'", 
365                          attr->name);
366             nmem_destroy(info->nmem);
367             return 0;
368         }
369     }
370     if (!input_format)
371     {
372         wrbuf_printf(wr_error, "Element <marc>: "
373                      "attribute 'inputformat' required");
374         nmem_destroy(info->nmem);
375         return 0;
376     }
377     else if (!strcmp(input_format, "marc"))
378     {
379         info->input_format_mode = YAZ_MARC_ISO2709;
380     }
381     else if (!strcmp(input_format, "xml"))
382     {
383         info->input_format_mode = YAZ_MARC_MARCXML;
384         /** Libxml2 generates UTF-8 encoding by default .
385             So we convert from UTF-8 to outputcharset (if defined) 
386         */
387         if (!info->input_charset && info->output_charset)
388             info->input_charset = "utf-8";
389     }
390     else
391     {
392         wrbuf_printf(wr_error, "Element <marc inputformat='%s'>: "
393                      " Unsupported input format"
394                      " defined by attribute value", 
395                      input_format);
396         nmem_destroy(info->nmem);
397         return 0;
398     }
399     
400     if (!output_format)
401     {
402         wrbuf_printf(wr_error, 
403                      "Element <marc>: attribute 'outputformat' required");
404         nmem_destroy(info->nmem);
405         return 0;
406     }
407     else if (!strcmp(output_format, "line"))
408     {
409         info->output_format_mode = YAZ_MARC_LINE;
410     }
411     else if (!strcmp(output_format, "marcxml"))
412     {
413         info->output_format_mode = YAZ_MARC_MARCXML;
414         if (info->input_charset && !info->output_charset)
415             info->output_charset = "utf-8";
416     }
417     else if (!strcmp(output_format, "turbomarc"))
418     {
419         info->output_format_mode = YAZ_MARC_TURBOMARC;
420         if (info->input_charset && !info->output_charset)
421             info->output_charset = "utf-8";
422     }
423     else if (!strcmp(output_format, "marc"))
424     {
425         info->output_format_mode = YAZ_MARC_ISO2709;
426     }
427     else if (!strcmp(output_format, "marcxchange"))
428     {
429         info->output_format_mode = YAZ_MARC_XCHANGE;
430         if (info->input_charset && !info->output_charset)
431             info->output_charset = "utf-8";
432     }
433     else
434     {
435         wrbuf_printf(wr_error, "Element <marc outputformat='%s'>: "
436                      " Unsupported output format"
437                      " defined by attribute value", 
438                      output_format);
439         nmem_destroy(info->nmem);
440         return 0;
441     }
442     if (info->input_charset && info->output_charset)
443     {
444         yaz_iconv_t cd = yaz_iconv_open(info->output_charset,
445                                         info->input_charset);
446         if (!cd)
447         {
448             wrbuf_printf(wr_error, 
449                          "Element <marc inputcharset='%s' outputcharset='%s'>:"
450                          " Unsupported character set mapping"
451                          " defined by attribute values",
452                          info->input_charset, info->output_charset);
453             nmem_destroy(info->nmem);
454             return 0;
455         }
456         yaz_iconv_close(cd);
457     }
458     else if (!info->output_charset)
459     {
460         wrbuf_printf(wr_error, "Element <marc>: "
461                      "attribute 'outputcharset' missing");
462         nmem_destroy(info->nmem);
463         return 0;
464     }
465     else if (!info->input_charset)
466     {
467         wrbuf_printf(wr_error, "Element <marc>: "
468                      "attribute 'inputcharset' missing");
469         nmem_destroy(info->nmem);
470         return 0;
471     }
472     info->input_charset = nmem_strdup(info->nmem, info->input_charset);
473     info->output_charset = nmem_strdup(info->nmem, info->output_charset);
474     return info;
475 }
476
477 static int convert_marc(void *info, WRBUF record, WRBUF wr_error)
478 {
479     struct marc_info *mi = info;
480     int ret = 0;
481     
482     yaz_iconv_t cd = yaz_iconv_open(mi->output_charset, mi->input_charset);
483     yaz_marc_t mt = yaz_marc_create();
484     
485     yaz_marc_xml(mt, mi->output_format_mode);
486     if (mi->leader_spec)
487         yaz_marc_leader_spec(mt, mi->leader_spec);
488         
489     if (cd)
490         yaz_marc_iconv(mt, cd);
491     if (mi->input_format_mode == YAZ_MARC_ISO2709)
492     {
493         int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
494                                        wrbuf_len(record));
495         if (sz > 0)
496             ret = 0;
497         else
498             ret = -1;
499     }
500     else if (mi->input_format_mode == YAZ_MARC_MARCXML ||
501              mi->input_format_mode == YAZ_MARC_TURBOMARC)
502     {
503         xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
504                                        wrbuf_len(record));
505         if (!doc)
506         {
507             wrbuf_printf(wr_error, "xmlParseMemory failed");
508             ret = -1;
509         }
510         else
511         {
512             ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc));
513             if (ret)
514                 wrbuf_printf(wr_error, "yaz_marc_read_xml failed");
515         }
516         xmlFreeDoc(doc);
517     }
518     else
519     {
520         wrbuf_printf(wr_error, "unsupported input format");
521         ret = -1;
522     }
523     if (ret == 0)
524     {
525         wrbuf_rewind(record);
526         ret = yaz_marc_write_mode(mt, record);
527         if (ret)
528             wrbuf_printf(wr_error, "yaz_marc_write_mode failed");
529     }
530     if (cd)
531         yaz_iconv_close(cd);
532     yaz_marc_destroy(mt);
533     return ret;
534 }
535
536 static void destroy_marc(void *info)
537 {
538     struct marc_info *mi = info;
539     
540     nmem_destroy(mi->nmem);
541 }
542
543 int yaz_record_conv_configure_t(yaz_record_conv_t p, const xmlNode *ptr,
544                                 struct yaz_record_conv_type *types)
545 {
546     struct yaz_record_conv_type bt[2];
547     
548     /* register marc */
549     bt[0].construct = construct_marc;
550     bt[0].convert = convert_marc;
551     bt[0].destroy = destroy_marc;
552
553 #if YAZ_HAVE_XSLT
554     /* register xslt */
555     bt[0].next = &bt[1];
556     bt[1].next = types;
557     bt[1].construct = construct_xslt;
558     bt[1].convert = convert_xslt;
559     bt[1].destroy = destroy_xslt;
560 #else
561     bt[0].next = types;
562 #endif
563     
564     yaz_record_conv_reset(p);
565
566     /* parsing element children */
567     for (ptr = ptr->children; ptr; ptr = ptr->next)
568     {
569         struct yaz_record_conv_type *t;
570         struct yaz_record_conv_rule *r;
571         void *info = 0;
572         if (ptr->type != XML_ELEMENT_NODE)
573             continue;
574         for (t = &bt[0]; t; t = t->next)
575         {
576             wrbuf_rewind(p->wr_error);
577             info = t->construct(ptr, p->path, p->wr_error);
578
579             if (info || wrbuf_len(p->wr_error))
580                 break;
581             /* info== 0 and no error reported , ie not handled by it */
582         }
583         if (!info)
584         {
585             if (wrbuf_len(p->wr_error) == 0)
586                 wrbuf_printf(p->wr_error, "Element <backend>: expected "
587                              "<marc> or <xslt> element, got <%s>"
588                              , ptr->name);
589             return -1;
590         }
591         r = (struct yaz_record_conv_rule *) nmem_malloc(p->nmem, sizeof(*r));
592         r->next = 0;
593         r->info = info;
594         r->type = nmem_malloc(p->nmem, sizeof(*t));
595         memcpy(r->type, t, sizeof(*t));
596         *p->rules_p = r;
597         p->rules_p = &r->next;
598     }
599     return 0;
600 }
601
602 int yaz_record_conv_configure(yaz_record_conv_t p, const xmlNode *ptr)
603 {
604     return yaz_record_conv_configure_t(p, ptr, 0);
605 }
606
607 static int yaz_record_conv_record_rule(yaz_record_conv_t p,
608                                        struct yaz_record_conv_rule *r,
609                                        const char *input_record_buf,
610                                        size_t input_record_len,
611                                        WRBUF output_record)
612 {
613     int ret = 0;
614     WRBUF record = output_record; /* pointer transfer */
615     wrbuf_rewind(p->wr_error);
616     
617     wrbuf_write(record, input_record_buf, input_record_len);
618     for (; ret == 0 && r; r = r->next)
619         ret = r->type->convert(r->info, record, p->wr_error);
620     return ret;
621 }
622
623 int yaz_record_conv_opac_record(yaz_record_conv_t p,
624                                 Z_OPACRecord *input_record,
625                                 WRBUF output_record)
626 {
627     int ret = 0;
628     struct yaz_record_conv_rule *r = p->rules;
629     if (!r || r->type->construct != construct_marc)
630         ret = -1; /* no marc rule so we can't do OPAC */
631     else
632     {
633         struct marc_info *mi = r->info;
634
635         WRBUF res = wrbuf_alloc();
636         yaz_marc_t mt = yaz_marc_create();
637         yaz_iconv_t cd = yaz_iconv_open(mi->output_charset,
638                                         mi->input_charset);
639         
640         wrbuf_rewind(p->wr_error);
641         yaz_marc_xml(mt, mi->output_format_mode);
642         
643         yaz_marc_iconv(mt, cd);
644         
645         yaz_opac_decode_wrbuf(mt, input_record, res);
646         if (ret != -1)
647         {
648             ret = yaz_record_conv_record_rule(p, 
649                                               r->next,
650                                               wrbuf_buf(res), wrbuf_len(res),
651                                               output_record);
652         }
653         yaz_marc_destroy(mt);
654         if (cd)
655             yaz_iconv_close(cd);
656         wrbuf_destroy(res);
657     }
658     return ret;
659 }
660
661 int yaz_record_conv_record(yaz_record_conv_t p,
662                            const char *input_record_buf,
663                            size_t input_record_len,
664                            WRBUF output_record)
665 {
666     return yaz_record_conv_record_rule(p, p->rules,
667                                        input_record_buf,
668                                        input_record_len, output_record);
669 }
670
671 const char *yaz_record_conv_get_error(yaz_record_conv_t p)
672 {
673     return wrbuf_cstr(p->wr_error);
674 }
675
676 void yaz_record_conv_set_path(yaz_record_conv_t p, const char *path)
677 {
678     xfree(p->path);
679     p->path = 0;
680     if (path)
681         p->path = xstrdup(path);
682 }
683
684 yaz_record_conv_t yaz_record_conv_create()
685 {
686     yaz_record_conv_t p = (yaz_record_conv_t) xmalloc(sizeof(*p));
687     p->nmem = nmem_create();
688     p->wr_error = wrbuf_alloc();
689     p->rules = 0;
690     p->path = 0;
691 #if YAZ_HAVE_EXSLT
692     exsltRegisterAll(); 
693 #endif    
694     return p;
695 }
696
697 /* YAZ_HAVE_XML2 */
698 #endif
699
700 /*
701  * Local variables:
702  * c-basic-offset: 4
703  * c-file-style: "Stroustrup"
704  * indent-tabs-mode: nil
705  * End:
706  * vim: shiftwidth=4 tabstop=8 expandtab
707  */
708