Better Z39.50 search/present handling
[yaz-moved-to-github.git] / src / record_conv.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) Index Data
3  * See the file LICENSE for details.
4  */
5 /**
6  * \file record_conv.c
7  * \brief Record Conversions utility
8  */
9
10 #if HAVE_CONFIG_H
11 #include <config.h>
12 #endif
13
14 #include <string.h>
15 #include <yaz/yaz-iconv.h>
16 #include <yaz/marcdisp.h>
17 #include <yaz/record_conv.h>
18 #include <yaz/wrbuf.h>
19 #include <yaz/xmalloc.h>
20 #include <yaz/nmem.h>
21 #include <yaz/tpath.h>
22 #include <yaz/z-opac.h>
23
24 #if YAZ_HAVE_XML2
25 #include <libxml/parser.h>
26 #include <libxml/tree.h>
27 #include <libxml/xinclude.h>
28 #if YAZ_HAVE_XSLT
29 #include <libxslt/xsltutils.h>
30 #include <libxslt/transform.h>
31 #endif
32 #if YAZ_HAVE_EXSLT
33 #include <libexslt/exslt.h>
34 #endif
35
36 /** \brief The internal structure for yaz_record_conv_t */
37 struct yaz_record_conv_struct {
38     /** \brief memory for configuration */
39     NMEM nmem;
40
41     /** \brief conversion rules (allocated using NMEM) */
42     struct yaz_record_conv_rule *rules;
43
44     /** \brief pointer to last conversion rule pointer in chain */
45     struct yaz_record_conv_rule **rules_p;
46
47     /** \brief string buffer for error messages */
48     WRBUF wr_error;
49
50     /** \brief path for opening files  */
51     char *path;
52 };
53
54 struct marc_info {
55     NMEM nmem;
56     const char *input_charset;
57     const char *output_charset;
58     int input_format_mode;
59     int output_format_mode;
60     const char *leader_spec;
61 };
62
/** \brief transformation info (one node in the chain of rules) */
struct yaz_record_conv_rule {
    /** \brief handlers (construct/convert/destroy) serving this rule */
    struct yaz_record_conv_type *type;
    /** \brief opaque data produced by the type's construct handler */
    void *info;
    /** \brief following rule in the chain; 0 at the end */
    struct yaz_record_conv_rule *next;
};
69
70 /** \brief reset rules+configuration */
71 static void yaz_record_conv_reset(yaz_record_conv_t p)
72 {
73
74     struct yaz_record_conv_rule *r;
75     for (r = p->rules; r; r = r->next)
76     {
77         r->type->destroy(r->info);
78     }
79     wrbuf_rewind(p->wr_error);
80     nmem_reset(p->nmem);
81
82     p->rules = 0;
83
84     p->rules_p = &p->rules;
85 }
86
87 void yaz_record_conv_destroy(yaz_record_conv_t p)
88 {
89     if (p)
90     {
91         yaz_record_conv_reset(p);
92         nmem_destroy(p->nmem);
93         wrbuf_destroy(p->wr_error);
94
95         xfree(p->path);
96         xfree(p);
97     }
98 }
99
100 #if YAZ_HAVE_XSLT
101 struct xslt_info {
102     NMEM nmem;
103     xmlDocPtr xsp_doc;
104     const char **xsl_parms;
105 };
106
107 static void *construct_xslt(const xmlNode *ptr,
108                             const char *path, WRBUF wr_error)
109 {
110     struct _xmlAttr *attr;
111     const char *stylesheet = 0;
112     struct xslt_info *info = 0;
113     NMEM nmem = 0;
114     int max_parms = 10;
115     int no_parms = 0;
116
117     if (strcmp((const char *) ptr->name, "xslt"))
118         return 0;
119
120     for (attr = ptr->properties; attr; attr = attr->next)
121     {
122         if (!xmlStrcmp(attr->name, BAD_CAST "stylesheet") &&
123             attr->children && attr->children->type == XML_TEXT_NODE)
124             stylesheet = (const char *) attr->children->content;
125         else
126         {
127             wrbuf_printf(wr_error, "Bad attribute '%s'"
128                          "Expected stylesheet.", attr->name);
129             return 0;
130         }
131     }
132     nmem = nmem_create();
133     info = nmem_malloc(nmem, sizeof(*info));
134     info->nmem = nmem;
135     info->xsl_parms = nmem_malloc(
136         nmem, (2 * max_parms + 1) * sizeof(*info->xsl_parms));
137
138     for (ptr = ptr->children; ptr; ptr = ptr->next)
139     {
140         const char *name = 0;
141         const char *value = 0;
142         char *qvalue = 0;
143         if (ptr->type != XML_ELEMENT_NODE)
144             continue;
145         if (strcmp((const char *) ptr->name, "param"))
146         {
147             wrbuf_printf(wr_error, "Bad element '%s'"
148                          "Expected param.", ptr->name);
149             nmem_destroy(nmem);
150             return 0;
151         }
152         for (attr = ptr->properties; attr; attr = attr->next)
153         {
154             if (!xmlStrcmp(attr->name, BAD_CAST "name") &&
155                 attr->children && attr->children->type == XML_TEXT_NODE)
156                 name = (const char *) attr->children->content;
157             else if (!xmlStrcmp(attr->name, BAD_CAST "value") &&
158                 attr->children && attr->children->type == XML_TEXT_NODE)
159                 value = (const char *) attr->children->content;
160             else
161             {
162                 wrbuf_printf(wr_error, "Bad attribute '%s'"
163                              "Expected name or value.", attr->name);
164                 nmem_destroy(nmem);
165                 return 0;
166             }
167         }
168         if (!name || !value)
169         {
170             wrbuf_printf(wr_error, "Missing attributes name or value");
171             nmem_destroy(nmem);
172             return 0;
173         }
174         if (no_parms >= max_parms)
175         {
176             wrbuf_printf(wr_error, "Too many parameters given");
177             nmem_destroy(nmem);
178             return 0;
179         }
180
181         qvalue = nmem_malloc(nmem, strlen(value) + 3);
182         strcpy(qvalue, "\'");
183         strcat(qvalue, value);
184         strcat(qvalue, "\'");
185
186         info->xsl_parms[2 * no_parms] = nmem_strdup(nmem, name);
187         info->xsl_parms[2 * no_parms + 1] = qvalue;
188         no_parms++;
189     }
190
191     info->xsl_parms[2 * no_parms] = '\0';
192
193     if (!stylesheet)
194     {
195         wrbuf_printf(wr_error, "Element <xslt>: "
196                      "attribute 'stylesheet' expected");
197         nmem_destroy(nmem);
198     }
199     else
200     {
201         char fullpath[1024];
202         xsltStylesheetPtr xsp;
203         if (!yaz_filepath_resolve(stylesheet, path, 0, fullpath))
204         {
205             wrbuf_printf(wr_error, "Element <xslt stylesheet=\"%s\"/>:"
206                          " could not locate stylesheet '%s'",
207                          stylesheet, stylesheet);
208             if (path)
209                 wrbuf_printf(wr_error, " with path '%s'", path);
210
211             nmem_destroy(nmem);
212             return 0;
213         }
214         info->xsp_doc = xmlParseFile(fullpath);
215         if (!info->xsp_doc)
216         {
217             wrbuf_printf(wr_error, "Element: <xslt stylesheet=\"%s\"/>:"
218                          " xml parse failed: %s", stylesheet, fullpath);
219             if (path)
220                 wrbuf_printf(wr_error, " with path '%s'", path);
221             nmem_destroy(nmem);
222             return 0;
223         }
224         /* need to copy this before passing it to the processor. It will
225            be encapsulated in the xsp and destroyed by xsltFreeStylesheet */
226         xsp = xsltParseStylesheetDoc(xmlCopyDoc(info->xsp_doc, 1));
227         if (!xsp)
228         {
229             wrbuf_printf(wr_error, "Element: <xslt stylesheet=\"%s\"/>:"
230                          " xslt parse failed: %s", stylesheet, fullpath);
231             if (path)
232                 wrbuf_printf(wr_error, " with path '%s'", path);
233             wrbuf_printf(wr_error, " ("
234 #if YAZ_HAVE_EXSLT
235
236                          "EXSLT enabled"
237 #else
238                          "EXSLT not supported"
239 #endif
240                          ")");
241             xmlFreeDoc(info->xsp_doc);
242             nmem_destroy(info->nmem);
243         }
244         else
245         {
246             xsltFreeStylesheet(xsp);
247             return info;
248         }
249     }
250     return 0;
251 }
252
253 static int convert_xslt(void *vinfo, WRBUF record, WRBUF wr_error)
254 {
255     int ret = 0;
256     struct xslt_info *info = vinfo;
257
258     xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
259                                    wrbuf_len(record));
260     if (!doc)
261     {
262         wrbuf_printf(wr_error, "xmlParseMemory failed");
263         ret = -1;
264     }
265     else
266     {
267         xmlDocPtr xsp_doc = xmlCopyDoc(info->xsp_doc, 1);
268         xsltStylesheetPtr xsp = xsltParseStylesheetDoc(xsp_doc);
269         xmlDocPtr res = xsltApplyStylesheet(xsp, doc, info->xsl_parms);
270         if (res)
271         {
272             xmlChar *out_buf = 0;
273             int out_len;
274
275 #if HAVE_XSLTSAVERESULTTOSTRING
276             xsltSaveResultToString(&out_buf, &out_len, res, xsp);
277 #else
278             xmlDocDumpFormatMemory (res, &out_buf, &out_len, 1);
279 #endif
280             if (!out_buf)
281             {
282                 wrbuf_printf(wr_error,
283                              "xsltSaveResultToString failed");
284                 ret = -1;
285             }
286             else
287             {
288                 wrbuf_rewind(record);
289                 wrbuf_write(record, (const char *) out_buf, out_len);
290
291                 xmlFree(out_buf);
292             }
293             xmlFreeDoc(res);
294         }
295         else
296         {
297             wrbuf_printf(wr_error, "xsltApplyStylesheet failed");
298             ret = -1;
299         }
300         xmlFreeDoc(doc);
301         xsltFreeStylesheet(xsp); /* frees xsp_doc too */
302     }
303     return ret;
304 }
305
306 static void destroy_xslt(void *vinfo)
307 {
308     struct xslt_info *info = vinfo;
309
310     if (info)
311     {
312         xmlFreeDoc(info->xsp_doc);
313         nmem_destroy(info->nmem);
314     }
315 }
316
317 /* YAZ_HAVE_XSLT */
318 #endif
319
320
321 static void *construct_marc(const xmlNode *ptr,
322                             const char *path, WRBUF wr_error)
323 {
324     NMEM nmem = nmem_create();
325     struct marc_info *info = nmem_malloc(nmem, sizeof(*info));
326     struct _xmlAttr *attr;
327     const char *input_format = 0;
328     const char *output_format = 0;
329
330     if (strcmp((const char *) ptr->name, "marc"))
331     {
332         nmem_destroy(nmem);
333         return 0;
334     }
335
336     info->nmem = nmem;
337     info->input_charset = 0;
338     info->output_charset = 0;
339     info->input_format_mode = 0;
340     info->output_format_mode = 0;
341     info->leader_spec = 0;
342
343     for (attr = ptr->properties; attr; attr = attr->next)
344     {
345         if (!xmlStrcmp(attr->name, BAD_CAST "inputcharset") &&
346             attr->children && attr->children->type == XML_TEXT_NODE)
347             info->input_charset = (const char *) attr->children->content;
348         else if (!xmlStrcmp(attr->name, BAD_CAST "outputcharset") &&
349             attr->children && attr->children->type == XML_TEXT_NODE)
350             info->output_charset = (const char *) attr->children->content;
351         else if (!xmlStrcmp(attr->name, BAD_CAST "inputformat") &&
352             attr->children && attr->children->type == XML_TEXT_NODE)
353             input_format = (const char *) attr->children->content;
354         else if (!xmlStrcmp(attr->name, BAD_CAST "outputformat") &&
355             attr->children && attr->children->type == XML_TEXT_NODE)
356             output_format = (const char *) attr->children->content;
357         else if (!xmlStrcmp(attr->name, BAD_CAST "leaderspec") &&
358                  attr->children && attr->children->type == XML_TEXT_NODE)
359             info->leader_spec =
360                 nmem_strdup(info->nmem,(const char *) attr->children->content);
361         else
362         {
363             wrbuf_printf(wr_error, "Element <marc>: expected attributes"
364                          "'inputformat', 'inputcharset', 'outputformat' or"
365                          " 'outputcharset', got attribute '%s'",
366                          attr->name);
367             nmem_destroy(info->nmem);
368             return 0;
369         }
370     }
371     if (!input_format)
372     {
373         wrbuf_printf(wr_error, "Element <marc>: "
374                      "attribute 'inputformat' required");
375         nmem_destroy(info->nmem);
376         return 0;
377     }
378     else if (!strcmp(input_format, "marc"))
379     {
380         info->input_format_mode = YAZ_MARC_ISO2709;
381     }
382     else if (!strcmp(input_format, "xml"))
383     {
384         info->input_format_mode = YAZ_MARC_MARCXML;
385         /** Libxml2 generates UTF-8 encoding by default .
386             So we convert from UTF-8 to outputcharset (if defined)
387         */
388         if (!info->input_charset && info->output_charset)
389             info->input_charset = "utf-8";
390     }
391     else if (!strcmp(input_format, "json"))
392     {
393         info->input_format_mode = YAZ_MARC_JSON;
394     }
395     else
396     {
397         wrbuf_printf(wr_error, "Element <marc inputformat='%s'>: "
398                      " Unsupported input format"
399                      " defined by attribute value",
400                      input_format);
401         nmem_destroy(info->nmem);
402         return 0;
403     }
404
405     if (!output_format)
406     {
407         wrbuf_printf(wr_error,
408                      "Element <marc>: attribute 'outputformat' required");
409         nmem_destroy(info->nmem);
410         return 0;
411     }
412     else if (!strcmp(output_format, "line"))
413     {
414         info->output_format_mode = YAZ_MARC_LINE;
415     }
416     else if (!strcmp(output_format, "marcxml"))
417     {
418         info->output_format_mode = YAZ_MARC_MARCXML;
419         if (info->input_charset && !info->output_charset)
420             info->output_charset = "utf-8";
421     }
422     else if (!strcmp(output_format, "turbomarc"))
423     {
424         info->output_format_mode = YAZ_MARC_TURBOMARC;
425         if (info->input_charset && !info->output_charset)
426             info->output_charset = "utf-8";
427     }
428     else if (!strcmp(output_format, "marc"))
429     {
430         info->output_format_mode = YAZ_MARC_ISO2709;
431     }
432     else if (!strcmp(output_format, "marcxchange"))
433     {
434         info->output_format_mode = YAZ_MARC_XCHANGE;
435         if (info->input_charset && !info->output_charset)
436             info->output_charset = "utf-8";
437     }
438     else if (!strcmp(output_format, "json"))
439     {
440         info->output_format_mode = YAZ_MARC_JSON;
441         if (info->input_charset && !info->output_charset)
442             info->output_charset = "utf-8";
443     }
444     else
445     {
446         wrbuf_printf(wr_error, "Element <marc outputformat='%s'>: "
447                      " Unsupported output format"
448                      " defined by attribute value",
449                      output_format);
450         nmem_destroy(info->nmem);
451         return 0;
452     }
453     if (info->input_charset && info->output_charset)
454     {
455         yaz_iconv_t cd = yaz_iconv_open(info->output_charset,
456                                         info->input_charset);
457         if (!cd)
458         {
459             wrbuf_printf(wr_error,
460                          "Element <marc inputcharset='%s' outputcharset='%s'>:"
461                          " Unsupported character set mapping"
462                          " defined by attribute values",
463                          info->input_charset, info->output_charset);
464             nmem_destroy(info->nmem);
465             return 0;
466         }
467         yaz_iconv_close(cd);
468     }
469     else if (!info->output_charset)
470     {
471         wrbuf_printf(wr_error, "Element <marc>: "
472                      "attribute 'outputcharset' missing");
473         nmem_destroy(info->nmem);
474         return 0;
475     }
476     else if (!info->input_charset)
477     {
478         wrbuf_printf(wr_error, "Element <marc>: "
479                      "attribute 'inputcharset' missing");
480         nmem_destroy(info->nmem);
481         return 0;
482     }
483     info->input_charset = nmem_strdup(info->nmem, info->input_charset);
484     info->output_charset = nmem_strdup(info->nmem, info->output_charset);
485     return info;
486 }
487
488 static int convert_marc(void *info, WRBUF record, WRBUF wr_error)
489 {
490     struct marc_info *mi = info;
491     int ret = 0;
492
493     yaz_iconv_t cd = yaz_iconv_open(mi->output_charset, mi->input_charset);
494     yaz_marc_t mt = yaz_marc_create();
495
496     yaz_marc_xml(mt, mi->output_format_mode);
497     if (mi->leader_spec)
498         yaz_marc_leader_spec(mt, mi->leader_spec);
499
500     if (cd)
501         yaz_marc_iconv(mt, cd);
502     if (mi->input_format_mode == YAZ_MARC_ISO2709)
503     {
504         int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
505                                        wrbuf_len(record));
506         if (sz > 0)
507             ret = 0;
508         else
509             ret = -1;
510     }
511     else if (mi->input_format_mode == YAZ_MARC_MARCXML ||
512              mi->input_format_mode == YAZ_MARC_TURBOMARC)
513     {
514         xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
515                                        wrbuf_len(record));
516         if (!doc)
517         {
518             wrbuf_printf(wr_error, "xmlParseMemory failed");
519             ret = -1;
520         }
521         else
522         {
523             ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc));
524             if (ret)
525                 wrbuf_printf(wr_error, "yaz_marc_read_xml failed");
526         }
527         xmlFreeDoc(doc);
528     }
529     else
530     {
531         wrbuf_printf(wr_error, "unsupported input format");
532         ret = -1;
533     }
534     if (ret == 0)
535     {
536         wrbuf_rewind(record);
537         ret = yaz_marc_write_mode(mt, record);
538         if (ret)
539             wrbuf_printf(wr_error, "yaz_marc_write_mode failed");
540     }
541     if (cd)
542         yaz_iconv_close(cd);
543     yaz_marc_destroy(mt);
544     return ret;
545 }
546
547 static void destroy_marc(void *info)
548 {
549     struct marc_info *mi = info;
550
551     nmem_destroy(mi->nmem);
552 }
553
554 int yaz_record_conv_configure_t(yaz_record_conv_t p, const xmlNode *ptr,
555                                 struct yaz_record_conv_type *types)
556 {
557     struct yaz_record_conv_type bt[2];
558
559     /* register marc */
560     bt[0].construct = construct_marc;
561     bt[0].convert = convert_marc;
562     bt[0].destroy = destroy_marc;
563
564 #if YAZ_HAVE_XSLT
565     /* register xslt */
566     bt[0].next = &bt[1];
567     bt[1].next = types;
568     bt[1].construct = construct_xslt;
569     bt[1].convert = convert_xslt;
570     bt[1].destroy = destroy_xslt;
571 #else
572     bt[0].next = types;
573 #endif
574
575     yaz_record_conv_reset(p);
576
577     /* parsing element children */
578     for (ptr = ptr->children; ptr; ptr = ptr->next)
579     {
580         struct yaz_record_conv_type *t;
581         struct yaz_record_conv_rule *r;
582         void *info = 0;
583         if (ptr->type != XML_ELEMENT_NODE)
584             continue;
585         for (t = &bt[0]; t; t = t->next)
586         {
587             wrbuf_rewind(p->wr_error);
588             info = t->construct(ptr, p->path, p->wr_error);
589
590             if (info || wrbuf_len(p->wr_error))
591                 break;
592             /* info== 0 and no error reported , ie not handled by it */
593         }
594         if (!info)
595         {
596             if (wrbuf_len(p->wr_error) == 0)
597                 wrbuf_printf(p->wr_error, "Element <backend>: expected "
598                              "<marc> or <xslt> element, got <%s>"
599                              , ptr->name);
600             return -1;
601         }
602         r = (struct yaz_record_conv_rule *) nmem_malloc(p->nmem, sizeof(*r));
603         r->next = 0;
604         r->info = info;
605         r->type = nmem_malloc(p->nmem, sizeof(*t));
606         memcpy(r->type, t, sizeof(*t));
607         *p->rules_p = r;
608         p->rules_p = &r->next;
609     }
610     return 0;
611 }
612
613 int yaz_record_conv_configure(yaz_record_conv_t p, const xmlNode *ptr)
614 {
615     return yaz_record_conv_configure_t(p, ptr, 0);
616 }
617
618 static int yaz_record_conv_record_rule(yaz_record_conv_t p,
619                                        struct yaz_record_conv_rule *r,
620                                        const char *input_record_buf,
621                                        size_t input_record_len,
622                                        WRBUF output_record)
623 {
624     int ret = 0;
625     WRBUF record = output_record; /* pointer transfer */
626     wrbuf_rewind(p->wr_error);
627
628     wrbuf_write(record, input_record_buf, input_record_len);
629     for (; ret == 0 && r; r = r->next)
630         ret = r->type->convert(r->info, record, p->wr_error);
631     return ret;
632 }
633
634 int yaz_record_conv_opac_record(yaz_record_conv_t p,
635                                 Z_OPACRecord *input_record,
636                                 WRBUF output_record)
637 {
638     int ret = 0;
639     struct yaz_record_conv_rule *r = p->rules;
640     if (!r || r->type->construct != construct_marc)
641     {
642         wrbuf_puts(p->wr_error, "Expecting MARC rule as first rule for OPAC");
643         ret = -1; /* no marc rule so we can't do OPAC */
644     }
645     else
646     {
647         struct marc_info *mi = r->info;
648
649         WRBUF res = wrbuf_alloc();
650         yaz_marc_t mt = yaz_marc_create();
651         yaz_iconv_t cd = yaz_iconv_open(mi->output_charset,
652                                         mi->input_charset);
653
654         wrbuf_rewind(p->wr_error);
655         yaz_marc_xml(mt, mi->output_format_mode);
656
657         yaz_marc_iconv(mt, cd);
658
659         yaz_opac_decode_wrbuf(mt, input_record, res);
660         if (ret != -1)
661         {
662             ret = yaz_record_conv_record_rule(p,
663                                               r->next,
664                                               wrbuf_buf(res), wrbuf_len(res),
665                                               output_record);
666         }
667         yaz_marc_destroy(mt);
668         if (cd)
669             yaz_iconv_close(cd);
670         wrbuf_destroy(res);
671     }
672     return ret;
673 }
674
675 int yaz_record_conv_record(yaz_record_conv_t p,
676                            const char *input_record_buf,
677                            size_t input_record_len,
678                            WRBUF output_record)
679 {
680     return yaz_record_conv_record_rule(p, p->rules,
681                                        input_record_buf,
682                                        input_record_len, output_record);
683 }
684
685 const char *yaz_record_conv_get_error(yaz_record_conv_t p)
686 {
687     return wrbuf_cstr(p->wr_error);
688 }
689
690 void yaz_record_conv_set_path(yaz_record_conv_t p, const char *path)
691 {
692     xfree(p->path);
693     p->path = 0;
694     if (path)
695         p->path = xstrdup(path);
696 }
697
698 yaz_record_conv_t yaz_record_conv_create()
699 {
700     yaz_record_conv_t p = (yaz_record_conv_t) xmalloc(sizeof(*p));
701     p->nmem = nmem_create();
702     p->wr_error = wrbuf_alloc();
703     p->rules = 0;
704     p->path = 0;
705 #if YAZ_HAVE_EXSLT
706     exsltRegisterAll();
707 #endif
708     return p;
709 }
710
711 /* YAZ_HAVE_XML2 */
712 #endif
713
714 /*
715  * Local variables:
716  * c-basic-offset: 4
717  * c-file-style: "Stroustrup"
718  * indent-tabs-mode: nil
719  * End:
720  * vim: shiftwidth=4 tabstop=8 expandtab
721  */
722