1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) Index Data
3 * See the file LICENSE for details.
7 * \brief Record Conversions utility
15 #include <yaz/yaz-iconv.h>
16 #include <yaz/marcdisp.h>
17 #include <yaz/record_conv.h>
18 #include <yaz/wrbuf.h>
19 #include <yaz/xmalloc.h>
21 #include <yaz/tpath.h>
22 #include <yaz/z-opac.h>
23 #include <yaz/xml_get.h>
26 #include <libxml/parser.h>
27 #include <libxml/tree.h>
28 #include <libxml/xinclude.h>
29 #include <libxml/xpath.h>
30 #include <libxml/xpathInternals.h>
32 #include <libxslt/xsltutils.h>
33 #include <libxslt/transform.h>
36 #include <libexslt/exslt.h>
/*
 * Per-handle state behind yaz_record_conv_t: the chain of conversion
 * rules built by configuration and the buffer that holds error text.
 */
39 /** \brief The internal structure for yaz_record_conv_t */
40 struct yaz_record_conv_struct {
41 /** \brief memory for configuration */
/* Head of the singly linked rule list; a record passes through the
   rules in list order. */
44 /** \brief conversion rules (allocated using NMEM) */
45 struct yaz_record_conv_rule *rules;
/* Append cursor: reset to &rules by yaz_record_conv_reset and moved to
   &r->next when a rule is configured. */
47 /** \brief pointer to last conversion rule pointer in chain */
48 struct yaz_record_conv_rule **rules_p;
50 /** \brief string buffer for error messages */
53 /** \brief path for opening files */
/*
 * MARC conversion settings. construct_marc fills these in from the
 * attributes of a <marc> element and convert_marc reads them.
 * NOTE(review): the enclosing struct declaration is not visible here;
 * presumably these are members of struct marc_info - confirm.
 */
/* Character set names; used as yaz_iconv_open(output_charset, input_charset). */
59 const char *input_charset;
60 const char *output_charset;
/* YAZ_MARC_* constant naming the format the incoming record is read as. */
61 int input_format_mode;
/* YAZ_MARC_* constant handed to yaz_marc_xml to select the output format. */
62 int output_format_mode;
/* Handed to yaz_marc_leader_spec by convert_marc; presumably taken from
   the 'leaderspec' attribute - confirm. */
63 const char *leader_spec;
66 /** \brief transformation info (rule info) */
/* One step in the conversion chain. */
67 struct yaz_record_conv_rule {
/* Private copy of the conversion type (construct / convert / destroy
   callbacks) that accepted the configuration element. */
68 struct yaz_record_conv_type *type;
/* Next rule in the chain; the chain ends at a null pointer. */
70 struct yaz_record_conv_rule *next;
73 /** \brief reset rules+configuration */
/*
 * Runs the destroy callback of every configured rule on that rule's
 * info, clears the error buffer and points the append cursor back at
 * the head of the rule chain.
 *
 * \param p  conversion handle whose rules are being dropped
 */
74 static void yaz_record_conv_reset(yaz_record_conv_t p)
77 struct yaz_record_conv_rule *r;
78 for (r = p->rules; r; r = r->next)
80 r->type->destroy(r->info);
82 wrbuf_rewind(p->wr_error);
87 p->rules_p = &p->rules;
/*
 * Releases a conversion handle: tears down the configured rules through
 * yaz_record_conv_reset, then destroys the handle's NMEM and its error
 * buffer.
 *
 * \param p  conversion handle to release
 */
90 void yaz_record_conv_destroy(yaz_record_conv_t p)
94 yaz_record_conv_reset(p);
95 nmem_destroy(p->nmem);
96 wrbuf_destroy(p->wr_error);
/* XSLT parameters stored as consecutive name / value entries followed by
   a null entry that ends the list; passed unchanged to
   xsltApplyStylesheet. Values are stored wrapped in single quotes. */
107 const char **xsl_parms;
/*
 * Construct callback for the <xslt> conversion type.
 *
 * Visible steps:
 *  - compares the element name with "xslt";
 *  - takes the stylesheet file name from the 'stylesheet' attribute and
 *    reports "Bad attribute" for anything else;
 *  - reads <param name=".." value=".."/> children into info->xsl_parms,
 *    wrapping every value in single quotes, and reports an error when
 *    max_parms entries are already stored;
 *  - resolves the stylesheet against 'path' with yaz_filepath_resolve,
 *    parses it with xmlParseFile and keeps the document in
 *    info->xsp_doc;
 *  - compiles a copy of that document with xsltParseStylesheetDoc and
 *    releases the compiled stylesheet with xsltFreeStylesheet
 *    (convert_xslt compiles its own copy for every record).
 *
 * \param ptr       XML element to examine
 * \param path      search path used to locate the stylesheet
 * \param wr_error  receives the message when the element is rejected
 *
 * NOTE(review): the adjacent literals "Bad attribute '%s'" "Expected ..."
 * and "Bad element '%s'" "Expected param." are concatenated with no
 * separator, so the message reads "...'x'Expected ...".
 * NOTE(review): parameter values are quoted without escaping; a value
 * that itself contains a single quote presumably produces an invalid
 * string literal for the XSLT processor - confirm.
 */
110 static void *construct_xslt(const xmlNode *ptr,
111 const char *path, WRBUF wr_error)
113 struct _xmlAttr *attr;
114 const char *stylesheet = 0;
115 struct xslt_info *info = 0;
120 if (strcmp((const char *) ptr->name, "xslt"))
123 for (attr = ptr->properties; attr; attr = attr->next)
125 if (!xmlStrcmp(attr->name, BAD_CAST "stylesheet") &&
126 attr->children && attr->children->type == XML_TEXT_NODE)
127 stylesheet = (const char *) attr->children->content;
130 wrbuf_printf(wr_error, "Bad attribute '%s'"
131 "Expected stylesheet.", attr->name);
135 nmem = nmem_create();
136 info = nmem_malloc(nmem, sizeof(*info));
/* Room for max_parms name/value pairs plus the terminating entry. */
138 info->xsl_parms = nmem_malloc(
139 nmem, (2 * max_parms + 1) * sizeof(*info->xsl_parms));
141 for (ptr = ptr->children; ptr; ptr = ptr->next)
143 const char *name = 0;
144 const char *value = 0;
146 if (ptr->type != XML_ELEMENT_NODE)
148 if (strcmp((const char *) ptr->name, "param"))
150 wrbuf_printf(wr_error, "Bad element '%s'"
151 "Expected param.", ptr->name);
155 for (attr = ptr->properties; attr; attr = attr->next)
157 if (!xmlStrcmp(attr->name, BAD_CAST "name") &&
158 attr->children && attr->children->type == XML_TEXT_NODE)
159 name = (const char *) attr->children->content;
160 else if (!xmlStrcmp(attr->name, BAD_CAST "value") &&
161 attr->children && attr->children->type == XML_TEXT_NODE)
162 value = (const char *) attr->children->content;
165 wrbuf_printf(wr_error, "Bad attribute '%s'"
166 "Expected name or value.", attr->name);
173 wrbuf_printf(wr_error, "Missing attributes name or value");
177 if (no_parms >= max_parms)
179 wrbuf_printf(wr_error, "Too many parameters given");
/* strlen(value) + 3: two quote characters plus the terminating NUL. */
184 qvalue = nmem_malloc(nmem, strlen(value) + 3);
185 strcpy(qvalue, "\'");
186 strcat(qvalue, value);
187 strcat(qvalue, "\'");
189 info->xsl_parms[2 * no_parms] = nmem_strdup(nmem, name);
190 info->xsl_parms[2 * no_parms + 1] = qvalue;
/* Terminate the parameter list. */
194 info->xsl_parms[2 * no_parms] = '\0';
198 wrbuf_printf(wr_error, "Element <xslt>: "
199 "attribute 'stylesheet' expected");
205 xsltStylesheetPtr xsp;
206 if (!yaz_filepath_resolve(stylesheet, path, 0, fullpath))
208 wrbuf_printf(wr_error, "Element <xslt stylesheet=\"%s\"/>:"
209 " could not locate stylesheet '%s'",
210 stylesheet, stylesheet);
212 wrbuf_printf(wr_error, " with path '%s'", path);
217 info->xsp_doc = xmlParseFile(fullpath);
220 wrbuf_printf(wr_error, "Element: <xslt stylesheet=\"%s\"/>:"
221 " xml parse failed: %s", stylesheet, fullpath);
223 wrbuf_printf(wr_error, " with path '%s'", path);
227 /* need to copy this before passing it to the processor. It will
228 be encapsulated in the xsp and destroyed by xsltFreeStylesheet */
229 xsp = xsltParseStylesheetDoc(xmlCopyDoc(info->xsp_doc, 1));
232 wrbuf_printf(wr_error, "Element: <xslt stylesheet=\"%s\"/>:"
233 " xslt parse failed: %s", stylesheet, fullpath);
235 wrbuf_printf(wr_error, " with path '%s'", path);
236 wrbuf_printf(wr_error, " ("
241 "EXSLT not supported"
244 xmlFreeDoc(info->xsp_doc);
245 nmem_destroy(info->nmem);
249 xsltFreeStylesheet(xsp);
/*
 * Convert callback for the <xslt> conversion type.
 *
 * Parses the record as XML, compiles a fresh copy of the stored
 * stylesheet document, applies it with the configured parameters and
 * replaces the record's content with the serialized result. Failures
 * are reported through wr_error ("xmlParseMemory failed",
 * "xsltSaveResultToString failed", "xsltApplyStylesheet failed").
 *
 * \param vinfo     struct xslt_info created by construct_xslt
 * \param record    in: XML record; out: transformation result
 * \param wr_error  receives the error message on failure
 */
256 static int convert_xslt(void *vinfo, WRBUF record, WRBUF wr_error)
259 struct xslt_info *info = vinfo;
261 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
265 wrbuf_printf(wr_error, "xmlParseMemory failed");
/* The stylesheet is compiled from a copy of the stored document on
   every call; xsltFreeStylesheet below releases that copy as well. */
270 xmlDocPtr xsp_doc = xmlCopyDoc(info->xsp_doc, 1);
271 xsltStylesheetPtr xsp = xsltParseStylesheetDoc(xsp_doc);
272 xmlDocPtr res = xsltApplyStylesheet(xsp, doc, info->xsl_parms);
275 xmlChar *out_buf = 0;
/* xsltSaveResultToString is used when the build provides it;
   xmlDocDumpFormatMemory appears to be the alternative. */
278 #if HAVE_XSLTSAVERESULTTOSTRING
279 xsltSaveResultToString(&out_buf, &out_len, res, xsp);
281 xmlDocDumpFormatMemory (res, &out_buf, &out_len, 1);
285 wrbuf_printf(wr_error,
286 "xsltSaveResultToString failed");
291 wrbuf_rewind(record);
292 wrbuf_write(record, (const char *) out_buf, out_len);
300 wrbuf_printf(wr_error, "xsltApplyStylesheet failed");
304 xsltFreeStylesheet(xsp); /* frees xsp_doc too */
/*
 * Destroy callback for the <xslt> conversion type: frees the parsed
 * stylesheet document and the NMEM that holds the rule's data.
 *
 * \param vinfo  struct xslt_info created by construct_xslt
 */
309 static void destroy_xslt(void *vinfo)
311 struct xslt_info *info = vinfo;
315 xmlFreeDoc(info->xsp_doc);
316 nmem_destroy(info->nmem);
/*
 * Construct callback for the <select> conversion type.
 *
 * Compares the element name with "select", allocates the rule data from
 * a new NMEM, reads the 'path' attribute through yaz_xml_get_prop and
 * stores a copy of the XPath expression in info->xpath_expr. An
 * attribute reported back by yaz_xml_get_prop is written to wr_error
 * as "Bad attribute".
 *
 * \param ptr       XML element to examine
 * \param path      not referenced in the visible lines
 * \param wr_error  receives the message when the element is rejected
 *
 * NOTE(review): the literals "Bad attribute '%s'" "Expected xpath." are
 * concatenated with no separator between them.
 */
328 static void *construct_select(const xmlNode *ptr,
329 const char *path, WRBUF wr_error)
331 if (strcmp((const char *) ptr->name, "select"))
335 NMEM nmem = nmem_create();
336 struct select_info *info = nmem_malloc(nmem, sizeof(*info));
337 const char *attr_str;
338 const char *xpath = 0;
341 info->xpath_expr = 0;
342 attr_str = yaz_xml_get_prop(ptr, "path%s", &xpath);
345 wrbuf_printf(wr_error, "Bad attribute '%s'"
346 "Expected xpath.", attr_str);
351 info->xpath_expr = nmem_strdup(nmem, xpath);
/*
 * Convert callback for the <select> conversion type.
 *
 * Parses the record as XML and evaluates the configured XPath
 * expression against it. When the resulting node set is non-empty the
 * record is rewound and the content of the text nodes found for the
 * matched nodes is appended to it; with an empty node set the record
 * is not rewound.
 *
 * \param vinfo     struct select_info created by construct_select
 * \param record    in: XML record; out: selected text
 * \param wr_error  receives "xmlParseMemory failed" when parsing fails
 */
356 static int convert_select(void *vinfo, WRBUF record, WRBUF wr_error)
359 struct select_info *info = vinfo;
361 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
365 wrbuf_printf(wr_error, "xmlParseMemory failed");
370 xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
371 if (xpathCtx && info->xpath_expr)
373 xmlXPathObjectPtr xpathObj =
374 xmlXPathEvalExpression((const xmlChar *) info->xpath_expr,
378 xmlNodeSetPtr nodes = xpathObj->nodesetval;
/* Only discard the input once there is something to replace it with. */
382 if (nodes->nodeNr > 0)
383 wrbuf_rewind(record);
384 for (i = 0; i < nodes->nodeNr; i++)
386 xmlNode *ptr = nodes->nodeTab[i];
387 if (ptr->type == XML_ELEMENT_NODE)
389 for (; ptr; ptr = ptr->next)
390 if (ptr->type == XML_TEXT_NODE)
391 wrbuf_puts(record, (const char *) ptr->content);
394 xmlXPathFreeObject(xpathObj);
396 xmlXPathFreeContext(xpathCtx);
/*
 * Destroy callback for the <select> conversion type: destroys the NMEM
 * that holds the rule's data.
 *
 * \param vinfo  struct select_info created by construct_select
 */
403 static void destroy_select(void *vinfo)
405 struct select_info *info = vinfo;
408 nmem_destroy(info->nmem);
/*
 * Construct callback for the <solrmarc> conversion type.
 *
 * Compares the element name with "solrmarc". The type keeps no
 * per-rule data, so the error buffer pointer itself is returned as a
 * non-null token.
 *
 * \param ptr       XML element to examine
 * \param path      not referenced in the visible lines
 * \param wr_error  only used as the returned placeholder pointer
 */
412 static void *construct_solrmarc(const xmlNode *ptr,
413 const char *path, WRBUF wr_error)
415 if (strcmp((const char *) ptr->name, "solrmarc"))
417 return wr_error; /* any non-null ptr will do; we don't use it later*/
/*
 * Convert callback for the <solrmarc> conversion type.
 *
 * Scans the record byte by byte into a scratch buffer and looks for a
 * '#' that is followed by two characters accepted by atoi_n_check and
 * then a ';'. The scratch buffer finally replaces the record's content.
 *
 * \param info      unused placeholder from construct_solrmarc
 * \param record    in: original record; out: rewritten record
 * \param wr_error  not referenced in the visible lines
 *
 * NOTE(review): sz is a size_t, so 'sz - 3' wraps around for records
 * shorter than three bytes; the bound check then no longer prevents the
 * read of buf[i+3].
 */
420 static int convert_solrmarc(void *info, WRBUF record, WRBUF wr_error)
422 WRBUF w = wrbuf_alloc();
423 const char *buf = wrbuf_buf(record);
424 size_t i, sz = wrbuf_len(record);
425 for (i = 0; i < sz; i++)
428 if (buf[i] == '#' && i < sz - 3 && buf[i+3] == ';'
429 && atoi_n_check(buf+i+1, 2, &ch))
435 wrbuf_rewind(record);
436 wrbuf_write(record, wrbuf_buf(w), wrbuf_len(w));
/*
 * Destroy callback for the <solrmarc> conversion type.
 * construct_solrmarc returns the error buffer pointer as a placeholder
 * rather than an allocation of its own.
 * NOTE(review): presumably this function has nothing to release -
 * confirm against the body.
 */
441 static void destroy_solrmarc(void *info)
/*
 * Construct callback for the <marc> conversion type.
 *
 * Visible steps:
 *  - allocates the rule data from a new NMEM, then compares the element
 *    name with "marc";
 *  - reads the attributes inputcharset, outputcharset, inputformat,
 *    outputformat and leaderspec; any other attribute is an error;
 *  - maps inputformat "marc" / "xml" / "json" to YAZ_MARC_ISO2709 /
 *    YAZ_MARC_MARCXML / YAZ_MARC_JSON; for "xml" the input charset
 *    defaults to "utf-8" when only an output charset was given;
 *  - maps outputformat "line", "marcxml", "turbomarc", "marc",
 *    "marcxchange" and "json" to the matching YAZ_MARC_* constant; for
 *    marcxml, turbomarc, marcxchange and json the output charset
 *    defaults to "utf-8" when only an input charset was given;
 *  - checks with yaz_iconv_open that the charset pair is supported and
 *    reports a missing inputcharset / outputcharset otherwise;
 *  - finally copies both charset names into the rule's NMEM; until then
 *    they may point at attribute content owned by the XML tree.
 * Every visible error path writes a message to wr_error and destroys
 * info->nmem.
 *
 * \param ptr       XML element to examine
 * \param path      not referenced in the visible lines
 * \param wr_error  receives the message when the element is rejected
 *
 * NOTE(review): the unknown-attribute message lists four attributes
 * although 'leaderspec' is accepted too, and its first two literals
 * ("...expected attributes" "'inputformat'...") are joined without a
 * space.
 */
445 static void *construct_marc(const xmlNode *ptr,
446 const char *path, WRBUF wr_error)
448 NMEM nmem = nmem_create();
449 struct marc_info *info = nmem_malloc(nmem, sizeof(*info));
450 struct _xmlAttr *attr;
451 const char *input_format = 0;
452 const char *output_format = 0;
454 if (strcmp((const char *) ptr->name, "marc"))
460 info->input_charset = 0;
461 info->output_charset = 0;
462 info->input_format_mode = 0;
463 info->output_format_mode = 0;
464 info->leader_spec = 0;
466 for (attr = ptr->properties; attr; attr = attr->next)
468 if (!xmlStrcmp(attr->name, BAD_CAST "inputcharset") &&
469 attr->children && attr->children->type == XML_TEXT_NODE)
470 info->input_charset = (const char *) attr->children->content;
471 else if (!xmlStrcmp(attr->name, BAD_CAST "outputcharset") &&
472 attr->children && attr->children->type == XML_TEXT_NODE)
473 info->output_charset = (const char *) attr->children->content;
474 else if (!xmlStrcmp(attr->name, BAD_CAST "inputformat") &&
475 attr->children && attr->children->type == XML_TEXT_NODE)
476 input_format = (const char *) attr->children->content;
477 else if (!xmlStrcmp(attr->name, BAD_CAST "outputformat") &&
478 attr->children && attr->children->type == XML_TEXT_NODE)
479 output_format = (const char *) attr->children->content;
480 else if (!xmlStrcmp(attr->name, BAD_CAST "leaderspec") &&
481 attr->children && attr->children->type == XML_TEXT_NODE)
483 nmem_strdup(info->nmem,(const char *) attr->children->content);
486 wrbuf_printf(wr_error, "Element <marc>: expected attributes"
487 "'inputformat', 'inputcharset', 'outputformat' or"
488 " 'outputcharset', got attribute '%s'",
490 nmem_destroy(info->nmem);
496 wrbuf_printf(wr_error, "Element <marc>: "
497 "attribute 'inputformat' required");
498 nmem_destroy(info->nmem);
501 else if (!strcmp(input_format, "marc"))
503 info->input_format_mode = YAZ_MARC_ISO2709;
505 else if (!strcmp(input_format, "xml"))
507 info->input_format_mode = YAZ_MARC_MARCXML;
508 /** Libxml2 generates UTF-8 encoding by default .
509 So we convert from UTF-8 to outputcharset (if defined)
511 if (!info->input_charset && info->output_charset)
512 info->input_charset = "utf-8";
514 else if (!strcmp(input_format, "json"))
516 info->input_format_mode = YAZ_MARC_JSON;
520 wrbuf_printf(wr_error, "Element <marc inputformat='%s'>: "
521 " Unsupported input format"
522 " defined by attribute value",
524 nmem_destroy(info->nmem);
530 wrbuf_printf(wr_error,
531 "Element <marc>: attribute 'outputformat' required");
532 nmem_destroy(info->nmem);
535 else if (!strcmp(output_format, "line"))
537 info->output_format_mode = YAZ_MARC_LINE;
539 else if (!strcmp(output_format, "marcxml"))
541 info->output_format_mode = YAZ_MARC_MARCXML;
542 if (info->input_charset && !info->output_charset)
543 info->output_charset = "utf-8";
545 else if (!strcmp(output_format, "turbomarc"))
547 info->output_format_mode = YAZ_MARC_TURBOMARC;
548 if (info->input_charset && !info->output_charset)
549 info->output_charset = "utf-8";
551 else if (!strcmp(output_format, "marc"))
553 info->output_format_mode = YAZ_MARC_ISO2709;
555 else if (!strcmp(output_format, "marcxchange"))
557 info->output_format_mode = YAZ_MARC_XCHANGE;
558 if (info->input_charset && !info->output_charset)
559 info->output_charset = "utf-8";
561 else if (!strcmp(output_format, "json"))
563 info->output_format_mode = YAZ_MARC_JSON;
564 if (info->input_charset && !info->output_charset)
565 info->output_charset = "utf-8";
569 wrbuf_printf(wr_error, "Element <marc outputformat='%s'>: "
570 " Unsupported output format"
571 " defined by attribute value",
573 nmem_destroy(info->nmem);
576 if (info->input_charset && info->output_charset)
578 yaz_iconv_t cd = yaz_iconv_open(info->output_charset,
579 info->input_charset);
582 wrbuf_printf(wr_error,
583 "Element <marc inputcharset='%s' outputcharset='%s'>:"
584 " Unsupported character set mapping"
585 " defined by attribute values",
586 info->input_charset, info->output_charset);
587 nmem_destroy(info->nmem);
592 else if (!info->output_charset)
594 wrbuf_printf(wr_error, "Element <marc>: "
595 "attribute 'outputcharset' missing");
596 nmem_destroy(info->nmem);
599 else if (!info->input_charset)
601 wrbuf_printf(wr_error, "Element <marc>: "
602 "attribute 'inputcharset' missing");
603 nmem_destroy(info->nmem);
606 info->input_charset = nmem_strdup(info->nmem, info->input_charset);
607 info->output_charset = nmem_strdup(info->nmem, info->output_charset);
/*
 * Convert callback for the <marc> conversion type.
 *
 * Creates a yaz_marc_t configured with the rule's output format and
 * leader spec, reads the record either as ISO2709 or as XML (MARCXML /
 * TurboMARC), attaches an iconv handle for the charset conversion and
 * writes the converted record back into 'record'. Any other input
 * format mode is reported as "unsupported input format".
 *
 * \param info      struct marc_info created by construct_marc
 * \param record    in: source record; out: converted record
 * \param wr_error  receives the error message on failure
 */
611 static int convert_marc(void *info, WRBUF record, WRBUF wr_error)
613 struct marc_info *mi = info;
614 const char *input_charset = mi->input_charset;
616 yaz_marc_t mt = yaz_marc_create();
618 yaz_marc_xml(mt, mi->output_format_mode);
620 yaz_marc_leader_spec(mt, mi->leader_spec);
622 if (mi->input_format_mode == YAZ_MARC_ISO2709)
624 int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
/* The record itself can override the configured input charset: when
   yaz_marc_check_marc21_coding says so, "utf-8" is used instead. */
628 if (yaz_marc_check_marc21_coding(input_charset, wrbuf_buf(record),
630 input_charset = "utf-8";
636 else if (mi->input_format_mode == YAZ_MARC_MARCXML ||
637 mi->input_format_mode == YAZ_MARC_TURBOMARC)
639 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
643 wrbuf_printf(wr_error, "xmlParseMemory failed");
648 ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc));
650 wrbuf_printf(wr_error, "yaz_marc_read_xml failed");
656 wrbuf_printf(wr_error, "unsupported input format");
661 yaz_iconv_t cd = yaz_iconv_open(mi->output_charset, input_charset);
664 yaz_marc_iconv(mt, cd);
666 wrbuf_rewind(record);
667 ret = yaz_marc_write_mode(mt, record);
669 wrbuf_printf(wr_error, "yaz_marc_write_mode failed");
673 yaz_marc_destroy(mt);
/*
 * Destroy callback for the <marc> conversion type: destroys the NMEM
 * that holds the rule's data.
 *
 * \param info  struct marc_info created by construct_marc
 */
677 static void destroy_marc(void *info)
679 struct marc_info *mi = info;
681 nmem_destroy(mi->nmem);
/*
 * Configures a conversion handle from the children of an XML element,
 * using the built-in conversion types followed by caller-supplied ones.
 *
 * The built-in types (marc, solrmarc, select, xslt) are linked together
 * in a local array whose last entry points at 'types'. Existing rules
 * are dropped first. Each element child is then offered to the types in
 * order until one returns rule data or writes an error; an accepted
 * element is stored as a new rule.
 *
 * \param p      conversion handle to configure
 * \param ptr    XML element whose element children describe the rules
 * \param types  additional conversion types to try after the built-in
 *               ones; may be 0
 *
 * NOTE(review): the fallback message names only <marc> and <xslt>
 * although solrmarc and select types are registered here as well.
 */
684 int yaz_record_conv_configure_t(yaz_record_conv_t p, const xmlNode *ptr,
685 struct yaz_record_conv_type *types)
687 struct yaz_record_conv_type bt[4];
691 bt[i].construct = construct_marc;
692 bt[i].convert = convert_marc;
693 bt[i++].destroy = destroy_marc;
695 bt[i-1].next = &bt[i];
696 bt[i].construct = construct_solrmarc;
697 bt[i].convert = convert_solrmarc;
698 bt[i++].destroy = destroy_solrmarc;
700 bt[i-1].next = &bt[i];
701 bt[i].construct = construct_select;
702 bt[i].convert = convert_select;
703 bt[i++].destroy = destroy_select;
707 bt[i-1].next = &bt[i];
708 bt[i].construct = construct_xslt;
709 bt[i].convert = convert_xslt;
710 bt[i++].destroy = destroy_xslt;
713 bt[i-1].next = types;
714 yaz_record_conv_reset(p);
716 /* parsing element children */
717 for (ptr = ptr->children; ptr; ptr = ptr->next)
719 struct yaz_record_conv_type *t;
720 struct yaz_record_conv_rule *r;
722 if (ptr->type != XML_ELEMENT_NODE)
724 for (t = &bt[0]; t; t = t->next)
726 wrbuf_rewind(p->wr_error);
727 info = t->construct(ptr, p->path, p->wr_error);
729 if (info || wrbuf_len(p->wr_error))
731 /* info== 0 and no error reported , ie not handled by it */
735 if (wrbuf_len(p->wr_error) == 0)
736 wrbuf_printf(p->wr_error, "Element <backend>: expected "
737 "<marc> or <xslt> element, got <%s>"
741 r = (struct yaz_record_conv_rule *) nmem_malloc(p->nmem, sizeof(*r));
/* bt[] lives on this function's stack, so the rule keeps its own copy
   of the type descriptor in the handle's NMEM. */
744 r->type = nmem_malloc(p->nmem, sizeof(*t));
745 memcpy(r->type, t, sizeof(*t));
747 p->rules_p = &r->next;
/*
 * Configures a conversion handle using only the built-in conversion
 * types; forwards to yaz_record_conv_configure_t with no extra types.
 *
 * \param p    conversion handle to configure
 * \param ptr  XML element whose element children describe the rules
 * \return     whatever yaz_record_conv_configure_t returns
 */
752 int yaz_record_conv_configure(yaz_record_conv_t p, const xmlNode *ptr)
754 return yaz_record_conv_configure_t(p, ptr, 0);
/*
 * Runs a record through a chain of rules starting at 'r'.
 *
 * The error buffer is cleared, the input bytes are written to the
 * output buffer, and each rule's convert callback then transforms that
 * buffer in place. Processing stops at the first callback that returns
 * a non-zero value.
 *
 * \param p                 conversion handle (supplies the error buffer)
 * \param r                 first rule to apply; may be null
 * \param input_record_buf  bytes of the record to convert
 * \param input_record_len  number of bytes in input_record_buf
 */
757 static int yaz_record_conv_record_rule(yaz_record_conv_t p,
758 struct yaz_record_conv_rule *r,
759 const char *input_record_buf,
760 size_t input_record_len,
764 WRBUF record = output_record; /* pointer transfer */
765 wrbuf_rewind(p->wr_error);
767 wrbuf_write(record, input_record_buf, input_record_len);
768 for (; ret == 0 && r; r = r->next)
769 ret = r->type->convert(r->info, record, p->wr_error);
/*
 * Converts an OPAC record.
 *
 * The first configured rule must be a MARC rule; otherwise the error
 * "Expecting MARC rule as first rule for OPAC" is set and -1 results.
 * With a MARC rule in place the OPAC record is decoded into a
 * temporary buffer by yaz_opac_decode_wrbuf, using that rule's output
 * format and charset mapping, and the decoded text is then passed to
 * yaz_record_conv_record_rule.
 * NOTE(review): presumably the remaining rules (after the MARC rule)
 * are the ones applied to the decoded text - confirm.
 *
 * \param p             conversion handle
 * \param input_record  OPAC record to convert
 */
773 int yaz_record_conv_opac_record(yaz_record_conv_t p,
774 Z_OPACRecord *input_record,
778 struct yaz_record_conv_rule *r = p->rules;
/* A rule is recognised as MARC by its construct callback. */
779 if (!r || r->type->construct != construct_marc)
781 wrbuf_puts(p->wr_error, "Expecting MARC rule as first rule for OPAC");
782 ret = -1; /* no marc rule so we can't do OPAC */
786 struct marc_info *mi = r->info;
787 const char *input_charset = mi->input_charset;
790 WRBUF res = wrbuf_alloc();
791 yaz_marc_t mt = yaz_marc_create();
793 if (yaz_opac_check_marc21_coding(input_charset, input_record))
794 input_charset = "utf-8";
795 cd = yaz_iconv_open(mi->output_charset, input_charset);
797 wrbuf_rewind(p->wr_error);
798 yaz_marc_xml(mt, mi->output_format_mode);
800 yaz_marc_iconv(mt, cd);
802 yaz_opac_decode_wrbuf(mt, input_record, res);
805 ret = yaz_record_conv_record_rule(p,
807 wrbuf_buf(res), wrbuf_len(res),
810 yaz_marc_destroy(mt);
/*
 * Converts a record by running it through every configured rule,
 * starting at the head of the handle's rule chain.
 *
 * \param p                 conversion handle
 * \param input_record_buf  bytes of the record to convert
 * \param input_record_len  number of bytes in input_record_buf
 * \return                  result of yaz_record_conv_record_rule
 */
818 int yaz_record_conv_record(yaz_record_conv_t p,
819 const char *input_record_buf,
820 size_t input_record_len,
823 return yaz_record_conv_record_rule(p, p->rules,
825 input_record_len, output_record);
/*
 * Returns the current content of the handle's error buffer as a C
 * string. The string belongs to the handle's buffer, which the
 * configure and convert functions rewind.
 *
 * \param p  conversion handle
 */
828 const char *yaz_record_conv_get_error(yaz_record_conv_t p)
830 return wrbuf_cstr(p->wr_error);
/*
 * Sets the path handed to the construct callbacks during configuration
 * (the XSLT type uses it to locate stylesheets). The handle stores its
 * own copy of the string made with xstrdup.
 * NOTE(review): handling of a previously stored path and of a null
 * 'path' is not visible here - confirm.
 *
 * \param p     conversion handle
 * \param path  file search path
 */
833 void yaz_record_conv_set_path(yaz_record_conv_t p, const char *path)
838 p->path = xstrdup(path);
/*
 * Allocates a new conversion handle together with the NMEM used for
 * its configuration and the buffer used for error messages.
 */
841 yaz_record_conv_t yaz_record_conv_create()
843 yaz_record_conv_t p = (yaz_record_conv_t) xmalloc(sizeof(*p));
844 p->nmem = nmem_create();
845 p->wr_error = wrbuf_alloc();
857 * c-file-style: "Stroustrup"
858 * indent-tabs-mode: nil
860 * vim: shiftwidth=4 tabstop=8 expandtab