1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2011 Index Data
3 * See the file LICENSE for details.
7 * \brief Record Conversions utility
15 #include <yaz/yaz-iconv.h>
16 #include <yaz/marcdisp.h>
17 #include <yaz/record_conv.h>
18 #include <yaz/wrbuf.h>
19 #include <yaz/xmalloc.h>
21 #include <yaz/tpath.h>
22 #include <yaz/z-opac.h>
25 #include <libxml/parser.h>
26 #include <libxml/tree.h>
27 #include <libxml/xinclude.h>
29 #include <libxslt/xsltutils.h>
30 #include <libxslt/transform.h>
33 #include <libexslt/exslt.h>
36 /** \brief The internal structure for yaz_record_conv_t */
37 struct yaz_record_conv_struct {
38 /** \brief memory for configuration */
41 /** \brief conversion rules (allocated using NMEM) */
42 struct yaz_record_conv_rule *rules;
44 /** \brief pointer to last conversion rule pointer in chain */
45 struct yaz_record_conv_rule **rules_p;
47 /** \brief string buffer for error messages */
50 /** \brief path for opening files */
/** \brief transformation types (rule types) */
enum YAZ_RECORD_CONV_RULE
{
    /** rule applies an XSLT stylesheet */
    YAZ_RECORD_CONV_RULE_XSLT,
    /** rule converts MARC format and/or character set */
    YAZ_RECORD_CONV_RULE_MARC
};
61 /** \brief tranformation info (rule info) */
62 struct yaz_record_conv_rule {
63 enum YAZ_RECORD_CONV_RULE which;
71 const char *input_charset;
72 const char *output_charset;
77 struct yaz_record_conv_rule *next;
80 /** \brief reset rules+configuration */
81 static void yaz_record_conv_reset(yaz_record_conv_t p)
84 struct yaz_record_conv_rule *r;
85 for (r = p->rules; r; r = r->next)
87 if (r->which == YAZ_RECORD_CONV_RULE_MARC)
92 else if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
94 xmlFreeDoc(r->u.xslt.xsp_doc);
98 wrbuf_rewind(p->wr_error);
103 p->rules_p = &p->rules;
106 yaz_record_conv_t yaz_record_conv_create()
108 yaz_record_conv_t p = (yaz_record_conv_t) xmalloc(sizeof(*p));
109 p->nmem = nmem_create();
110 p->wr_error = wrbuf_alloc();
117 yaz_record_conv_reset(p);
121 void yaz_record_conv_destroy(yaz_record_conv_t p)
125 yaz_record_conv_reset(p);
126 nmem_destroy(p->nmem);
127 wrbuf_destroy(p->wr_error);
133 /** \brief adds a rule */
134 static struct yaz_record_conv_rule *add_rule(yaz_record_conv_t p,
135 enum YAZ_RECORD_CONV_RULE type)
137 struct yaz_record_conv_rule *r = (struct yaz_record_conv_rule *)
138 nmem_malloc(p->nmem, sizeof(*r));
142 p->rules_p = &r->next;
146 /** \brief parse 'xslt' conversion node */
147 static int conv_xslt(yaz_record_conv_t p, const xmlNode *ptr)
150 struct _xmlAttr *attr;
151 const char *stylesheet = 0;
153 for (attr = ptr->properties; attr; attr = attr->next)
155 if (!xmlStrcmp(attr->name, BAD_CAST "stylesheet") &&
156 attr->children && attr->children->type == XML_TEXT_NODE)
157 stylesheet = (const char *) attr->children->content;
160 wrbuf_printf(p->wr_error, "Bad attribute '%s'"
161 "Expected stylesheet.", attr->name);
167 wrbuf_printf(p->wr_error, "Element <xslt>: "
168 "attribute 'stylesheet' expected");
174 xsltStylesheetPtr xsp;
176 if (!yaz_filepath_resolve(stylesheet, p->path, 0, fullpath))
178 wrbuf_printf(p->wr_error, "Element <xslt stylesheet=\"%s\"/>:"
179 " could not locate stylesheet '%s'",
180 stylesheet, fullpath);
182 wrbuf_printf(p->wr_error, " with path '%s'", p->path);
186 xsp_doc = xmlParseFile(fullpath);
189 wrbuf_printf(p->wr_error, "Element: <xslt stylesheet=\"%s\"/>:"
190 " xml parse failed: %s", stylesheet, fullpath);
192 wrbuf_printf(p->wr_error, " with path '%s'", p->path);
195 /* need to copy this before passing it to the processor. It will
196 be encapsulated in the xsp and destroyed by xsltFreeStylesheet */
197 xsp = xsltParseStylesheetDoc(xmlCopyDoc(xsp_doc, 1));
200 wrbuf_printf(p->wr_error, "Element: <xslt stylesheet=\"%s\"/>:"
201 " xslt parse failed: %s", stylesheet, fullpath);
203 wrbuf_printf(p->wr_error, " with path '%s'", p->path);
204 wrbuf_printf(p->wr_error, " ("
209 "EXSLT not supported"
217 struct yaz_record_conv_rule *r =
218 add_rule(p, YAZ_RECORD_CONV_RULE_XSLT);
219 r->u.xslt.xsp_doc = xsp_doc;
220 xsltFreeStylesheet(xsp);
225 wrbuf_printf(p->wr_error, "xslt unsupported."
226 " YAZ compiled without XSLT support");
231 /** \brief parse 'marc' conversion node */
232 static int conv_marc(yaz_record_conv_t p, const xmlNode *ptr)
234 struct _xmlAttr *attr;
235 const char *input_charset = 0;
236 const char *output_charset = 0;
237 const char *input_format = 0;
238 const char *output_format = 0;
239 int input_format_mode = 0;
240 int output_format_mode = 0;
241 struct yaz_record_conv_rule *r;
243 for (attr = ptr->properties; attr; attr = attr->next)
245 if (!xmlStrcmp(attr->name, BAD_CAST "inputcharset") &&
246 attr->children && attr->children->type == XML_TEXT_NODE)
247 input_charset = (const char *) attr->children->content;
248 else if (!xmlStrcmp(attr->name, BAD_CAST "outputcharset") &&
249 attr->children && attr->children->type == XML_TEXT_NODE)
250 output_charset = (const char *) attr->children->content;
251 else if (!xmlStrcmp(attr->name, BAD_CAST "inputformat") &&
252 attr->children && attr->children->type == XML_TEXT_NODE)
253 input_format = (const char *) attr->children->content;
254 else if (!xmlStrcmp(attr->name, BAD_CAST "outputformat") &&
255 attr->children && attr->children->type == XML_TEXT_NODE)
256 output_format = (const char *) attr->children->content;
259 wrbuf_printf(p->wr_error, "Element <marc>: expected attributes"
260 "'inputformat', 'inputcharset', 'outputformat' or"
261 " 'outputcharset', got attribute '%s'",
268 wrbuf_printf(p->wr_error, "Element <marc>: "
269 "attribute 'inputformat' required");
272 else if (!strcmp(input_format, "marc"))
274 input_format_mode = YAZ_MARC_ISO2709;
276 else if (!strcmp(input_format, "xml"))
278 input_format_mode = YAZ_MARC_MARCXML;
279 /** Libxml2 generates UTF-8 encoding by default .
280 So we convert from UTF-8 to outputcharset (if defined)
282 if (!input_charset && output_charset)
283 input_charset = "utf-8";
287 wrbuf_printf(p->wr_error, "Element <marc inputformat='%s'>: "
288 " Unsupported input format"
289 " defined by attribute value",
296 wrbuf_printf(p->wr_error,
297 "Element <marc>: attribute 'outputformat' required");
300 else if (!strcmp(output_format, "line"))
302 output_format_mode = YAZ_MARC_LINE;
304 else if (!strcmp(output_format, "marcxml"))
306 output_format_mode = YAZ_MARC_MARCXML;
307 if (input_charset && !output_charset)
308 output_charset = "utf-8";
310 else if (!strcmp(output_format, "turbomarc"))
312 output_format_mode = YAZ_MARC_TURBOMARC;
313 if (input_charset && !output_charset)
314 output_charset = "utf-8";
316 else if (!strcmp(output_format, "marc"))
318 output_format_mode = YAZ_MARC_ISO2709;
320 else if (!strcmp(output_format, "marcxchange"))
322 output_format_mode = YAZ_MARC_XCHANGE;
323 if (input_charset && !output_charset)
324 output_charset = "utf-8";
328 wrbuf_printf(p->wr_error, "Element <marc outputformat='%s'>: "
329 " Unsupported output format"
330 " defined by attribute value",
334 if (input_charset && output_charset)
336 yaz_iconv_t cd = yaz_iconv_open(output_charset, input_charset);
339 wrbuf_printf(p->wr_error,
340 "Element <marc inputcharset='%s' outputcharset='%s'>:"
341 " Unsupported character set mapping"
342 " defined by attribute values",
343 input_charset, output_charset);
348 else if (input_charset)
350 wrbuf_printf(p->wr_error, "Element <marc>: "
351 "attribute 'outputcharset' missing");
354 else if (output_charset)
356 wrbuf_printf(p->wr_error, "Element <marc>: "
357 "attribute 'inputcharset' missing");
360 r = add_rule(p, YAZ_RECORD_CONV_RULE_MARC);
362 r->u.marc.input_charset = nmem_strdup(p->nmem, input_charset);
363 r->u.marc.output_charset = nmem_strdup(p->nmem, output_charset);
364 r->u.marc.input_format = input_format_mode;
365 r->u.marc.output_format = output_format_mode;
369 int yaz_record_conv_configure(yaz_record_conv_t p, const xmlNode *ptr)
371 yaz_record_conv_reset(p);
373 /* parsing element children */
374 for (ptr = ptr->children; ptr; ptr = ptr->next)
376 if (ptr->type != XML_ELEMENT_NODE)
378 if (!strcmp((const char *) ptr->name, "xslt"))
380 if (conv_xslt(p, ptr))
383 else if (!strcmp((const char *) ptr->name, "marc"))
385 if (conv_marc(p, ptr))
390 wrbuf_printf(p->wr_error, "Element <backend>: expected "
391 "<marc> or <xslt> element, got <%s>"
399 static int yaz_record_conv_record_rule(yaz_record_conv_t p,
400 struct yaz_record_conv_rule *r,
401 const char *input_record_buf,
402 size_t input_record_len,
403 WRBUF output_record);
405 int yaz_record_conv_opac_record(yaz_record_conv_t p,
406 Z_OPACRecord *input_record,
410 struct yaz_record_conv_rule *r = p->rules;
411 if (!r || r->which != YAZ_RECORD_CONV_RULE_MARC)
412 ret = -1; /* no marc rule so we can't do OPAC */
415 WRBUF res = wrbuf_alloc();
416 yaz_marc_t mt = yaz_marc_create();
417 yaz_iconv_t cd = yaz_iconv_open(r->u.marc.output_charset,
418 r->u.marc.input_charset);
420 wrbuf_rewind(p->wr_error);
421 yaz_marc_xml(mt, r->u.marc.output_format);
423 yaz_marc_iconv(mt, cd);
425 yaz_opac_decode_wrbuf(mt, input_record, res);
428 ret = yaz_record_conv_record_rule(p,
430 wrbuf_buf(res), wrbuf_len(res),
433 yaz_marc_destroy(mt);
441 int yaz_record_conv_record(yaz_record_conv_t p,
442 const char *input_record_buf,
443 size_t input_record_len,
446 return yaz_record_conv_record_rule(p, p->rules,
448 input_record_len, output_record);
451 static int yaz_record_conv_record_rule(yaz_record_conv_t p,
452 struct yaz_record_conv_rule *r,
453 const char *input_record_buf,
454 size_t input_record_len,
458 WRBUF record = output_record; /* pointer transfer */
459 wrbuf_rewind(p->wr_error);
461 wrbuf_write(record, input_record_buf, input_record_len);
462 for (; ret == 0 && r; r = r->next)
464 if (r->which == YAZ_RECORD_CONV_RULE_MARC)
467 yaz_iconv_open(r->u.marc.output_charset,
468 r->u.marc.input_charset);
469 yaz_marc_t mt = yaz_marc_create();
471 yaz_marc_xml(mt, r->u.marc.output_format);
474 yaz_marc_iconv(mt, cd);
475 if (r->u.marc.input_format == YAZ_MARC_ISO2709)
477 int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
484 else if (r->u.marc.input_format == YAZ_MARC_MARCXML ||
485 r->u.marc.input_format == YAZ_MARC_TURBOMARC)
487 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
491 wrbuf_printf(p->wr_error, "xmlParseMemory failed");
496 ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc));
498 wrbuf_printf(p->wr_error, "yaz_marc_read_xml failed");
504 wrbuf_printf(p->wr_error, "unsupported input format");
509 wrbuf_rewind(record);
510 ret = yaz_marc_write_mode(mt, record);
512 wrbuf_printf(p->wr_error, "yaz_marc_write_mode failed");
516 yaz_marc_destroy(mt);
519 else if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
521 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
525 wrbuf_printf(p->wr_error, "xmlParseMemory failed");
530 xmlDocPtr xsp_doc = xmlCopyDoc(r->u.xslt.xsp_doc, 1);
531 xsltStylesheetPtr xsp = xsltParseStylesheetDoc(xsp_doc);
532 xmlDocPtr res = xsltApplyStylesheet(xsp, doc, 0);
535 xmlChar *out_buf = 0;
538 #if HAVE_XSLTSAVERESULTTOSTRING
539 xsltSaveResultToString(&out_buf, &out_len, res, xsp);
541 xmlDocDumpFormatMemory (res, &out_buf, &out_len, 1);
545 wrbuf_printf(p->wr_error,
546 "xsltSaveResultToString failed");
551 wrbuf_rewind(record);
552 wrbuf_write(record, (const char *) out_buf, out_len);
560 wrbuf_printf(p->wr_error, "xsltApplyStylesheet failed");
564 xsltFreeStylesheet(xsp); /* frees xsp_doc too */
572 const char *yaz_record_conv_get_error(yaz_record_conv_t p)
574 return wrbuf_cstr(p->wr_error);
577 void yaz_record_conv_set_path(yaz_record_conv_t p, const char *path)
582 p->path = xstrdup(path);
589 * c-file-style: "Stroustrup"
590 * indent-tabs-mode: nil
592 * vim: shiftwidth=4 tabstop=8 expandtab