1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2012 Index Data
3 * See the file LICENSE for details.
7 * \brief Record Conversions utility
15 #include <yaz/yaz-iconv.h>
16 #include <yaz/marcdisp.h>
17 #include <yaz/record_conv.h>
18 #include <yaz/wrbuf.h>
19 #include <yaz/xmalloc.h>
21 #include <yaz/tpath.h>
22 #include <yaz/z-opac.h>
25 #include <libxml/parser.h>
26 #include <libxml/tree.h>
27 #include <libxml/xinclude.h>
29 #include <libxslt/xsltutils.h>
30 #include <libxslt/transform.h>
33 #include <libexslt/exslt.h>
36 /** \brief The internal structure for yaz_record_conv_t */
/* Holds the configured chain of conversion rules together with the
   allocator, error buffer and file search path used by the
   yaz_record_conv_* functions in this file. */
37 struct yaz_record_conv_struct {
38 /** \brief memory for configuration (rule nodes and their type copies are allocated from it) */
41 /** \brief conversion rules (allocated using NMEM) */
42 struct yaz_record_conv_rule *rules;
44 /** \brief pointer to last conversion rule pointer in chain */
/* yaz_record_conv_reset points this back at &rules, and
   yaz_record_conv_configure_t moves it to &r->next of the rule it adds. */
45 struct yaz_record_conv_rule **rules_p;
47 /** \brief string buffer for error messages (read back via yaz_record_conv_get_error) */
50 /** \brief path for opening files (handed to the construct callbacks) */
/* NOTE(review): the struct header for the members below is not visible at
   this point. They are read through "struct marc_info *" in construct_marc
   and convert_marc -- confirm the enclosing declaration. */
/** \brief source character set; given as second argument to yaz_iconv_open */
56 const char *input_charset;
/** \brief target character set; given as first argument to yaz_iconv_open */
57 const char *output_charset;
/** \brief input format; construct_marc stores YAZ_MARC_ISO2709 or YAZ_MARC_MARCXML */
58 int input_format_mode;
/** \brief output format (a YAZ_MARC_* value); handed to yaz_marc_xml */
59 int output_format_mode;
/** \brief leader specification from attribute 'leaderspec'; handed to yaz_marc_leader_spec */
60 const char *leader_spec;
63 /** \brief transformation info (rule info) */
/* One node in the singly linked chain of conversion steps. */
64 struct yaz_record_conv_rule {
/** \brief conversion type descriptor (construct/convert/destroy callbacks) */
65 struct yaz_record_conv_type *type;
/** \brief next rule in the chain; the rule loops stop at a null link */
67 struct yaz_record_conv_rule *next;
70 /** \brief reset rules+configuration */
/* \param p conversion handle
   Walks the current rule chain and lets every rule's type release its
   private info, clears the error buffer and makes the tail pointer refer
   to the head of the chain again. */
71 static void yaz_record_conv_reset(yaz_record_conv_t p)
74 struct yaz_record_conv_rule *r;
/* the destroy callback belongs to the type that constructed r->info */
75 for (r = p->rules; r; r = r->next)
77 r->type->destroy(r->info);
/* drop any message left over from an earlier operation */
79 wrbuf_rewind(p->wr_error);
/* the next rule to be added becomes the first one in the chain */
84 p->rules_p = &p->rules;
/** \brief destroys a record conversion handle
    \param p conversion handle

    Resets the rule chain first (which invokes each rule's destroy
    callback), then releases the handle's NMEM and its error buffer.
    NOTE(review): release of p->path and of the handle itself is not
    visible in these lines -- confirm.
*/
87 void yaz_record_conv_destroy(yaz_record_conv_t p)
91 yaz_record_conv_reset(p);
92 nmem_destroy(p->nmem);
93 wrbuf_destroy(p->wr_error);
/** \brief construct callback for the xslt conversion type
    \param ptr configuration element to inspect
    \param path search path used when locating the stylesheet file
    \param wr_error buffer that receives a message when configuration fails

    Accepts elements named "xslt" carrying a 'stylesheet' attribute. The
    stylesheet file is located with yaz_filepath_resolve, parsed as XML and
    then test-compiled from a copy so that stylesheet errors are reported at
    configuration time.
    NOTE(review): the return statements are not visible here. convert_xslt
    and destroy_xslt treat the info pointer as an xmlDocPtr, so the parsed
    stylesheet document is presumably what is returned on success -- confirm.
*/
101 static void *construct_xslt(const xmlNode *ptr,
102 const char *path, WRBUF wr_error)
104 struct _xmlAttr *attr;
105 const char *stylesheet = 0;
/* elements with any other name are not for this type */
107 if (strcmp((const char *) ptr->name, "xslt"))
/* 'stylesheet' is the only attribute that is recognised */
110 for (attr = ptr->properties; attr; attr = attr->next)
112 if (!xmlStrcmp(attr->name, BAD_CAST "stylesheet") &&
113 attr->children && attr->children->type == XML_TEXT_NODE)
114 stylesheet = (const char *) attr->children->content;
/* NOTE(review): the two adjacent literals below are concatenated without a
   separating space, so the message reads "...'name'Expected stylesheet." */
117 wrbuf_printf(wr_error, "Bad attribute '%s'"
118 "Expected stylesheet.", attr->name);
124 wrbuf_printf(wr_error, "Element <xslt>: "
125 "attribute 'stylesheet' expected");
131 xsltStylesheetPtr xsp;
/* resolve the stylesheet name against path; the result is written to fullpath */
133 if (!yaz_filepath_resolve(stylesheet, path, 0, fullpath))
135 wrbuf_printf(wr_error, "Element <xslt stylesheet=\"%s\"/>:"
136 " could not locate stylesheet '%s'",
137 stylesheet, stylesheet);
139 wrbuf_printf(wr_error, " with path '%s'", path);
/* read the stylesheet file as a plain XML document */
143 xsp_doc = xmlParseFile(fullpath);
146 wrbuf_printf(wr_error, "Element: <xslt stylesheet=\"%s\"/>:"
147 " xml parse failed: %s", stylesheet, fullpath);
149 wrbuf_printf(wr_error, " with path '%s'", path);
152 /* need to copy this before passing it to the processor. It will
153 be encapsulated in the xsp and destroyed by xsltFreeStylesheet */
154 xsp = xsltParseStylesheetDoc(xmlCopyDoc(xsp_doc, 1));
157 wrbuf_printf(wr_error, "Element: <xslt stylesheet=\"%s\"/>:"
158 " xslt parse failed: %s", stylesheet, fullpath);
160 wrbuf_printf(wr_error, " with path '%s'", path);
161 wrbuf_printf(wr_error, " ("
166 "EXSLT not supported"
/* the trial stylesheet, including the document copy it owns, is released */
174 xsltFreeStylesheet(xsp);
/** \brief convert callback for the xslt conversion type
    \param info stylesheet document stored for the rule (used as xmlDocPtr)
    \param record buffer holding the input record; receives the result
    \param wr_error buffer that receives a message when conversion fails

    Parses the record as XML, compiles a fresh stylesheet from a copy of the
    stored document, applies it and replaces the content of record with the
    serialized result.
    NOTE(review): the stylesheet is copied and compiled again for every
    record; also the return value convention is not visible in these lines.
*/
181 static int convert_xslt(void *info, WRBUF record, WRBUF wr_error)
/* the record must be well-formed XML to be transformed */
184 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
188 wrbuf_printf(wr_error, "xmlParseMemory failed");
/* work on a copy so that the stored stylesheet document stays untouched */
193 xmlDocPtr xsp_doc = xmlCopyDoc((xmlDocPtr) info, 1);
194 xsltStylesheetPtr xsp = xsltParseStylesheetDoc(xsp_doc);
195 xmlDocPtr res = xsltApplyStylesheet(xsp, doc, 0);
198 xmlChar *out_buf = 0;
/* serialize via libxslt when available, otherwise via a libxml2 dump */
201 #if HAVE_XSLTSAVERESULTTOSTRING
202 xsltSaveResultToString(&out_buf, &out_len, res, xsp);
204 xmlDocDumpFormatMemory (res, &out_buf, &out_len, 1);
/* NOTE(review): this message names xsltSaveResultToString even though the
   other preprocessor branch calls xmlDocDumpFormatMemory */
208 wrbuf_printf(wr_error,
209 "xsltSaveResultToString failed");
/* the transformed document replaces the input record */
214 wrbuf_rewind(record);
215 wrbuf_write(record, (const char *) out_buf, out_len);
223 wrbuf_printf(wr_error, "xsltApplyStylesheet failed");
227 xsltFreeStylesheet(xsp); /* frees xsp_doc too */
/** \brief destroy callback for the xslt conversion type
    \param info rule info, interpreted as the stylesheet document

    NOTE(review): the statement that releases the document is not visible in
    these lines -- confirm it is freed here.
*/
232 static void destroy_xslt(void *info)
236 xmlDocPtr xsp_doc = info;
/** \brief construct callback for the marc conversion type
    \param ptr configuration element to inspect
    \param path search path (not referenced in the visible lines)
    \param wr_error buffer that receives a message when configuration fails

    Accepts elements named "marc" and reads the attributes inputcharset,
    outputcharset, inputformat, outputformat and leaderspec.

    inputformat is required: "marc" selects YAZ_MARC_ISO2709 and "xml"
    selects YAZ_MARC_MARCXML. For "xml" the input charset falls back to
    "utf-8" when only an output charset was given.

    outputformat is required: "line", "marcxml", "turbomarc", "marc" and
    "marcxchange" select the matching YAZ_MARC_* mode. For marcxml,
    turbomarc and marcxchange the output charset falls back to "utf-8" when
    only an input charset was given.

    When both charsets are known the mapping is checked by opening a
    yaz_iconv descriptor; otherwise an error about the missing
    'outputcharset' or 'inputcharset' attribute is reported. Finally both
    charset names are duplicated into the rule's own NMEM.

    Every visible error path writes a message to wr_error and destroys the
    NMEM.
    NOTE(review): the return statements and the assignment of info->nmem are
    not visible in these lines -- confirm.
*/
245 static void *construct_marc(const xmlNode *ptr,
246 const char *path, WRBUF wr_error)
/* the rule gets a private NMEM; the info record is allocated from it */
248 NMEM nmem = nmem_create();
249 struct marc_info *info = nmem_malloc(nmem, sizeof(*info));
250 struct _xmlAttr *attr;
251 const char *input_format = 0;
252 const char *output_format = 0;
/* elements with any other name are not for this type */
254 if (strcmp((const char *) ptr->name, "marc"))
261 info->input_charset = 0;
262 info->output_charset = 0;
263 info->input_format_mode = 0;
264 info->output_format_mode = 0;
265 info->leader_spec = 0;
/* collect attribute values; at this stage the charset pointers refer to
   XML node content and are duplicated at the end of the function */
267 for (attr = ptr->properties; attr; attr = attr->next)
269 if (!xmlStrcmp(attr->name, BAD_CAST "inputcharset") &&
270 attr->children && attr->children->type == XML_TEXT_NODE)
271 info->input_charset = (const char *) attr->children->content;
272 else if (!xmlStrcmp(attr->name, BAD_CAST "outputcharset") &&
273 attr->children && attr->children->type == XML_TEXT_NODE)
274 info->output_charset = (const char *) attr->children->content;
275 else if (!xmlStrcmp(attr->name, BAD_CAST "inputformat") &&
276 attr->children && attr->children->type == XML_TEXT_NODE)
277 input_format = (const char *) attr->children->content;
278 else if (!xmlStrcmp(attr->name, BAD_CAST "outputformat") &&
279 attr->children && attr->children->type == XML_TEXT_NODE)
280 output_format = (const char *) attr->children->content;
281 else if (!xmlStrcmp(attr->name, BAD_CAST "leaderspec") &&
282 attr->children && attr->children->type == XML_TEXT_NODE)
284 nmem_strdup(info->nmem,(const char *) attr->children->content);
/* NOTE(review): the first two literals below are concatenated without a
   separating space, and the list of expected attributes does not mention
   'leaderspec' although it is accepted above */
287 wrbuf_printf(wr_error, "Element <marc>: expected attributes"
288 "'inputformat', 'inputcharset', 'outputformat' or"
289 " 'outputcharset', got attribute '%s'",
291 nmem_destroy(info->nmem);
/* inputformat is mandatory */
297 wrbuf_printf(wr_error, "Element <marc>: "
298 "attribute 'inputformat' required");
299 nmem_destroy(info->nmem);
302 else if (!strcmp(input_format, "marc"))
304 info->input_format_mode = YAZ_MARC_ISO2709;
306 else if (!strcmp(input_format, "xml"))
308 info->input_format_mode = YAZ_MARC_MARCXML;
309 /** Libxml2 generates UTF-8 encoding by default .
310 So we convert from UTF-8 to outputcharset (if defined)
312 if (!info->input_charset && info->output_charset)
313 info->input_charset = "utf-8";
317 wrbuf_printf(wr_error, "Element <marc inputformat='%s'>: "
318 " Unsupported input format"
319 " defined by attribute value",
321 nmem_destroy(info->nmem);
327 wrbuf_printf(wr_error,
328 "Element <marc>: attribute 'outputformat' required");
329 nmem_destroy(info->nmem);
332 else if (!strcmp(output_format, "line"))
334 info->output_format_mode = YAZ_MARC_LINE;
336 else if (!strcmp(output_format, "marcxml"))
338 info->output_format_mode = YAZ_MARC_MARCXML;
339 if (info->input_charset && !info->output_charset)
340 info->output_charset = "utf-8";
342 else if (!strcmp(output_format, "turbomarc"))
344 info->output_format_mode = YAZ_MARC_TURBOMARC;
345 if (info->input_charset && !info->output_charset)
346 info->output_charset = "utf-8";
348 else if (!strcmp(output_format, "marc"))
350 info->output_format_mode = YAZ_MARC_ISO2709;
352 else if (!strcmp(output_format, "marcxchange"))
354 info->output_format_mode = YAZ_MARC_XCHANGE;
355 if (info->input_charset && !info->output_charset)
356 info->output_charset = "utf-8";
360 wrbuf_printf(wr_error, "Element <marc outputformat='%s'>: "
361 " Unsupported output format"
362 " defined by attribute value",
364 nmem_destroy(info->nmem);
367 if (info->input_charset && info->output_charset)
369 yaz_iconv_t cd = yaz_iconv_open(info->output_charset,
370 info->input_charset);
373 wrbuf_printf(wr_error,
374 "Element <marc inputcharset='%s' outputcharset='%s'>:"
375 " Unsupported character set mapping"
376 " defined by attribute values",
377 info->input_charset, info->output_charset);
378 nmem_destroy(info->nmem);
383 else if (!info->output_charset)
385 wrbuf_printf(wr_error, "Element <marc>: "
386 "attribute 'outputcharset' missing");
387 nmem_destroy(info->nmem);
390 else if (!info->input_charset)
392 wrbuf_printf(wr_error, "Element <marc>: "
393 "attribute 'inputcharset' missing");
394 nmem_destroy(info->nmem);
397 info->input_charset = nmem_strdup(info->nmem, info->input_charset);
398 info->output_charset = nmem_strdup(info->nmem, info->output_charset);
/** \brief convert callback for the marc conversion type
    \param info rule info, interpreted as struct marc_info
    \param record buffer holding the input record; receives the result
    \param wr_error buffer that receives a message when conversion fails

    Sets up a MARC handle with the configured output mode, leader
    specification and character set conversion, decodes the record according
    to the configured input format (ISO2709, or MARCXML/TurboMARC parsed via
    libxml2) and writes the re-encoded record back into record.
    NOTE(review): release of the iconv descriptor and of the parsed XML
    document, as well as the return statement, are not visible in these
    lines -- confirm.
*/
402 static int convert_marc(void *info, WRBUF record, WRBUF wr_error)
404 struct marc_info *mi = info;
/* a conversion descriptor and a MARC handle are created for each call */
407 yaz_iconv_t cd = yaz_iconv_open(mi->output_charset, mi->input_charset);
408 yaz_marc_t mt = yaz_marc_create();
/* select the output format that yaz_marc_write_mode will produce */
410 yaz_marc_xml(mt, mi->output_format_mode);
412 yaz_marc_leader_spec(mt, mi->leader_spec);
415 yaz_marc_iconv(mt, cd);
/* binary MARC input is decoded straight from the record buffer */
416 if (mi->input_format_mode == YAZ_MARC_ISO2709)
418 int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
/* XML based input is parsed into a document first and read from its root */
425 else if (mi->input_format_mode == YAZ_MARC_MARCXML ||
426 mi->input_format_mode == YAZ_MARC_TURBOMARC)
428 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
432 wrbuf_printf(wr_error, "xmlParseMemory failed");
437 ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc));
439 wrbuf_printf(wr_error, "yaz_marc_read_xml failed");
445 wrbuf_printf(wr_error, "unsupported input format");
/* the decoded record is written over the input */
450 wrbuf_rewind(record);
451 ret = yaz_marc_write_mode(mt, record);
453 wrbuf_printf(wr_error, "yaz_marc_write_mode failed");
457 yaz_marc_destroy(mt);
/** \brief destroy callback for the marc conversion type
    \param info rule info, interpreted as struct marc_info

    Destroys the rule's private NMEM. construct_marc allocates the info
    record and the duplicated strings from an NMEM of its own, so this
    presumably releases the info record as well.
*/
461 static void destroy_marc(void *info)
463 struct marc_info *mi = info;
465 nmem_destroy(mi->nmem);
/** \brief configures a conversion handle from XML, with optional extra types
    \param p conversion handle
    \param ptr configuration element whose child elements describe the rules
    \param types additional conversion types offered besides the built-in ones

    Discards any existing configuration, then visits the child elements of
    ptr. Each element is offered to the construct callback of every known
    type in turn (marc first, then xslt) until one either returns rule info
    or reports an error. An accepted element becomes a new rule at the end
    of the chain, holding its own copy of the type descriptor.
    NOTE(review): how types is linked behind the built-in descriptors, and
    the values returned, are not visible in these lines -- confirm against
    record_conv.h.
*/
468 int yaz_record_conv_configure_t(yaz_record_conv_t p, const xmlNode *ptr,
469 struct yaz_record_conv_type *types)
/* descriptors for the two built-in conversion types */
471 struct yaz_record_conv_type bt[2];
474 bt[0].construct = construct_marc;
475 bt[0].convert = convert_marc;
476 bt[0].destroy = destroy_marc;
482 bt[1].construct = construct_xslt;
483 bt[1].convert = convert_xslt;
484 bt[1].destroy = destroy_xslt;
/* start from an empty rule chain */
489 yaz_record_conv_reset(p);
491 /* parsing element children */
492 for (ptr = ptr->children; ptr; ptr = ptr->next)
494 struct yaz_record_conv_type *t;
495 struct yaz_record_conv_rule *r;
/* only element nodes can describe a rule */
497 if (ptr->type != XML_ELEMENT_NODE)
/* offer the element to each type, following the next links */
499 for (t = &bt[0]; t; t = t->next)
501 wrbuf_rewind(p->wr_error);
502 info = t->construct(ptr, p->path, p->wr_error);
/* stop at the first type that accepted the element or reported an error */
504 if (info || wrbuf_len(p->wr_error))
506 /* info == 0 and no error reported, i.e. not handled by this type */
/* no type left a message, so report the unexpected element name */
510 if (wrbuf_len(p->wr_error) == 0)
511 wrbuf_printf(p->wr_error, "Element <backend>: expected "
512 "<marc> or <xslt> element, got <%s>"
516 r = (struct yaz_record_conv_rule *) nmem_malloc(p->nmem, sizeof(*r));
/* bt lives on the stack, so the rule keeps a copy of the descriptor */
519 r->type = nmem_malloc(p->nmem, sizeof(*t));
520 memcpy(r->type, t, sizeof(*t));
/* the following rule will be linked behind this one */
522 p->rules_p = &r->next;
/** \brief configures a conversion handle using only the built-in types
    \param p conversion handle
    \param ptr configuration element whose child elements describe the rules
    \returns the result of yaz_record_conv_configure_t called with no
    additional types
*/
527 int yaz_record_conv_configure(yaz_record_conv_t p, const xmlNode *ptr)
529 return yaz_record_conv_configure_t(p, ptr, 0);
/** \brief runs a record through a chain of rules, starting at a given rule
    \param p conversion handle (supplies the error buffer)
    \param r first rule to apply
    \param input_record_buf input record data
    \param input_record_len length of input record data

    The input is written to the output buffer, which then serves as the
    working record: each rule's convert callback transforms it in place.
    Processing stops at the first rule that returns non-zero.
*/
532 static int yaz_record_conv_record_rule(yaz_record_conv_t p,
533 struct yaz_record_conv_rule *r,
534 const char *input_record_buf,
535 size_t input_record_len,
539 WRBUF record = output_record; /* pointer transfer */
/* forget any message from an earlier conversion */
540 wrbuf_rewind(p->wr_error);
542 wrbuf_write(record, input_record_buf, input_record_len);
/* apply the rules in order for as long as they succeed */
543 for (; ret == 0 && r; r = r->next)
544 ret = r->type->convert(r->info, record, p->wr_error);
/** \brief converts an OPAC record
    \param p conversion handle
    \param input_record OPAC record to convert

    Requires the first configured rule to be a marc rule; otherwise the
    result is -1. The settings of that rule (output format and character
    sets) are used to decode the OPAC record into a temporary buffer, whose
    content is then passed on to yaz_record_conv_record_rule.
    NOTE(review): which rule the chain is resumed from, and release of the
    temporary buffer and iconv descriptor, are not visible in these lines --
    confirm.
*/
548 int yaz_record_conv_opac_record(yaz_record_conv_t p,
549 Z_OPACRecord *input_record,
553 struct yaz_record_conv_rule *r = p->rules;
/* the rule is recognised as a marc rule by its construct callback */
554 if (!r || r->type->construct != construct_marc)
555 ret = -1; /* no marc rule so we can't do OPAC */
558 struct marc_info *mi = r->info;
/* temporary buffer receiving the decoded OPAC record */
560 WRBUF res = wrbuf_alloc();
561 yaz_marc_t mt = yaz_marc_create();
562 yaz_iconv_t cd = yaz_iconv_open(mi->output_charset,
565 wrbuf_rewind(p->wr_error);
566 yaz_marc_xml(mt, mi->output_format_mode);
568 yaz_marc_iconv(mt, cd);
570 yaz_opac_decode_wrbuf(mt, input_record, res);
/* hand the decoded record to the rule chain */
573 ret = yaz_record_conv_record_rule(p,
575 wrbuf_buf(res), wrbuf_len(res),
578 yaz_marc_destroy(mt);
/** \brief converts a record using all configured rules
    \param p conversion handle
    \param input_record_buf input record data
    \param input_record_len length of input record data
    \returns the result of yaz_record_conv_record_rule applied from the
    first rule of the chain
*/
586 int yaz_record_conv_record(yaz_record_conv_t p,
587 const char *input_record_buf,
588 size_t input_record_len,
591 return yaz_record_conv_record_rule(p, p->rules,
593 input_record_len, output_record);
/** \brief returns the current error message
    \param p conversion handle
    \returns content of the handle's error buffer as a C string; the buffer
    is rewound by reset, configure and convert operations, so the pointer
    should be used before the next such call
*/
596 const char *yaz_record_conv_get_error(yaz_record_conv_t p)
598 return wrbuf_cstr(p->wr_error);
/** \brief sets the search path used when configuration opens files
    \param p conversion handle
    \param path search path; the handle stores its own copy

    NOTE(review): handling of a previously set path and of a null path is
    not visible in these lines -- confirm.
*/
601 void yaz_record_conv_set_path(yaz_record_conv_t p, const char *path)
606 p->path = xstrdup(path);
/** \brief creates a record conversion handle
    \returns newly allocated handle

    Allocates the handle together with its NMEM and its error buffer.
    NOTE(review): initialisation of the remaining members and the return
    statement are not visible in these lines -- confirm.
*/
609 yaz_record_conv_t yaz_record_conv_create()
611 yaz_record_conv_t p = (yaz_record_conv_t) xmalloc(sizeof(*p));
612 p->nmem = nmem_create();
613 p->wr_error = wrbuf_alloc();
628 * c-file-style: "Stroustrup"
629 * indent-tabs-mode: nil
631 * vim: shiftwidth=4 tabstop=8 expandtab