2 * Copyright (C) 2005-2006, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: record_conv.c,v 1.2 2006-05-03 13:04:46 adam Exp $
9 * \brief Record Conversions utility
17 #include <yaz/yaz-iconv.h>
18 #include <yaz/marcdisp.h>
19 #include <yaz/record_conv.h>
20 #include <yaz/wrbuf.h>
21 #include <yaz/xmalloc.h>
23 #include <yaz/tpath.h>
26 #include <libxml/parser.h>
27 #include <libxml/tree.h>
28 #include <libxml/xinclude.h>
29 #include <libxslt/xsltutils.h>
30 #include <libxslt/transform.h>
32 /** \brief The internal structure for yaz_record_conv_t
 *
 * NOTE(review): this listing omits several member declarations. Elsewhere
 * in the file the members p->nmem, p->wr_error and p->path are used; they
 * appear to correspond to the commented slots below - confirm against the
 * full source.
 */
33 struct yaz_record_conv_struct {
34 /** memory for configuration; rules are allocated from it with nmem_malloc */
37 /** conversion rules (allocated using NMEM) */
38 struct yaz_record_conv_rule *rules;
40 /** pointer to last conversion rule pointer in chain (the append position) */
41 struct yaz_record_conv_rule **rules_p;
43 /** string buffer for error messages; filled with wrbuf_printf on failures */
46 /** path for opening files; passed to yaz_filepath_resolve for stylesheets */
50 /** \brief transformation types (rule types) */
51 enum YAZ_RECORD_CONV_RULE
53 YAZ_RECORD_CONV_RULE_XSLT, /**< rule that applies an XSLT stylesheet */
54 YAZ_RECORD_CONV_RULE_MARC /**< rule that converts MARC format and/or character set */
57 /** \brief transformation info (rule info)
 *
 * One node of the singly linked rule chain.
 * NOTE(review): the per-type data (accessed elsewhere as u.xslt.xsp,
 * u.marc.iconv_t, u.marc.input_format and u.marc.output_format) is only
 * partly visible in this listing.
 */
58 struct yaz_record_conv_rule {
59 enum YAZ_RECORD_CONV_RULE which; /**< rule type; tells which per-type data applies */
62 xsltStylesheetPtr xsp; /**< parsed stylesheet of an XSLT rule */
71 struct yaz_record_conv_rule *next; /**< next rule in the chain, or null at the end */
74 /** reset rules+configuration
 *
 * Walks the rule chain and releases what each rule holds: the iconv handle
 * of a MARC rule (when one is set) and the stylesheet of an XSLT rule.
 * Afterwards the error buffer is rewound and the append position is pointed
 * back at the head of the chain.
 *
 * NOTE(review): some lines of this function are missing from the listing;
 * how the rule memory itself and p->rules are cleared is not visible here.
 */
75 static void yaz_record_conv_reset(yaz_record_conv_t p)
77 struct yaz_record_conv_rule *r;
78 for (r = p->rules; r; r = r->next)
80 if (r->which == YAZ_RECORD_CONV_RULE_MARC)
/* a MARC rule may have no character set conversion attached */
82 if (r->u.marc.iconv_t)
83 yaz_iconv_close(r->u.marc.iconv_t);
85 else if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
87 xsltFreeStylesheet(r->u.xslt.xsp);
90 wrbuf_rewind(p->wr_error);
/* next add_rule call appends at the head of the (now empty) chain */
95 p->rules_p = &p->rules;
/**
 * Creates a record conversion handle.
 *
 * Allocates the handle with xmalloc, gives it its own NMEM and error WRBUF,
 * then runs yaz_record_conv_reset to put it in the empty state.
 *
 * NOTE(review): the lines initialising the remaining members and the return
 * statement are missing from this listing.
 */
98 yaz_record_conv_t yaz_record_conv_create()
100 yaz_record_conv_t p = xmalloc(sizeof(*p));
101 p->nmem = nmem_create();
102 p->wr_error = wrbuf_alloc();
106 yaz_record_conv_reset(p);
/**
 * Destroys a record conversion handle.
 *
 * Resets the handle first so that resources held by the rules (iconv
 * handles, stylesheets) are released, then destroys the NMEM and frees the
 * error buffer.
 *
 * NOTE(review): further statements (for example a guard on p, or the
 * release of p itself and p->path) are not visible in this listing.
 */
110 void yaz_record_conv_destroy(yaz_record_conv_t p)
114 yaz_record_conv_reset(p);
115 nmem_destroy(p->nmem);
116 wrbuf_free(p->wr_error, 1);
/**
 * Allocates a new conversion rule from the handle's NMEM and moves the
 * append position (p->rules_p) to the new rule's next pointer.
 *
 * Callers use the result as the new rule to fill in.
 * NOTE(review): the lines that set the rule type, link the rule into the
 * chain and return it are missing from this listing.
 */
122 static struct yaz_record_conv_rule *add_rule(yaz_record_conv_t p,
123 enum YAZ_RECORD_CONV_RULE type)
125 struct yaz_record_conv_rule *r = nmem_malloc(p->nmem, sizeof(*r));
129 p->rules_p = &r->next;
/**
 * Handles one xslt configuration element: reads its attributes, locates
 * and parses the stylesheet, and adds an XSLT rule to the handle.
 *
 * Every failure visible here writes a message to p->wr_error: an attribute
 * other than stylesheet, a missing stylesheet attribute, a file that
 * yaz_filepath_resolve cannot locate using p->path, and a stylesheet that
 * xsltParseStylesheetFile cannot parse.
 *
 * The caller tests the result for non-zero.
 * NOTE(review): branch structure (else lines), the fullpath declaration and
 * the return statements are missing from this listing.
 */
133 static int conv_xslt(yaz_record_conv_t p, const xmlNode *ptr)
135 struct _xmlAttr *attr;
136 const char *stylesheet = 0;
138 for (attr = ptr->properties; attr; attr = attr->next)
/* accept the attribute only when its value is a plain text node */
140 if (!xmlStrcmp(attr->name, BAD_CAST "stylesheet") &&
141 attr->children && attr->children->type == XML_TEXT_NODE)
142 stylesheet = (const char *) attr->children->content;
145 wrbuf_printf(p->wr_error, "Bad attribute '%s'."
146 "Expected stylesheet.", attr->name);
152 wrbuf_printf(p->wr_error, "Missing attribute 'stylesheet'");
158 xsltStylesheetPtr xsp;
/* resolve the stylesheet name against the configured path */
159 if (!yaz_filepath_resolve(stylesheet, p->path, 0, fullpath))
161 wrbuf_printf(p->wr_error, "could not locate '%s'. Path=%s",
162 stylesheet, p->path);
165 xsp = xsltParseStylesheetFile((xmlChar*) fullpath);
168 wrbuf_printf(p->wr_error, "xsltParseStylesheetFile failed'");
173 struct yaz_record_conv_rule *r =
174 add_rule(p, YAZ_RECORD_CONV_RULE_XSLT);
/**
 * Handles one marc configuration element: reads the inputcharset,
 * outputcharset, inputformat and outputformat attributes, validates them,
 * opens the character set conversion when both character sets are known,
 * and adds a MARC rule to the handle.
 *
 * Accepted inputformat values: "marc" (ISO2709) and "xml" (MARCXML).
 * Accepted outputformat values: "line", "marcxml", "marc", "marcxchange".
 * For XML input, and for the XML output formats, a missing character set on
 * the XML side defaults to "utf-8" when the other side is given.
 *
 * Every failure visible here writes a message to p->wr_error. The caller
 * tests the result for non-zero.
 * NOTE(review): branch structure (else lines), the declaration of cd and
 * the return statements are missing from this listing.
 */
181 static int conv_marc(yaz_record_conv_t p, const xmlNode *ptr)
183 struct _xmlAttr *attr;
184 const char *input_charset = 0;
185 const char *output_charset = 0;
186 const char *input_format = 0;
187 const char *output_format = 0;
188 int input_format_mode = 0;
189 int output_format_mode = 0;
190 struct yaz_record_conv_rule *r;
193 for (attr = ptr->properties; attr; attr = attr->next)
/* each attribute is accepted only when its value is a plain text node */
195 if (!xmlStrcmp(attr->name, BAD_CAST "inputcharset") &&
196 attr->children && attr->children->type == XML_TEXT_NODE)
197 input_charset = (const char *) attr->children->content;
198 else if (!xmlStrcmp(attr->name, BAD_CAST "outputcharset") &&
199 attr->children && attr->children->type == XML_TEXT_NODE)
200 output_charset = (const char *) attr->children->content;
201 else if (!xmlStrcmp(attr->name, BAD_CAST "inputformat") &&
202 attr->children && attr->children->type == XML_TEXT_NODE)
203 input_format = (const char *) attr->children->content;
204 else if (!xmlStrcmp(attr->name, BAD_CAST "outputformat") &&
205 attr->children && attr->children->type == XML_TEXT_NODE)
206 output_format = (const char *) attr->children->content;
209 wrbuf_printf(p->wr_error, "Bad attribute '%s'.", attr->name);
215 wrbuf_printf(p->wr_error, "Attribute 'inputformat' required");
218 else if (!strcmp(input_format, "marc"))
220 input_format_mode = YAZ_MARC_ISO2709;
222 else if (!strcmp(input_format, "xml"))
224 input_format_mode = YAZ_MARC_MARCXML;
225 /** Libxml2 generates UTF-8 encoding by default .
226 So we convert from UTF-8 to outputcharset (if defined)
228 if (!input_charset && output_charset)
229 input_charset = "utf-8";
233 wrbuf_printf(p->wr_error, "Bad inputformat: '%s'", input_format);
239 wrbuf_printf(p->wr_error, "Attribute 'outputformat' required");
242 else if (!strcmp(output_format, "line"))
244 output_format_mode = YAZ_MARC_LINE;
246 else if (!strcmp(output_format, "marcxml"))
248 output_format_mode = YAZ_MARC_MARCXML;
249 if (input_charset && !output_charset)
250 output_charset = "utf-8";
252 else if (!strcmp(output_format, "marc"))
254 output_format_mode = YAZ_MARC_ISO2709;
256 else if (!strcmp(output_format, "marcxchange"))
258 output_format_mode = YAZ_MARC_XCHANGE;
259 if (input_charset && !output_charset)
260 output_charset = "utf-8";
/* report the rejected outputformat value, not the inputformat one */
264 wrbuf_printf(p->wr_error, "Bad outputformat: '%s'", output_format);
267 if (input_charset && output_charset)
/* yaz_iconv_open takes the target character set first */
269 cd = yaz_iconv_open(output_charset, input_charset);
272 wrbuf_printf(p->wr_error, "Unsupported character set mamping"
273 " inputcharset=%s outputcharset=%s",
274 input_charset, output_charset);
/* only one of the two character sets known: conversion cannot be set up */
278 else if (input_charset)
280 wrbuf_printf(p->wr_error, "Attribute 'outputcharset' missing");
283 else if (output_charset)
285 wrbuf_printf(p->wr_error, "Attribute 'inputcharset' missing");
288 r = add_rule(p, YAZ_RECORD_CONV_RULE_MARC);
289 r->u.marc.iconv_t = cd;
291 r->u.marc.input_format = input_format_mode;
292 r->u.marc.output_format = output_format_mode;
/**
 * Configures the handle from an XML node (variant built with XML support).
 *
 * Any previous configuration is discarded with yaz_record_conv_reset. The
 * node must be an element named "convert"; its child elements are handled
 * in document order, "xslt" by conv_xslt and "marc" by conv_marc. Any other
 * child element name, and a missing "convert" element, write a message to
 * p->wr_error.
 *
 * ptr_v is an untyped pointer that is used as a const xmlNode pointer.
 * NOTE(review): the treatment of non-element children, the reaction to a
 * failing conv_xslt/conv_marc call and the return statements are missing
 * from this listing.
 */
296 int yaz_record_conv_configure(yaz_record_conv_t p, const void *ptr_v)
298 const xmlNode *ptr = ptr_v;
300 yaz_record_conv_reset(p);
302 if (ptr && ptr->type == XML_ELEMENT_NODE &&
303 !strcmp((const char *) ptr->name, "convert"))
305 for (ptr = ptr->children; ptr; ptr = ptr->next)
/* text, comment and other non-element children are tested for here */
307 if (ptr->type != XML_ELEMENT_NODE)
309 if (!strcmp((const char *) ptr->name, "xslt"))
311 if (conv_xslt(p, ptr))
314 else if (!strcmp((const char *) ptr->name, "marc"))
316 if (conv_marc(p, ptr))
321 wrbuf_printf(p->wr_error, "Bad element '%s'."
322 "Expected marc, xslt, ..", ptr->name);
329 wrbuf_printf(p->wr_error, "Missing 'convert' element");
/**
 * Runs a record through all configured rules (variant built with XML
 * support).
 *
 * The input record is first copied into output_record, which then serves as
 * the working buffer: each rule reads the buffer and replaces its content
 * with the converted record. Rules run in chain order for as long as ret
 * stays 0.
 *
 * XSLT rule: the buffer is parsed with xmlParseMemory, the stylesheet is
 * applied, and the formatted result document is written back.
 * MARC rule: a yaz_marc_t is created with the rule's output format and
 * optional iconv handle, the buffer is read as ISO2709 or MARCXML according
 * to the rule's input format, and the record is written back with
 * yaz_marc_write_mode.
 *
 * Every failure visible here writes a message to p->wr_error.
 * NOTE(review): declarations (ret, out_buf, out_len), the remaining
 * parameters of this signature, the result checks and the cleanup and
 * return statements are missing from this listing.
 */
335 int yaz_record_conv_record(yaz_record_conv_t p, const char *input_record,
339 WRBUF record = output_record; /* pointer transfer */
340 struct yaz_record_conv_rule *r = p->rules;
341 wrbuf_rewind(p->wr_error);
343 wrbuf_puts(record, input_record);
344 for (; ret == 0 && r; r = r->next)
346 if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
348 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
352 wrbuf_printf(p->wr_error, "xmlParseMemory failed");
357 xmlDocPtr res = xsltApplyStylesheet(r->u.xslt.xsp, doc, 0);
362 xmlDocDumpFormatMemory (res, &out_buf, &out_len, 1);
/* replace the working buffer with the transformed document */
364 wrbuf_rewind(record);
365 wrbuf_write(record, (const char *) out_buf, out_len);
372 wrbuf_printf(p->wr_error, "xsltApplyStylesheet faailed");
378 else if (r->which == YAZ_RECORD_CONV_RULE_MARC)
380 yaz_marc_t mt = yaz_marc_create();
382 yaz_marc_xml(mt, r->u.marc.output_format);
384 if (r->u.marc.iconv_t)
385 yaz_marc_iconv(mt, r->u.marc.iconv_t);
386 if (r->u.marc.input_format == YAZ_MARC_ISO2709)
388 int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
395 else if (r->u.marc.input_format == YAZ_MARC_MARCXML)
397 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
401 wrbuf_printf(p->wr_error, "xmlParseMemory failed");
406 ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc));
408 wrbuf_printf(p->wr_error, "yaz_marc_read_xml failed");
414 wrbuf_printf(p->wr_error, "unsupported input format");
/* replace the working buffer with the converted MARC record */
419 wrbuf_rewind(record);
420 ret = yaz_marc_write_mode(mt, record);
422 wrbuf_printf(p->wr_error, "yaz_marc_write_mode failed");
424 yaz_marc_destroy(mt);
/**
 * Second definition of yaz_record_conv_configure: performs no
 * configuration and only replaces the content of p->wr_error with a
 * "No XML support" message.
 *
 * NOTE(review): presumably selected by a preprocessor guard when XML
 * support is not compiled in; the guard and the return statement are
 * missing from this listing.
 */
432 int yaz_record_conv_configure(yaz_record_conv_t p, const void *ptr_v)
434 wrbuf_rewind(p->wr_error);
435 wrbuf_printf(p->wr_error, "No XML support: yaz_record_conv_configure");
/**
 * Second definition of yaz_record_conv_record: performs no conversion and
 * only replaces the content of p->wr_error with a "No XML support" message.
 *
 * The parameter list must not be followed by a semicolon: that would make
 * this a prototype and leave the statements below outside any function.
 *
 * NOTE(review): presumably selected by a preprocessor guard when XML
 * support is not compiled in; the guard and the return statement are
 * missing from this listing.
 */
439 int yaz_record_conv_record(yaz_record_conv_t p, const char *input_record,
440 WRBUF output_record)
442 wrbuf_rewind(p->wr_error);
443 wrbuf_printf(p->wr_error, "No XML support: yaz_record_conv_record");
/**
 * Returns the current content of the handle's error buffer, i.e. the
 * message written by the most recent failing configure or record call.
 *
 * The pointer refers to the WRBUF held by p, so it is not the caller's to
 * free.
 * NOTE(review): whether the buffer is null-terminated at this point depends
 * on wrbuf_buf/wrbuf_printf semantics - confirm against the WRBUF API.
 */
449 const char *yaz_record_conv_get_error(yaz_record_conv_t p)
451 return wrbuf_buf(p->wr_error);
/**
 * Sets the path used when locating files such as XSLT stylesheets.
 *
 * The handle keeps its own copy of the string (xstrdup), so the caller's
 * buffer need not outlive the call.
 * NOTE(review): the handling of a previously stored path and of a null
 * path argument is missing from this listing.
 */
454 void yaz_record_conv_set_path(yaz_record_conv_t p, const char *path)
461 p->path = xstrdup(path);
468 * indent-tabs-mode: nil
470 * vim: shiftwidth=4 tabstop=8 expandtab