e392e61fd492d3a062a96710c53706286f6ec663
[yaz-moved-to-github.git] / src / record_conv.c
1 /*
2  * Copyright (C) 2005-2006, Index Data ApS
3  * See the file LICENSE for details.
4  *
5  * $Id: record_conv.c,v 1.2 2006-05-03 13:04:46 adam Exp $
6  */
7 /**
8  * \file record_conv.c
9  * \brief Record Conversions utility
10  */
11
12 #if HAVE_CONFIG_H
13 #include <config.h>
14 #endif
15
16 #include <string.h>
17 #include <yaz/yaz-iconv.h>
18 #include <yaz/marcdisp.h>
19 #include <yaz/record_conv.h>
20 #include <yaz/wrbuf.h>
21 #include <yaz/xmalloc.h>
22 #include <yaz/nmem.h>
23 #include <yaz/tpath.h>
24
25 #if HAVE_XSLT
26 #include <libxml/parser.h>
27 #include <libxml/tree.h>
28 #include <libxml/xinclude.h>
29 #include <libxslt/xsltutils.h>
30 #include <libxslt/transform.h>
31
32 /** \brief The internal structure for yaz_record_conv_t */
33 struct yaz_record_conv_struct {
34     /** memory for configuration */
35     NMEM nmem;
36
37     /** conversion rules (allocated using NMEM) */
38     struct yaz_record_conv_rule *rules;
39
40     /** pointer to last conversion rule pointer in chain */
41     struct yaz_record_conv_rule **rules_p;
42
43     /** string buffer for error messages */
44     WRBUF wr_error;
45
46     /** path for opening files  */
47     char *path;
48 };
49
/** \brief transformation types (rule types) */
enum YAZ_RECORD_CONV_RULE
{
    /** rule applies an XSLT stylesheet */
    YAZ_RECORD_CONV_RULE_XSLT,
    /** rule reads a MARC record and writes it in another format */
    YAZ_RECORD_CONV_RULE_MARC
};
56
57 /** \brief tranformation info (rule info) */
58 struct yaz_record_conv_rule {
59     enum YAZ_RECORD_CONV_RULE which;
60     union {
61         struct {
62             xsltStylesheetPtr xsp;
63             int dummy;
64         } xslt;
65         struct {
66             yaz_iconv_t iconv_t;
67             int input_format;
68             int output_format;
69         } marc;
70     } u;
71     struct yaz_record_conv_rule *next;
72 };
73
74 /** reset rules+configuration */
75 static void yaz_record_conv_reset(yaz_record_conv_t p)
76 {
77     struct yaz_record_conv_rule *r;
78     for (r = p->rules; r; r = r->next)
79     {
80         if (r->which == YAZ_RECORD_CONV_RULE_MARC)
81         {
82             if (r->u.marc.iconv_t)
83                 yaz_iconv_close(r->u.marc.iconv_t);
84         }
85         else if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
86         {
87             xsltFreeStylesheet(r->u.xslt.xsp);
88         }
89     }
90     wrbuf_rewind(p->wr_error);
91     nmem_reset(p->nmem);
92
93     p->rules = 0;
94
95     p->rules_p = &p->rules;
96 }
97
98 yaz_record_conv_t yaz_record_conv_create()
99 {
100     yaz_record_conv_t p = xmalloc(sizeof(*p));
101     p->nmem = nmem_create();
102     p->wr_error = wrbuf_alloc();
103     p->rules = 0;
104     p->path = 0;
105
106     yaz_record_conv_reset(p);
107     return p;
108 }
109
110 void yaz_record_conv_destroy(yaz_record_conv_t p)
111 {
112     if (p)
113     {
114         yaz_record_conv_reset(p);
115         nmem_destroy(p->nmem);
116         wrbuf_free(p->wr_error, 1);
117         xfree(p->path);
118         xfree(p);
119     }
120 }
121
122 static struct yaz_record_conv_rule *add_rule(yaz_record_conv_t p,
123                                              enum YAZ_RECORD_CONV_RULE type)
124 {
125     struct yaz_record_conv_rule *r = nmem_malloc(p->nmem, sizeof(*r));
126     r->which = type;
127     r->next = 0;
128     *p->rules_p = r;
129     p->rules_p = &r->next;
130     return r;
131 }
132
133 static int conv_xslt(yaz_record_conv_t p, const xmlNode *ptr)
134 {
135     struct _xmlAttr *attr;
136     const char *stylesheet = 0;
137
138     for (attr = ptr->properties; attr; attr = attr->next)
139     {
140         if (!xmlStrcmp(attr->name, BAD_CAST "stylesheet") &&
141             attr->children && attr->children->type == XML_TEXT_NODE)
142             stylesheet = (const char *) attr->children->content;
143         else
144         {
145             wrbuf_printf(p->wr_error, "Bad attribute '%s'."
146                          "Expected stylesheet.", attr->name);
147             return -1;
148         }
149     }
150     if (!stylesheet)
151     {
152         wrbuf_printf(p->wr_error, "Missing attribute 'stylesheet'");
153         return -1;
154     }
155     else
156     {
157         char fullpath[1024];
158         xsltStylesheetPtr xsp;
159         if (!yaz_filepath_resolve(stylesheet, p->path, 0, fullpath))
160         {
161             wrbuf_printf(p->wr_error, "could not locate '%s'. Path=%s",
162                          stylesheet, p->path);
163             return -1;
164         }
165         xsp = xsltParseStylesheetFile((xmlChar*) fullpath);
166         if (!xsp)
167         {
168             wrbuf_printf(p->wr_error, "xsltParseStylesheetFile failed'");
169             return -1;
170         }
171         else
172         {
173             struct yaz_record_conv_rule *r = 
174                 add_rule(p, YAZ_RECORD_CONV_RULE_XSLT);
175             r->u.xslt.xsp = xsp;
176         }
177     }
178     return 0;
179 }
180
181 static int conv_marc(yaz_record_conv_t p, const xmlNode *ptr)
182 {
183     struct _xmlAttr *attr;
184     const char *input_charset = 0;
185     const char *output_charset = 0;
186     const char *input_format = 0;
187     const char *output_format = 0;
188     int input_format_mode = 0;
189     int output_format_mode = 0;
190     struct yaz_record_conv_rule *r;
191     yaz_iconv_t cd = 0;
192
193     for (attr = ptr->properties; attr; attr = attr->next)
194     {
195         if (!xmlStrcmp(attr->name, BAD_CAST "inputcharset") &&
196             attr->children && attr->children->type == XML_TEXT_NODE)
197             input_charset = (const char *) attr->children->content;
198         else if (!xmlStrcmp(attr->name, BAD_CAST "outputcharset") &&
199             attr->children && attr->children->type == XML_TEXT_NODE)
200             output_charset = (const char *) attr->children->content;
201         else if (!xmlStrcmp(attr->name, BAD_CAST "inputformat") &&
202             attr->children && attr->children->type == XML_TEXT_NODE)
203             input_format = (const char *) attr->children->content;
204         else if (!xmlStrcmp(attr->name, BAD_CAST "outputformat") &&
205             attr->children && attr->children->type == XML_TEXT_NODE)
206             output_format = (const char *) attr->children->content;
207         else
208         {
209             wrbuf_printf(p->wr_error, "Bad attribute '%s'.", attr->name);
210             return -1;
211         }
212     }
213     if (!input_format)
214     {
215         wrbuf_printf(p->wr_error, "Attribute 'inputformat' required");
216         return -1;
217     }
218     else if (!strcmp(input_format, "marc"))
219     {
220         input_format_mode = YAZ_MARC_ISO2709;
221     }
222     else if (!strcmp(input_format, "xml"))
223     {
224         input_format_mode = YAZ_MARC_MARCXML;
225         /** Libxml2 generates UTF-8 encoding by default .
226             So we convert from UTF-8 to outputcharset (if defined) 
227         */
228         if (!input_charset && output_charset)
229             input_charset = "utf-8";
230     }
231     else
232     {
233         wrbuf_printf(p->wr_error, "Bad inputformat: '%s'", input_format);
234         return -1;
235     }
236     
237     if (!output_format)
238     {
239         wrbuf_printf(p->wr_error, "Attribute 'outputformat' required");
240         return -1;
241     }
242     else if (!strcmp(output_format, "line"))
243     {
244         output_format_mode = YAZ_MARC_LINE;
245     }
246     else if (!strcmp(output_format, "marcxml"))
247     {
248         output_format_mode = YAZ_MARC_MARCXML;
249         if (input_charset && !output_charset)
250             output_charset = "utf-8";
251     }
252     else if (!strcmp(output_format, "marc"))
253     {
254         output_format_mode = YAZ_MARC_ISO2709;
255     }
256     else if (!strcmp(output_format, "marcxchange"))
257     {
258         output_format_mode = YAZ_MARC_XCHANGE;
259         if (input_charset && !output_charset)
260             output_charset = "utf-8";
261     }
262     else
263     {
264         wrbuf_printf(p->wr_error, "Bad outputformat: '%s'", input_format);
265         return -1;
266     }
267     if (input_charset && output_charset)
268     {
269         cd = yaz_iconv_open(output_charset, input_charset);
270         if (!cd)
271         {
272             wrbuf_printf(p->wr_error, "Unsupported character set mamping"
273                          " inputcharset=%s outputcharset=%s",
274                          input_charset, output_charset);
275             return -1;
276         }
277     }
278     else if (input_charset)
279     {
280         wrbuf_printf(p->wr_error, "Attribute 'outputcharset' missing");
281         return -1;
282     }
283     else if (output_charset)
284     {
285         wrbuf_printf(p->wr_error, "Attribute 'inputcharset' missing");
286         return -1;
287     }
288     r = add_rule(p, YAZ_RECORD_CONV_RULE_MARC);
289     r->u.marc.iconv_t = cd;
290
291     r->u.marc.input_format = input_format_mode;
292     r->u.marc.output_format = output_format_mode;
293     return 0;
294 }
295
296 int yaz_record_conv_configure(yaz_record_conv_t p, const void *ptr_v)
297 {
298     const xmlNode *ptr = ptr_v; 
299
300     yaz_record_conv_reset(p);
301
302     if (ptr && ptr->type == XML_ELEMENT_NODE &&
303         !strcmp((const char *) ptr->name, "convert"))
304     {
305         for (ptr = ptr->children; ptr; ptr = ptr->next)
306         {
307             if (ptr->type != XML_ELEMENT_NODE)
308                 continue;
309             if (!strcmp((const char *) ptr->name, "xslt"))
310             {
311                 if (conv_xslt(p, ptr))
312                     return -1;
313             }
314             else if (!strcmp((const char *) ptr->name, "marc"))
315             {
316                 if (conv_marc(p, ptr))
317                     return -1;
318             }
319             else
320             {
321                 wrbuf_printf(p->wr_error, "Bad element '%s'."
322                              "Expected marc, xslt, ..", ptr->name);
323                 return -1;
324             }
325         }
326     }
327     else
328     {
329         wrbuf_printf(p->wr_error, "Missing 'convert' element");
330         return -1;
331     }
332     return 0;
333 }
334
335 int yaz_record_conv_record(yaz_record_conv_t p, const char *input_record,
336                            WRBUF output_record)
337 {
338     int ret = 0;
339     WRBUF record = output_record; /* pointer transfer */
340     struct yaz_record_conv_rule *r = p->rules;
341     wrbuf_rewind(p->wr_error);
342     
343     wrbuf_puts(record, input_record);
344     for (; ret == 0 && r; r = r->next)
345     {
346         if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
347         {
348             xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
349                                            wrbuf_len(record));
350             if (!doc)
351             {
352                 wrbuf_printf(p->wr_error, "xmlParseMemory failed");
353                 ret = -1;
354             }
355             else
356             {
357                 xmlDocPtr res = xsltApplyStylesheet(r->u.xslt.xsp, doc, 0);
358                 if (res)
359                 {
360                     xmlChar *out_buf;
361                     int out_len;
362                     xmlDocDumpFormatMemory (res, &out_buf, &out_len, 1);
363
364                     wrbuf_rewind(record);
365                     wrbuf_write(record, (const char *) out_buf, out_len);
366
367                     xmlFree(out_buf);
368                     xmlFreeDoc(res);
369                 }
370                 else
371                 {
372                     wrbuf_printf(p->wr_error, "xsltApplyStylesheet faailed");
373                     ret = -1;
374                 }
375                 xmlFreeDoc(doc);
376             }
377         }
378         else if (r->which == YAZ_RECORD_CONV_RULE_MARC)
379         {
380             yaz_marc_t mt = yaz_marc_create();
381
382             yaz_marc_xml(mt, r->u.marc.output_format);
383
384             if (r->u.marc.iconv_t)
385                 yaz_marc_iconv(mt, r->u.marc.iconv_t);
386             if (r->u.marc.input_format == YAZ_MARC_ISO2709)
387             {
388                 int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
389                                                wrbuf_len(record));
390                 if (sz > 0)
391                     ret = 0;
392                 else
393                     ret = -1;
394             }
395             else if (r->u.marc.input_format == YAZ_MARC_MARCXML)
396             {
397                 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
398                                                wrbuf_len(record));
399                 if (!doc)
400                 {
401                     wrbuf_printf(p->wr_error, "xmlParseMemory failed");
402                     ret = -1;
403                 }
404                 else
405                 {
406                     ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc));
407                     if (ret)
408                         wrbuf_printf(p->wr_error, "yaz_marc_read_xml failed");
409                 }
410                 xmlFreeDoc(doc);
411             }
412             else
413             {
414                 wrbuf_printf(p->wr_error, "unsupported input format");
415                 ret = -1;
416             }
417             if (ret == 0)
418             {
419                 wrbuf_rewind(record);
420                 ret = yaz_marc_write_mode(mt, record);
421                 if (ret)
422                     wrbuf_printf(p->wr_error, "yaz_marc_write_mode failed");
423             }
424             yaz_marc_destroy(mt);
425         }
426     }
427     return ret;
428 }
429
430 #else
431 /* !HAVE_XSLT */
432 int yaz_record_conv_configure(yaz_record_conv_t p, const void *ptr_v)
433 {
434     wrbuf_rewind(p->wr_error);
435     wrbuf_printf(p->wr_error, "No XML support: yaz_record_conv_configure");
436     return -1;
437 }
438
439 int yaz_record_conv_record(yaz_record_conv_t p, const char *input_record,
440                            WRBUF output_record);
441 {
442     wrbuf_rewind(p->wr_error);
443     wrbuf_printf(p->wr_error, "No XML support: yaz_record_conv_record");
444     return -1;
445 }
446
447 #endif
448
449 const char *yaz_record_conv_get_error(yaz_record_conv_t p)
450 {
451     return wrbuf_buf(p->wr_error);
452 }
453
454 void yaz_record_conv_set_path(yaz_record_conv_t p, const char *path)
455 {
456     if (p)
457     {
458         xfree(p->path);
459         p->path = 0;
460         if (path)
461             p->path = xstrdup(path);
462     }
463 }
464
465 /*
466  * Local variables:
467  * c-basic-offset: 4
468  * indent-tabs-mode: nil
469  * End:
470  * vim: shiftwidth=4 tabstop=8 expandtab
471  */
472