Use oid_class rather than int for OID class.
[yaz-moved-to-github.git] / src / record_conv.c
1 /*
2  * Copyright (C) 2005-2007, Index Data ApS
3  * See the file LICENSE for details.
4  *
5  * $Id: record_conv.c,v 1.16 2007-05-06 20:12:20 adam Exp $
6  */
7 /**
8  * \file record_conv.c
9  * \brief Record Conversions utility
10  */
11
12 #if HAVE_CONFIG_H
13 #include <config.h>
14 #endif
15
16 #include <string.h>
17 #include <yaz/yaz-iconv.h>
18 #include <yaz/marcdisp.h>
19 #include <yaz/record_conv.h>
20 #include <yaz/wrbuf.h>
21 #include <yaz/xmalloc.h>
22 #include <yaz/nmem.h>
23 #include <yaz/tpath.h>
24
25 #if YAZ_HAVE_XML2
26 #include <libxml/parser.h>
27 #include <libxml/tree.h>
28 #include <libxml/xinclude.h>
29 #if YAZ_HAVE_XSLT
30 #include <libxslt/xsltutils.h>
31 #include <libxslt/transform.h>
32 #endif
33 #if YAZ_HAVE_EXSLT
34 #include <libexslt/exslt.h>
35 #endif
36
37 /** \brief The internal structure for yaz_record_conv_t */
38 struct yaz_record_conv_struct {
39     /** \brief memory for configuration */
40     NMEM nmem;
41
42     /** \brief conversion rules (allocated using NMEM) */
43     struct yaz_record_conv_rule *rules;
44
45     /** \brief pointer to last conversion rule pointer in chain */
46     struct yaz_record_conv_rule **rules_p;
47
48     /** \brief string buffer for error messages */
49     WRBUF wr_error;
50
51     /** \brief path for opening files  */
52     char *path;
53 };
54
/** \brief transformation types (rule types) */
enum YAZ_RECORD_CONV_RULE
{
    YAZ_RECORD_CONV_RULE_XSLT, /**< rule applies an XSLT stylesheet */
    YAZ_RECORD_CONV_RULE_MARC  /**< rule decodes and re-encodes a MARC record */
};
61
62
63 /** \brief tranformation info (rule info) */
64 struct yaz_record_conv_rule {
65     enum YAZ_RECORD_CONV_RULE which;
66     union {
67 #if YAZ_HAVE_XSLT
68         struct {
69             xsltStylesheetPtr xsp;
70         } xslt;
71 #endif
72         struct {
73             yaz_iconv_t iconv_t;
74             int input_format;
75             int output_format;
76         } marc;
77     } u;
78     struct yaz_record_conv_rule *next;
79 };
80
81 /** \brief reset rules+configuration */
82 static void yaz_record_conv_reset(yaz_record_conv_t p)
83 {
84
85     struct yaz_record_conv_rule *r;
86     for (r = p->rules; r; r = r->next)
87     {
88         if (r->which == YAZ_RECORD_CONV_RULE_MARC)
89         {
90             if (r->u.marc.iconv_t)
91                 yaz_iconv_close(r->u.marc.iconv_t);
92         }
93 #if YAZ_HAVE_XSLT
94         else if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
95         {
96             xsltFreeStylesheet(r->u.xslt.xsp);
97         }
98 #endif
99     }
100     wrbuf_rewind(p->wr_error);
101     nmem_reset(p->nmem);
102
103     p->rules = 0;
104
105     p->rules_p = &p->rules;
106 }
107
108 yaz_record_conv_t yaz_record_conv_create()
109 {
110     yaz_record_conv_t p = (yaz_record_conv_t) xmalloc(sizeof(*p));
111     p->nmem = nmem_create();
112     p->wr_error = wrbuf_alloc();
113     p->rules = 0;
114     p->path = 0;
115
116 #if YAZ_HAVE_EXSLT
117     exsltRegisterAll(); 
118 #endif
119     yaz_record_conv_reset(p);
120     return p;
121 }
122
123 void yaz_record_conv_destroy(yaz_record_conv_t p)
124 {
125     if (p)
126     {
127         yaz_record_conv_reset(p);
128         nmem_destroy(p->nmem);
129         wrbuf_destroy(p->wr_error);
130         xfree(p->path);
131         xfree(p);
132     }
133 }
134
135 /** \brief adds a rule */
136 static struct yaz_record_conv_rule *add_rule(yaz_record_conv_t p,
137                                              enum YAZ_RECORD_CONV_RULE type)
138 {
139     struct yaz_record_conv_rule *r = (struct yaz_record_conv_rule *)
140         nmem_malloc(p->nmem, sizeof(*r));
141     r->which = type;
142     r->next = 0;
143     *p->rules_p = r;
144     p->rules_p = &r->next;
145     return r;
146 }
147
148 /** \brief parse 'xslt' conversion node */
149 static int conv_xslt(yaz_record_conv_t p, const xmlNode *ptr)
150 {
151 #if YAZ_HAVE_XSLT
152     struct _xmlAttr *attr;
153     const char *stylesheet = 0;
154
155     for (attr = ptr->properties; attr; attr = attr->next)
156     {
157         if (!xmlStrcmp(attr->name, BAD_CAST "stylesheet") &&
158             attr->children && attr->children->type == XML_TEXT_NODE)
159             stylesheet = (const char *) attr->children->content;
160         else
161         {
162             wrbuf_printf(p->wr_error, "Bad attribute '%s'"
163                          "Expected stylesheet.", attr->name);
164             return -1;
165         }
166     }
167     if (!stylesheet)
168     {
169         wrbuf_printf(p->wr_error, "Element <xslt>: "
170                      "attribute 'stylesheet' expected");
171         return -1;
172     }
173     else
174     {
175         char fullpath[1024];
176         xsltStylesheetPtr xsp;
177         if (!yaz_filepath_resolve(stylesheet, p->path, 0, fullpath))
178         {
179             wrbuf_printf(p->wr_error, "Element <xslt stylesheet=\"%s\"/>:"
180                          " could not locate stylesheet '%s' with path '%s'",
181                          stylesheet, fullpath, p->path);
182             return -1;
183         }
184         xsp = xsltParseStylesheetFile((xmlChar*) fullpath);
185         if (!xsp)
186         {
187             wrbuf_printf(p->wr_error, "Element <xslt stylesheet=\"%s\"/>:"
188                          " parsing stylesheet '%s' with path '%s' failed,"
189 #if YAZ_HAVE_EXSLT
190                          " EXSLT enabled",
191 #else
192                          " EXSLT not supported",
193 #endif
194                          stylesheet, fullpath, p->path);
195             return -1;
196         }
197         else
198         {
199             struct yaz_record_conv_rule *r = 
200                 add_rule(p, YAZ_RECORD_CONV_RULE_XSLT);
201             r->u.xslt.xsp = xsp;
202         }
203     }
204     return 0;
205 #else
206     wrbuf_printf(p->wr_error, "xslt unsupported."
207                  " YAZ compiled without XSLT support");
208     return -1;
209 #endif
210 }
211
212 /** \brief parse 'marc' conversion node */
213 static int conv_marc(yaz_record_conv_t p, const xmlNode *ptr)
214 {
215     struct _xmlAttr *attr;
216     const char *input_charset = 0;
217     const char *output_charset = 0;
218     const char *input_format = 0;
219     const char *output_format = 0;
220     int input_format_mode = 0;
221     int output_format_mode = 0;
222     struct yaz_record_conv_rule *r;
223     yaz_iconv_t cd = 0;
224
225     for (attr = ptr->properties; attr; attr = attr->next)
226     {
227         if (!xmlStrcmp(attr->name, BAD_CAST "inputcharset") &&
228             attr->children && attr->children->type == XML_TEXT_NODE)
229             input_charset = (const char *) attr->children->content;
230         else if (!xmlStrcmp(attr->name, BAD_CAST "outputcharset") &&
231             attr->children && attr->children->type == XML_TEXT_NODE)
232             output_charset = (const char *) attr->children->content;
233         else if (!xmlStrcmp(attr->name, BAD_CAST "inputformat") &&
234             attr->children && attr->children->type == XML_TEXT_NODE)
235             input_format = (const char *) attr->children->content;
236         else if (!xmlStrcmp(attr->name, BAD_CAST "outputformat") &&
237             attr->children && attr->children->type == XML_TEXT_NODE)
238             output_format = (const char *) attr->children->content;
239         else
240         {
241             wrbuf_printf(p->wr_error, "Element <marc>: expected attributes"
242                          "'inputformat', 'inputcharset', 'outputformat' or"
243                          " 'outputcharset', got attribute '%s'", 
244                          attr->name);
245             return -1;
246         }
247     }
248     if (!input_format)
249     {
250         wrbuf_printf(p->wr_error, "Element <marc>: "
251                      "attribute 'inputformat' required");
252         return -1;
253     }
254     else if (!strcmp(input_format, "marc"))
255     {
256         input_format_mode = YAZ_MARC_ISO2709;
257     }
258     else if (!strcmp(input_format, "xml"))
259     {
260         input_format_mode = YAZ_MARC_MARCXML;
261         /** Libxml2 generates UTF-8 encoding by default .
262             So we convert from UTF-8 to outputcharset (if defined) 
263         */
264         if (!input_charset && output_charset)
265             input_charset = "utf-8";
266     }
267     else
268     {
269         wrbuf_printf(p->wr_error, "Element <marc inputformat='%s'>: "
270                      " Unsupported input format"
271                      " defined by attribute value", 
272                      input_format);
273         return -1;
274     }
275     
276     if (!output_format)
277     {
278         wrbuf_printf(p->wr_error, 
279                      "Element <marc>: attribute 'outputformat' required");
280         return -1;
281     }
282     else if (!strcmp(output_format, "line"))
283     {
284         output_format_mode = YAZ_MARC_LINE;
285     }
286     else if (!strcmp(output_format, "marcxml"))
287     {
288         output_format_mode = YAZ_MARC_MARCXML;
289         if (input_charset && !output_charset)
290             output_charset = "utf-8";
291     }
292     else if (!strcmp(output_format, "marc"))
293     {
294         output_format_mode = YAZ_MARC_ISO2709;
295     }
296     else if (!strcmp(output_format, "marcxchange"))
297     {
298         output_format_mode = YAZ_MARC_XCHANGE;
299         if (input_charset && !output_charset)
300             output_charset = "utf-8";
301     }
302     else
303     {
304         wrbuf_printf(p->wr_error, "Element <marc outputformat='%s'>: "
305                      " Unsupported output format"
306                      " defined by attribute value", 
307                      output_format);
308         return -1;
309     }
310     if (input_charset && output_charset)
311     {
312         cd = yaz_iconv_open(output_charset, input_charset);
313         if (!cd)
314         {
315             wrbuf_printf(p->wr_error, 
316                          "Element <marc inputcharset='%s' outputcharset='%s'>:"
317                          " Unsupported character set mapping"
318                          " defined by attribute values",
319                          input_charset, output_charset);
320             return -1;
321         }
322     }
323     else if (input_charset)
324     {
325         wrbuf_printf(p->wr_error, "Element <marc>: "
326                      "attribute 'outputcharset' missing");
327         return -1;
328     }
329     else if (output_charset)
330     {
331         wrbuf_printf(p->wr_error, "Element <marc>: "
332                      "attribute 'inputcharset' missing");
333         return -1;
334     }
335     r = add_rule(p, YAZ_RECORD_CONV_RULE_MARC);
336     r->u.marc.iconv_t = cd;
337
338     r->u.marc.input_format = input_format_mode;
339     r->u.marc.output_format = output_format_mode;
340     return 0;
341 }
342
343 int yaz_record_conv_configure(yaz_record_conv_t p, const xmlNode *ptr)
344 {
345     yaz_record_conv_reset(p);
346
347     /* parsing element children */
348     for (ptr = ptr->children; ptr; ptr = ptr->next)
349         {
350             if (ptr->type != XML_ELEMENT_NODE)
351                 continue;
352             if (!strcmp((const char *) ptr->name, "xslt"))
353                 {
354                     if (conv_xslt(p, ptr))
355                         return -1;
356                 }
357             else if (!strcmp((const char *) ptr->name, "marc"))
358                 {
359                     if (conv_marc(p, ptr))
360                         return -1;
361                 }
362             else
363                 {
364                     wrbuf_printf(p->wr_error, "Element <backend>: expected "
365                                  "<marc> or <xslt> element, got <%s>"
366                                  , ptr->name);
367                     return -1;
368                 }
369         }
370     return 0;
371 }
372
373 int yaz_record_conv_record(yaz_record_conv_t p,
374                            const char *input_record_buf,
375                            size_t input_record_len,
376                            WRBUF output_record)
377 {
378     int ret = 0;
379     WRBUF record = output_record; /* pointer transfer */
380     struct yaz_record_conv_rule *r = p->rules;
381     wrbuf_rewind(p->wr_error);
382     
383     wrbuf_write(record, input_record_buf, input_record_len);
384     for (; ret == 0 && r; r = r->next)
385     {
386         if (r->which == YAZ_RECORD_CONV_RULE_MARC)
387         {
388             yaz_marc_t mt = yaz_marc_create();
389
390             yaz_marc_xml(mt, r->u.marc.output_format);
391
392             if (r->u.marc.iconv_t)
393                 yaz_marc_iconv(mt, r->u.marc.iconv_t);
394             if (r->u.marc.input_format == YAZ_MARC_ISO2709)
395             {
396                 int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
397                                                wrbuf_len(record));
398                 if (sz > 0)
399                     ret = 0;
400                 else
401                     ret = -1;
402             }
403             else if (r->u.marc.input_format == YAZ_MARC_MARCXML)
404             {
405                 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
406                                                wrbuf_len(record));
407                 if (!doc)
408                 {
409                     wrbuf_printf(p->wr_error, "xmlParseMemory failed");
410                     ret = -1;
411                 }
412                 else
413                 {
414                     ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc));
415                     if (ret)
416                         wrbuf_printf(p->wr_error, "yaz_marc_read_xml failed");
417                 }
418                 xmlFreeDoc(doc);
419             }
420             else
421             {
422                 wrbuf_printf(p->wr_error, "unsupported input format");
423                 ret = -1;
424             }
425             if (ret == 0)
426             {
427                 wrbuf_rewind(record);
428                 ret = yaz_marc_write_mode(mt, record);
429                 if (ret)
430                     wrbuf_printf(p->wr_error, "yaz_marc_write_mode failed");
431             }
432             yaz_marc_destroy(mt);
433         }
434 #if YAZ_HAVE_XSLT
435         else if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
436         {
437             xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
438                                            wrbuf_len(record));
439             if (!doc)
440             {
441                 wrbuf_printf(p->wr_error, "xmlParseMemory failed");
442                 ret = -1;
443             }
444             else
445             {
446                 xmlDocPtr res = xsltApplyStylesheet(r->u.xslt.xsp, doc, 0);
447                 if (res)
448                 {
449                     xmlChar *out_buf = 0;
450                     int out_len;
451
452 #if YAZ_HAVE_XSLTSAVERESULTTOSTRING
453                     xsltSaveResultToString(&out_buf, &out_len, res,
454                                            r->u.xslt.xsp); 
455 #else
456                     xmlDocDumpFormatMemory (res, &out_buf, &out_len, 1);
457 #endif
458                     if (!out_buf)
459                     {
460                         wrbuf_printf(p->wr_error,
461                                      "xsltSaveResultToString failed");
462                         ret = -1;
463                     }
464                     else
465                     {
466                         wrbuf_rewind(record);
467                         wrbuf_write(record, (const char *) out_buf, out_len);
468                         
469                         xmlFree(out_buf);
470                     }
471                     xmlFreeDoc(res);
472                 }
473                 else
474                 {
475                     wrbuf_printf(p->wr_error, "xsltApplyStylesheet failed");
476                     ret = -1;
477                 }
478                 xmlFreeDoc(doc);
479             }
480         }
481 #endif
482     }
483     return ret;
484 }
485
486 const char *yaz_record_conv_get_error(yaz_record_conv_t p)
487 {
488     return wrbuf_cstr(p->wr_error);
489 }
490
491 void yaz_record_conv_set_path(yaz_record_conv_t p, const char *path)
492 {
493     xfree(p->path);
494     p->path = 0;
495     if (path)
496         p->path = xstrdup(path);
497 }
498 #endif
499
500 /*
501  * Local variables:
502  * c-basic-offset: 4
503  * indent-tabs-mode: nil
504  * End:
505  * vim: shiftwidth=4 tabstop=8 expandtab
506  */
507