Changed messages for XSLT conversion errors.
[yaz-moved-to-github.git] / src / record_conv.c
1 /*
2  * Copyright (C) 2005-2007, Index Data ApS
3  * See the file LICENSE for details.
4  *
5  * $Id: record_conv.c,v 1.17 2007-12-16 11:08:51 adam Exp $
6  */
7 /**
8  * \file record_conv.c
9  * \brief Record Conversions utility
10  */
11
12 #if HAVE_CONFIG_H
13 #include <config.h>
14 #endif
15
16 #include <string.h>
17 #include <yaz/yaz-iconv.h>
18 #include <yaz/marcdisp.h>
19 #include <yaz/record_conv.h>
20 #include <yaz/wrbuf.h>
21 #include <yaz/xmalloc.h>
22 #include <yaz/nmem.h>
23 #include <yaz/tpath.h>
24 #include <yaz/z-opac.h>
25
26 #if YAZ_HAVE_XML2
27 #include <libxml/parser.h>
28 #include <libxml/tree.h>
29 #include <libxml/xinclude.h>
30 #if YAZ_HAVE_XSLT
31 #include <libxslt/xsltutils.h>
32 #include <libxslt/transform.h>
33 #endif
34 #if YAZ_HAVE_EXSLT
35 #include <libexslt/exslt.h>
36 #endif
37
38 /** \brief Internal state behind the opaque yaz_record_conv_t handle */
39 struct yaz_record_conv_struct {
40     /** \brief NMEM that rule nodes are allocated from; reset on reconfigure */
41     NMEM nmem;
42
43     /** \brief head of the conversion rule chain (nodes allocated using NMEM) */
44     struct yaz_record_conv_rule *rules;
45
46     /** \brief address of the link that the next appended rule is stored in */
47     struct yaz_record_conv_rule **rules_p;
48
49     /** \brief string buffer holding the most recent error message */
50     WRBUF wr_error;
51
52     /** \brief search path for resolving stylesheets (xstrdup'ed copy or 0) */
53     char *path;
54 };
55
56 /** \brief transformation types (rule types); selects the active union member */
57 enum YAZ_RECORD_CONV_RULE 
58 {
59     YAZ_RECORD_CONV_RULE_XSLT, /**< rule created from an <xslt> element */
60     YAZ_RECORD_CONV_RULE_MARC  /**< rule created from a <marc> element */
61 };
62
63
64 /** \brief transformation info (rule info); one node in the rule chain */
65 struct yaz_record_conv_rule {
66     enum YAZ_RECORD_CONV_RULE which; /**< tells which member of u is valid */
67     union {
68 #if YAZ_HAVE_XSLT
69         struct {
70             xsltStylesheetPtr xsp; /**< parsed stylesheet, freed on reset */
71         } xslt;
72 #endif
73         struct {
74             yaz_iconv_t iconv_t;   /**< charset converter, 0 if none configured */
75             int input_format;      /**< YAZ_MARC_* value from 'inputformat' */
76             int output_format;     /**< YAZ_MARC_* value from 'outputformat' */
77         } marc;
78     } u;
79     struct yaz_record_conv_rule *next; /**< following rule, 0 at end of chain */
80 };
81
82 /** \brief reset rules+configuration */
83 static void yaz_record_conv_reset(yaz_record_conv_t p)
84 {
85
86     struct yaz_record_conv_rule *r;
87     for (r = p->rules; r; r = r->next)
88     {
89         if (r->which == YAZ_RECORD_CONV_RULE_MARC)
90         {
91             if (r->u.marc.iconv_t)
92                 yaz_iconv_close(r->u.marc.iconv_t);
93         }
94 #if YAZ_HAVE_XSLT
95         else if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
96         {
97             xsltFreeStylesheet(r->u.xslt.xsp);
98         }
99 #endif
100     }
101     wrbuf_rewind(p->wr_error);
102     nmem_reset(p->nmem);
103
104     p->rules = 0;
105
106     p->rules_p = &p->rules;
107 }
108
109 yaz_record_conv_t yaz_record_conv_create()
110 {
111     yaz_record_conv_t p = (yaz_record_conv_t) xmalloc(sizeof(*p));
112     p->nmem = nmem_create();
113     p->wr_error = wrbuf_alloc();
114     p->rules = 0;
115     p->path = 0;
116
117 #if YAZ_HAVE_EXSLT
118     exsltRegisterAll(); 
119 #endif
120     yaz_record_conv_reset(p);
121     return p;
122 }
123
124 void yaz_record_conv_destroy(yaz_record_conv_t p)
125 {
126     if (p)
127     {
128         yaz_record_conv_reset(p);
129         nmem_destroy(p->nmem);
130         wrbuf_destroy(p->wr_error);
131         xfree(p->path);
132         xfree(p);
133     }
134 }
135
136 /** \brief adds a rule */
137 static struct yaz_record_conv_rule *add_rule(yaz_record_conv_t p,
138                                              enum YAZ_RECORD_CONV_RULE type)
139 {
140     struct yaz_record_conv_rule *r = (struct yaz_record_conv_rule *)
141         nmem_malloc(p->nmem, sizeof(*r));
142     r->which = type;
143     r->next = 0;
144     *p->rules_p = r;
145     p->rules_p = &r->next;
146     return r;
147 }
148
149 /** \brief parse 'xslt' conversion node */
150 static int conv_xslt(yaz_record_conv_t p, const xmlNode *ptr)
151 {
152 #if YAZ_HAVE_XSLT
153     struct _xmlAttr *attr;
154     const char *stylesheet = 0;
155
156     for (attr = ptr->properties; attr; attr = attr->next)
157     {
158         if (!xmlStrcmp(attr->name, BAD_CAST "stylesheet") &&
159             attr->children && attr->children->type == XML_TEXT_NODE)
160             stylesheet = (const char *) attr->children->content;
161         else
162         {
163             wrbuf_printf(p->wr_error, "Bad attribute '%s'"
164                          "Expected stylesheet.", attr->name);
165             return -1;
166         }
167     }
168     if (!stylesheet)
169     {
170         wrbuf_printf(p->wr_error, "Element <xslt>: "
171                      "attribute 'stylesheet' expected");
172         return -1;
173     }
174     else
175     {
176         char fullpath[1024];
177         xsltStylesheetPtr xsp;
178         if (!yaz_filepath_resolve(stylesheet, p->path, 0, fullpath))
179         {
180             wrbuf_printf(p->wr_error, "Element <xslt stylesheet=\"%s\"/>:"
181                          " could not locate stylesheet '%s'",
182                          stylesheet, fullpath);
183             if (p->path)
184                 wrbuf_printf(p->wr_error, " with path '%s'", p->path);
185                 
186             return -1;
187         }
188         xsp = xsltParseStylesheetFile((xmlChar*) fullpath);
189         if (!xsp)
190         {
191             wrbuf_printf(p->wr_error, "Element: <xslt stylesheet=\"%s\"/>:"
192                          " xslt parse failed: %s", stylesheet, fullpath);
193             if (p->path)
194                 wrbuf_printf(p->wr_error, " with path '%s'", p->path);
195             wrbuf_printf(p->wr_error, " ("
196 #if YAZ_HAVE_EXSLT
197                          
198                          "EXSLT enabled"
199 #else
200                          "EXSLT not supported"
201 #endif
202                          ")");
203             return -1;
204         }
205         else
206         {
207             struct yaz_record_conv_rule *r = 
208                 add_rule(p, YAZ_RECORD_CONV_RULE_XSLT);
209             r->u.xslt.xsp = xsp;
210         }
211     }
212     return 0;
213 #else
214     wrbuf_printf(p->wr_error, "xslt unsupported."
215                  " YAZ compiled without XSLT support");
216     return -1;
217 #endif
218 }
219
220 /** \brief parse 'marc' conversion node */
221 static int conv_marc(yaz_record_conv_t p, const xmlNode *ptr)
222 {
223     struct _xmlAttr *attr;
224     const char *input_charset = 0;
225     const char *output_charset = 0;
226     const char *input_format = 0;
227     const char *output_format = 0;
228     int input_format_mode = 0;
229     int output_format_mode = 0;
230     struct yaz_record_conv_rule *r;
231     yaz_iconv_t cd = 0;
232
233     for (attr = ptr->properties; attr; attr = attr->next)
234     {
235         if (!xmlStrcmp(attr->name, BAD_CAST "inputcharset") &&
236             attr->children && attr->children->type == XML_TEXT_NODE)
237             input_charset = (const char *) attr->children->content;
238         else if (!xmlStrcmp(attr->name, BAD_CAST "outputcharset") &&
239             attr->children && attr->children->type == XML_TEXT_NODE)
240             output_charset = (const char *) attr->children->content;
241         else if (!xmlStrcmp(attr->name, BAD_CAST "inputformat") &&
242             attr->children && attr->children->type == XML_TEXT_NODE)
243             input_format = (const char *) attr->children->content;
244         else if (!xmlStrcmp(attr->name, BAD_CAST "outputformat") &&
245             attr->children && attr->children->type == XML_TEXT_NODE)
246             output_format = (const char *) attr->children->content;
247         else
248         {
249             wrbuf_printf(p->wr_error, "Element <marc>: expected attributes"
250                          "'inputformat', 'inputcharset', 'outputformat' or"
251                          " 'outputcharset', got attribute '%s'", 
252                          attr->name);
253             return -1;
254         }
255     }
256     if (!input_format)
257     {
258         wrbuf_printf(p->wr_error, "Element <marc>: "
259                      "attribute 'inputformat' required");
260         return -1;
261     }
262     else if (!strcmp(input_format, "marc"))
263     {
264         input_format_mode = YAZ_MARC_ISO2709;
265     }
266     else if (!strcmp(input_format, "xml"))
267     {
268         input_format_mode = YAZ_MARC_MARCXML;
269         /** Libxml2 generates UTF-8 encoding by default .
270             So we convert from UTF-8 to outputcharset (if defined) 
271         */
272         if (!input_charset && output_charset)
273             input_charset = "utf-8";
274     }
275     else
276     {
277         wrbuf_printf(p->wr_error, "Element <marc inputformat='%s'>: "
278                      " Unsupported input format"
279                      " defined by attribute value", 
280                      input_format);
281         return -1;
282     }
283     
284     if (!output_format)
285     {
286         wrbuf_printf(p->wr_error, 
287                      "Element <marc>: attribute 'outputformat' required");
288         return -1;
289     }
290     else if (!strcmp(output_format, "line"))
291     {
292         output_format_mode = YAZ_MARC_LINE;
293     }
294     else if (!strcmp(output_format, "marcxml"))
295     {
296         output_format_mode = YAZ_MARC_MARCXML;
297         if (input_charset && !output_charset)
298             output_charset = "utf-8";
299     }
300     else if (!strcmp(output_format, "marc"))
301     {
302         output_format_mode = YAZ_MARC_ISO2709;
303     }
304     else if (!strcmp(output_format, "marcxchange"))
305     {
306         output_format_mode = YAZ_MARC_XCHANGE;
307         if (input_charset && !output_charset)
308             output_charset = "utf-8";
309     }
310     else
311     {
312         wrbuf_printf(p->wr_error, "Element <marc outputformat='%s'>: "
313                      " Unsupported output format"
314                      " defined by attribute value", 
315                      output_format);
316         return -1;
317     }
318     if (input_charset && output_charset)
319     {
320         cd = yaz_iconv_open(output_charset, input_charset);
321         if (!cd)
322         {
323             wrbuf_printf(p->wr_error, 
324                          "Element <marc inputcharset='%s' outputcharset='%s'>:"
325                          " Unsupported character set mapping"
326                          " defined by attribute values",
327                          input_charset, output_charset);
328             return -1;
329         }
330     }
331     else if (input_charset)
332     {
333         wrbuf_printf(p->wr_error, "Element <marc>: "
334                      "attribute 'outputcharset' missing");
335         return -1;
336     }
337     else if (output_charset)
338     {
339         wrbuf_printf(p->wr_error, "Element <marc>: "
340                      "attribute 'inputcharset' missing");
341         return -1;
342     }
343     r = add_rule(p, YAZ_RECORD_CONV_RULE_MARC);
344     r->u.marc.iconv_t = cd;
345
346     r->u.marc.input_format = input_format_mode;
347     r->u.marc.output_format = output_format_mode;
348     return 0;
349 }
350
351 int yaz_record_conv_configure(yaz_record_conv_t p, const xmlNode *ptr)
352 {
353     yaz_record_conv_reset(p);
354
355     /* parsing element children */
356     for (ptr = ptr->children; ptr; ptr = ptr->next)
357         {
358             if (ptr->type != XML_ELEMENT_NODE)
359                 continue;
360             if (!strcmp((const char *) ptr->name, "xslt"))
361                 {
362                     if (conv_xslt(p, ptr))
363                         return -1;
364                 }
365             else if (!strcmp((const char *) ptr->name, "marc"))
366                 {
367                     if (conv_marc(p, ptr))
368                         return -1;
369                 }
370             else
371                 {
372                     wrbuf_printf(p->wr_error, "Element <backend>: expected "
373                                  "<marc> or <xslt> element, got <%s>"
374                                  , ptr->name);
375                     return -1;
376                 }
377         }
378     return 0;
379 }
380
381 static int yaz_record_conv_record_rule(yaz_record_conv_t p,
382                                        struct yaz_record_conv_rule *r,
383                                        const char *input_record_buf,
384                                        size_t input_record_len,
385                                        WRBUF output_record);
386
387 int yaz_record_conv_opac_record(yaz_record_conv_t p,
388                                 Z_OPACRecord *input_record,
389                                 WRBUF output_record)
390 {
391     int ret = 0;
392     struct yaz_record_conv_rule *r = p->rules;
393     WRBUF res = wrbuf_alloc();
394     yaz_marc_t mt = yaz_marc_create();
395     
396     wrbuf_rewind(p->wr_error);
397     yaz_marc_xml(mt, r->u.marc.output_format);
398     if (r->u.marc.iconv_t)
399         yaz_marc_iconv(mt, r->u.marc.iconv_t);
400     yaz_opac_decode_wrbuf(mt, input_record, res);
401     if (ret != -1)
402     {
403         ret = yaz_record_conv_record_rule(p, 
404                                           r->next,
405                                           wrbuf_buf(res), wrbuf_len(res),
406                                           output_record);
407     }
408     yaz_marc_destroy(mt);
409     wrbuf_destroy(res);
410     return ret;
411 }
412
413 int yaz_record_conv_record(yaz_record_conv_t p,
414                            const char *input_record_buf,
415                            size_t input_record_len,
416                            WRBUF output_record)
417 {
418     return yaz_record_conv_record_rule(p, p->rules,
419                                        input_record_buf,
420                                        input_record_len, output_record);
421 }
422
423 static int yaz_record_conv_record_rule(yaz_record_conv_t p,
424                                        struct yaz_record_conv_rule *r,
425                                        const char *input_record_buf,
426                                        size_t input_record_len,
427                                        WRBUF output_record)
428 {
429     int ret = 0;
430     WRBUF record = output_record; /* pointer transfer */
431     wrbuf_rewind(p->wr_error);
432     
433     wrbuf_write(record, input_record_buf, input_record_len);
434     for (; ret == 0 && r; r = r->next)
435     {
436         if (r->which == YAZ_RECORD_CONV_RULE_MARC)
437         {
438             yaz_marc_t mt = yaz_marc_create();
439
440             yaz_marc_xml(mt, r->u.marc.output_format);
441
442             if (r->u.marc.iconv_t)
443                 yaz_marc_iconv(mt, r->u.marc.iconv_t);
444             if (r->u.marc.input_format == YAZ_MARC_ISO2709)
445             {
446                 int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
447                                                wrbuf_len(record));
448                 if (sz > 0)
449                     ret = 0;
450                 else
451                     ret = -1;
452             }
453             else if (r->u.marc.input_format == YAZ_MARC_MARCXML)
454             {
455                 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
456                                                wrbuf_len(record));
457                 if (!doc)
458                 {
459                     wrbuf_printf(p->wr_error, "xmlParseMemory failed");
460                     ret = -1;
461                 }
462                 else
463                 {
464                     ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc));
465                     if (ret)
466                         wrbuf_printf(p->wr_error, "yaz_marc_read_xml failed");
467                 }
468                 xmlFreeDoc(doc);
469             }
470             else
471             {
472                 wrbuf_printf(p->wr_error, "unsupported input format");
473                 ret = -1;
474             }
475             if (ret == 0)
476             {
477                 wrbuf_rewind(record);
478                 ret = yaz_marc_write_mode(mt, record);
479                 if (ret)
480                     wrbuf_printf(p->wr_error, "yaz_marc_write_mode failed");
481             }
482             yaz_marc_destroy(mt);
483         }
484 #if YAZ_HAVE_XSLT
485         else if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
486         {
487             xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
488                                            wrbuf_len(record));
489             if (!doc)
490             {
491                 wrbuf_printf(p->wr_error, "xmlParseMemory failed");
492                 ret = -1;
493             }
494             else
495             {
496                 xmlDocPtr res = xsltApplyStylesheet(r->u.xslt.xsp, doc, 0);
497                 if (res)
498                 {
499                     xmlChar *out_buf = 0;
500                     int out_len;
501
502 #if YAZ_HAVE_XSLTSAVERESULTTOSTRING
503                     xsltSaveResultToString(&out_buf, &out_len, res,
504                                            r->u.xslt.xsp); 
505 #else
506                     xmlDocDumpFormatMemory (res, &out_buf, &out_len, 1);
507 #endif
508                     if (!out_buf)
509                     {
510                         wrbuf_printf(p->wr_error,
511                                      "xsltSaveResultToString failed");
512                         ret = -1;
513                     }
514                     else
515                     {
516                         wrbuf_rewind(record);
517                         wrbuf_write(record, (const char *) out_buf, out_len);
518                         
519                         xmlFree(out_buf);
520                     }
521                     xmlFreeDoc(res);
522                 }
523                 else
524                 {
525                     wrbuf_printf(p->wr_error, "xsltApplyStylesheet failed");
526                     ret = -1;
527                 }
528                 xmlFreeDoc(doc);
529             }
530         }
531 #endif
532     }
533     return ret;
534 }
535
536 const char *yaz_record_conv_get_error(yaz_record_conv_t p)
537 {
538     return wrbuf_cstr(p->wr_error);
539 }
540
541 void yaz_record_conv_set_path(yaz_record_conv_t p, const char *path)
542 {
543     xfree(p->path);
544     p->path = 0;
545     if (path)
546         p->path = xstrdup(path);
547 }
548 #endif
549
550 /*
551  * Local variables:
552  * c-basic-offset: 4
553  * indent-tabs-mode: nil
554  * End:
555  * vim: shiftwidth=4 tabstop=8 expandtab
556  */
557