fd250e5bb7b84af1b17f73cf2c556d4198181c04
[yaz-moved-to-github.git] / src / record_conv.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2008 Index Data
3  * See the file LICENSE for details.
4  */
5 /**
6  * \file record_conv.c
7  * \brief Record Conversions utility
8  */
9
10 #if HAVE_CONFIG_H
11 #include <config.h>
12 #endif
13
14 #include <string.h>
15 #include <yaz/yaz-iconv.h>
16 #include <yaz/marcdisp.h>
17 #include <yaz/record_conv.h>
18 #include <yaz/wrbuf.h>
19 #include <yaz/xmalloc.h>
20 #include <yaz/nmem.h>
21 #include <yaz/tpath.h>
22 #include <yaz/z-opac.h>
23
24 #if YAZ_HAVE_XML2
25 #include <libxml/parser.h>
26 #include <libxml/tree.h>
27 #include <libxml/xinclude.h>
28 #if YAZ_HAVE_XSLT
29 #include <libxslt/xsltutils.h>
30 #include <libxslt/transform.h>
31 #endif
32 #if YAZ_HAVE_EXSLT
33 #include <libexslt/exslt.h>
34 #endif
35
36 /** \brief The internal structure for yaz_record_conv_t */
37 struct yaz_record_conv_struct {
38     /** \brief memory for configuration */
39     NMEM nmem;
40
41     /** \brief conversion rules (allocated using NMEM) */
42     struct yaz_record_conv_rule *rules;
43
44     /** \brief pointer to last conversion rule pointer in chain */
45     struct yaz_record_conv_rule **rules_p;
46
47     /** \brief string buffer for error messages */
48     WRBUF wr_error;
49
50     /** \brief path for opening files  */
51     char *path;
52 };
53
/** \brief transformation types (rule types) */
enum YAZ_RECORD_CONV_RULE
{
    /** \brief rule applies an XSLT stylesheet to the record */
    YAZ_RECORD_CONV_RULE_XSLT,
    /** \brief rule reads a MARC record and writes it in another format */
    YAZ_RECORD_CONV_RULE_MARC
};
60
61
62 /** \brief tranformation info (rule info) */
63 struct yaz_record_conv_rule {
64     enum YAZ_RECORD_CONV_RULE which;
65     union {
66 #if YAZ_HAVE_XSLT
67         struct {
68             xsltStylesheetPtr xsp;
69         } xslt;
70 #endif
71         struct {
72             const char *input_charset;
73             const char *output_charset;
74             int input_format;
75             int output_format;
76         } marc;
77     } u;
78     struct yaz_record_conv_rule *next;
79 };
80
81 /** \brief reset rules+configuration */
82 static void yaz_record_conv_reset(yaz_record_conv_t p)
83 {
84
85     struct yaz_record_conv_rule *r;
86     for (r = p->rules; r; r = r->next)
87     {
88         if (r->which == YAZ_RECORD_CONV_RULE_MARC)
89         {
90             ;
91         }
92 #if YAZ_HAVE_XSLT
93         else if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
94         {
95             xsltFreeStylesheet(r->u.xslt.xsp);
96         }
97 #endif
98     }
99     wrbuf_rewind(p->wr_error);
100     nmem_reset(p->nmem);
101
102     p->rules = 0;
103
104     p->rules_p = &p->rules;
105 }
106
107 yaz_record_conv_t yaz_record_conv_create()
108 {
109     yaz_record_conv_t p = (yaz_record_conv_t) xmalloc(sizeof(*p));
110     p->nmem = nmem_create();
111     p->wr_error = wrbuf_alloc();
112     p->rules = 0;
113     p->path = 0;
114
115 #if YAZ_HAVE_EXSLT
116     exsltRegisterAll(); 
117 #endif
118     yaz_record_conv_reset(p);
119     return p;
120 }
121
122 void yaz_record_conv_destroy(yaz_record_conv_t p)
123 {
124     if (p)
125     {
126         yaz_record_conv_reset(p);
127         nmem_destroy(p->nmem);
128         wrbuf_destroy(p->wr_error);
129         xfree(p->path);
130         xfree(p);
131     }
132 }
133
134 /** \brief adds a rule */
135 static struct yaz_record_conv_rule *add_rule(yaz_record_conv_t p,
136                                              enum YAZ_RECORD_CONV_RULE type)
137 {
138     struct yaz_record_conv_rule *r = (struct yaz_record_conv_rule *)
139         nmem_malloc(p->nmem, sizeof(*r));
140     r->which = type;
141     r->next = 0;
142     *p->rules_p = r;
143     p->rules_p = &r->next;
144     return r;
145 }
146
147 /** \brief parse 'xslt' conversion node */
148 static int conv_xslt(yaz_record_conv_t p, const xmlNode *ptr)
149 {
150 #if YAZ_HAVE_XSLT
151     struct _xmlAttr *attr;
152     const char *stylesheet = 0;
153
154     for (attr = ptr->properties; attr; attr = attr->next)
155     {
156         if (!xmlStrcmp(attr->name, BAD_CAST "stylesheet") &&
157             attr->children && attr->children->type == XML_TEXT_NODE)
158             stylesheet = (const char *) attr->children->content;
159         else
160         {
161             wrbuf_printf(p->wr_error, "Bad attribute '%s'"
162                          "Expected stylesheet.", attr->name);
163             return -1;
164         }
165     }
166     if (!stylesheet)
167     {
168         wrbuf_printf(p->wr_error, "Element <xslt>: "
169                      "attribute 'stylesheet' expected");
170         return -1;
171     }
172     else
173     {
174         char fullpath[1024];
175         xsltStylesheetPtr xsp;
176         if (!yaz_filepath_resolve(stylesheet, p->path, 0, fullpath))
177         {
178             wrbuf_printf(p->wr_error, "Element <xslt stylesheet=\"%s\"/>:"
179                          " could not locate stylesheet '%s'",
180                          stylesheet, fullpath);
181             if (p->path)
182                 wrbuf_printf(p->wr_error, " with path '%s'", p->path);
183                 
184             return -1;
185         }
186         xsp = xsltParseStylesheetFile((xmlChar*) fullpath);
187         if (!xsp)
188         {
189             wrbuf_printf(p->wr_error, "Element: <xslt stylesheet=\"%s\"/>:"
190                          " xslt parse failed: %s", stylesheet, fullpath);
191             if (p->path)
192                 wrbuf_printf(p->wr_error, " with path '%s'", p->path);
193             wrbuf_printf(p->wr_error, " ("
194 #if YAZ_HAVE_EXSLT
195                          
196                          "EXSLT enabled"
197 #else
198                          "EXSLT not supported"
199 #endif
200                          ")");
201             return -1;
202         }
203         else
204         {
205             struct yaz_record_conv_rule *r = 
206                 add_rule(p, YAZ_RECORD_CONV_RULE_XSLT);
207             r->u.xslt.xsp = xsp;
208         }
209     }
210     return 0;
211 #else
212     wrbuf_printf(p->wr_error, "xslt unsupported."
213                  " YAZ compiled without XSLT support");
214     return -1;
215 #endif
216 }
217
218 /** \brief parse 'marc' conversion node */
219 static int conv_marc(yaz_record_conv_t p, const xmlNode *ptr)
220 {
221     struct _xmlAttr *attr;
222     const char *input_charset = 0;
223     const char *output_charset = 0;
224     const char *input_format = 0;
225     const char *output_format = 0;
226     int input_format_mode = 0;
227     int output_format_mode = 0;
228     struct yaz_record_conv_rule *r;
229
230     for (attr = ptr->properties; attr; attr = attr->next)
231     {
232         if (!xmlStrcmp(attr->name, BAD_CAST "inputcharset") &&
233             attr->children && attr->children->type == XML_TEXT_NODE)
234             input_charset = (const char *) attr->children->content;
235         else if (!xmlStrcmp(attr->name, BAD_CAST "outputcharset") &&
236             attr->children && attr->children->type == XML_TEXT_NODE)
237             output_charset = (const char *) attr->children->content;
238         else if (!xmlStrcmp(attr->name, BAD_CAST "inputformat") &&
239             attr->children && attr->children->type == XML_TEXT_NODE)
240             input_format = (const char *) attr->children->content;
241         else if (!xmlStrcmp(attr->name, BAD_CAST "outputformat") &&
242             attr->children && attr->children->type == XML_TEXT_NODE)
243             output_format = (const char *) attr->children->content;
244         else
245         {
246             wrbuf_printf(p->wr_error, "Element <marc>: expected attributes"
247                          "'inputformat', 'inputcharset', 'outputformat' or"
248                          " 'outputcharset', got attribute '%s'", 
249                          attr->name);
250             return -1;
251         }
252     }
253     if (!input_format)
254     {
255         wrbuf_printf(p->wr_error, "Element <marc>: "
256                      "attribute 'inputformat' required");
257         return -1;
258     }
259     else if (!strcmp(input_format, "marc"))
260     {
261         input_format_mode = YAZ_MARC_ISO2709;
262     }
263     else if (!strcmp(input_format, "xml"))
264     {
265         input_format_mode = YAZ_MARC_MARCXML;
266         /** Libxml2 generates UTF-8 encoding by default .
267             So we convert from UTF-8 to outputcharset (if defined) 
268         */
269         if (!input_charset && output_charset)
270             input_charset = "utf-8";
271     }
272     else
273     {
274         wrbuf_printf(p->wr_error, "Element <marc inputformat='%s'>: "
275                      " Unsupported input format"
276                      " defined by attribute value", 
277                      input_format);
278         return -1;
279     }
280     
281     if (!output_format)
282     {
283         wrbuf_printf(p->wr_error, 
284                      "Element <marc>: attribute 'outputformat' required");
285         return -1;
286     }
287     else if (!strcmp(output_format, "line"))
288     {
289         output_format_mode = YAZ_MARC_LINE;
290     }
291     else if (!strcmp(output_format, "marcxml"))
292     {
293         output_format_mode = YAZ_MARC_MARCXML;
294         if (input_charset && !output_charset)
295             output_charset = "utf-8";
296     }
297     else if (!strcmp(output_format, "marc"))
298     {
299         output_format_mode = YAZ_MARC_ISO2709;
300     }
301     else if (!strcmp(output_format, "marcxchange"))
302     {
303         output_format_mode = YAZ_MARC_XCHANGE;
304         if (input_charset && !output_charset)
305             output_charset = "utf-8";
306     }
307     else
308     {
309         wrbuf_printf(p->wr_error, "Element <marc outputformat='%s'>: "
310                      " Unsupported output format"
311                      " defined by attribute value", 
312                      output_format);
313         return -1;
314     }
315     if (input_charset && output_charset)
316     {
317         yaz_iconv_t cd = yaz_iconv_open(output_charset, input_charset);
318         if (!cd)
319         {
320             wrbuf_printf(p->wr_error, 
321                          "Element <marc inputcharset='%s' outputcharset='%s'>:"
322                          " Unsupported character set mapping"
323                          " defined by attribute values",
324                          input_charset, output_charset);
325             return -1;
326         }
327         yaz_iconv_close(cd);
328     }
329     else if (input_charset)
330     {
331         wrbuf_printf(p->wr_error, "Element <marc>: "
332                      "attribute 'outputcharset' missing");
333         return -1;
334     }
335     else if (output_charset)
336     {
337         wrbuf_printf(p->wr_error, "Element <marc>: "
338                      "attribute 'inputcharset' missing");
339         return -1;
340     }
341     r = add_rule(p, YAZ_RECORD_CONV_RULE_MARC);
342
343     r->u.marc.input_charset = nmem_strdup(p->nmem, input_charset);
344     r->u.marc.output_charset = nmem_strdup(p->nmem, output_charset);
345     r->u.marc.input_format = input_format_mode;
346     r->u.marc.output_format = output_format_mode;
347     return 0;
348 }
349
350 int yaz_record_conv_configure(yaz_record_conv_t p, const xmlNode *ptr)
351 {
352     yaz_record_conv_reset(p);
353
354     /* parsing element children */
355     for (ptr = ptr->children; ptr; ptr = ptr->next)
356         {
357             if (ptr->type != XML_ELEMENT_NODE)
358                 continue;
359             if (!strcmp((const char *) ptr->name, "xslt"))
360                 {
361                     if (conv_xslt(p, ptr))
362                         return -1;
363                 }
364             else if (!strcmp((const char *) ptr->name, "marc"))
365                 {
366                     if (conv_marc(p, ptr))
367                         return -1;
368                 }
369             else
370                 {
371                     wrbuf_printf(p->wr_error, "Element <backend>: expected "
372                                  "<marc> or <xslt> element, got <%s>"
373                                  , ptr->name);
374                     return -1;
375                 }
376         }
377     return 0;
378 }
379
380 static int yaz_record_conv_record_rule(yaz_record_conv_t p,
381                                        struct yaz_record_conv_rule *r,
382                                        const char *input_record_buf,
383                                        size_t input_record_len,
384                                        WRBUF output_record);
385
386 int yaz_record_conv_opac_record(yaz_record_conv_t p,
387                                 Z_OPACRecord *input_record,
388                                 WRBUF output_record)
389 {
390     int ret = 0;
391     struct yaz_record_conv_rule *r = p->rules;
392     if (!r || r->which != YAZ_RECORD_CONV_RULE_MARC)
393         ret = -1; /* no marc rule so we can't do OPAC */
394     else
395     {
396         WRBUF res = wrbuf_alloc();
397         yaz_marc_t mt = yaz_marc_create();
398         yaz_iconv_t cd = yaz_iconv_open(r->u.marc.output_charset,
399                                         r->u.marc.input_charset);
400         
401         wrbuf_rewind(p->wr_error);
402         yaz_marc_xml(mt, r->u.marc.output_format);
403         
404         yaz_marc_iconv(mt, cd);
405         
406         yaz_opac_decode_wrbuf(mt, input_record, res);
407         if (ret != -1)
408         {
409             ret = yaz_record_conv_record_rule(p, 
410                                               r->next,
411                                               wrbuf_buf(res), wrbuf_len(res),
412                                               output_record);
413         }
414         yaz_marc_destroy(mt);
415         if (cd)
416             yaz_iconv_close(cd);
417         wrbuf_destroy(res);
418     }
419     return ret;
420 }
421
422 int yaz_record_conv_record(yaz_record_conv_t p,
423                            const char *input_record_buf,
424                            size_t input_record_len,
425                            WRBUF output_record)
426 {
427     return yaz_record_conv_record_rule(p, p->rules,
428                                        input_record_buf,
429                                        input_record_len, output_record);
430 }
431
432 static int yaz_record_conv_record_rule(yaz_record_conv_t p,
433                                        struct yaz_record_conv_rule *r,
434                                        const char *input_record_buf,
435                                        size_t input_record_len,
436                                        WRBUF output_record)
437 {
438     int ret = 0;
439     WRBUF record = output_record; /* pointer transfer */
440     wrbuf_rewind(p->wr_error);
441     
442     wrbuf_write(record, input_record_buf, input_record_len);
443     for (; ret == 0 && r; r = r->next)
444     {
445         if (r->which == YAZ_RECORD_CONV_RULE_MARC)
446         {
447             yaz_iconv_t cd = 
448                 yaz_iconv_open(r->u.marc.output_charset,
449                                r->u.marc.input_charset);
450             yaz_marc_t mt = yaz_marc_create();
451
452             yaz_marc_xml(mt, r->u.marc.output_format);
453
454             if (cd)
455                 yaz_marc_iconv(mt, cd);
456             if (r->u.marc.input_format == YAZ_MARC_ISO2709)
457             {
458                 int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
459                                                wrbuf_len(record));
460                 if (sz > 0)
461                     ret = 0;
462                 else
463                     ret = -1;
464             }
465             else if (r->u.marc.input_format == YAZ_MARC_MARCXML)
466             {
467                 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
468                                                wrbuf_len(record));
469                 if (!doc)
470                 {
471                     wrbuf_printf(p->wr_error, "xmlParseMemory failed");
472                     ret = -1;
473                 }
474                 else
475                 {
476                     ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc));
477                     if (ret)
478                         wrbuf_printf(p->wr_error, "yaz_marc_read_xml failed");
479                 }
480                 xmlFreeDoc(doc);
481             }
482             else
483             {
484                 wrbuf_printf(p->wr_error, "unsupported input format");
485                 ret = -1;
486             }
487             if (ret == 0)
488             {
489                 wrbuf_rewind(record);
490                 ret = yaz_marc_write_mode(mt, record);
491                 if (ret)
492                     wrbuf_printf(p->wr_error, "yaz_marc_write_mode failed");
493             }
494             if (cd)
495                 yaz_iconv_close(cd);
496             yaz_marc_destroy(mt);
497         }
498 #if YAZ_HAVE_XSLT
499         else if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
500         {
501             xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
502                                            wrbuf_len(record));
503             if (!doc)
504             {
505                 wrbuf_printf(p->wr_error, "xmlParseMemory failed");
506                 ret = -1;
507             }
508             else
509             {
510                 xmlDocPtr res = xsltApplyStylesheet(r->u.xslt.xsp, doc, 0);
511                 if (res)
512                 {
513                     xmlChar *out_buf = 0;
514                     int out_len;
515
516 #if YAZ_HAVE_XSLTSAVERESULTTOSTRING
517                     xsltSaveResultToString(&out_buf, &out_len, res,
518                                            r->u.xslt.xsp); 
519 #else
520                     xmlDocDumpFormatMemory (res, &out_buf, &out_len, 1);
521 #endif
522                     if (!out_buf)
523                     {
524                         wrbuf_printf(p->wr_error,
525                                      "xsltSaveResultToString failed");
526                         ret = -1;
527                     }
528                     else
529                     {
530                         wrbuf_rewind(record);
531                         wrbuf_write(record, (const char *) out_buf, out_len);
532                         
533                         xmlFree(out_buf);
534                     }
535                     xmlFreeDoc(res);
536                 }
537                 else
538                 {
539                     wrbuf_printf(p->wr_error, "xsltApplyStylesheet failed");
540                     ret = -1;
541                 }
542                 xmlFreeDoc(doc);
543             }
544         }
545 #endif
546     }
547     return ret;
548 }
549
550 const char *yaz_record_conv_get_error(yaz_record_conv_t p)
551 {
552     return wrbuf_cstr(p->wr_error);
553 }
554
555 void yaz_record_conv_set_path(yaz_record_conv_t p, const char *path)
556 {
557     xfree(p->path);
558     p->path = 0;
559     if (path)
560         p->path = xstrdup(path);
561 }
562 #endif
563
564 /*
565  * Local variables:
566  * c-basic-offset: 4
567  * indent-tabs-mode: nil
568  * End:
569  * vim: shiftwidth=4 tabstop=8 expandtab
570  */
571