Handle OPAC for record conversion module.
[yaz-moved-to-github.git] / src / record_conv.c
1 /*
2  * Copyright (C) 2005-2007, Index Data ApS
3  * See the file LICENSE for details.
4  *
5  * $Id: record_conv.c,v 1.17 2007-12-16 11:08:51 adam Exp $
6  */
7 /**
8  * \file record_conv.c
9  * \brief Record Conversions utility
10  */
11
12 #if HAVE_CONFIG_H
13 #include <config.h>
14 #endif
15
16 #include <string.h>
17 #include <yaz/yaz-iconv.h>
18 #include <yaz/marcdisp.h>
19 #include <yaz/record_conv.h>
20 #include <yaz/wrbuf.h>
21 #include <yaz/xmalloc.h>
22 #include <yaz/nmem.h>
23 #include <yaz/tpath.h>
24 #include <yaz/z-opac.h>
25
26 #if YAZ_HAVE_XML2
27 #include <libxml/parser.h>
28 #include <libxml/tree.h>
29 #include <libxml/xinclude.h>
30 #if YAZ_HAVE_XSLT
31 #include <libxslt/xsltutils.h>
32 #include <libxslt/transform.h>
33 #endif
34 #if YAZ_HAVE_EXSLT
35 #include <libexslt/exslt.h>
36 #endif
37
38 /** \brief The internal structure for yaz_record_conv_t */
39 struct yaz_record_conv_struct {
40     /** \brief memory for configuration */
41     NMEM nmem;
42
43     /** \brief conversion rules (allocated using NMEM) */
44     struct yaz_record_conv_rule *rules;
45
46     /** \brief pointer to last conversion rule pointer in chain */
47     struct yaz_record_conv_rule **rules_p;
48
49     /** \brief string buffer for error messages */
50     WRBUF wr_error;
51
52     /** \brief path for opening files  */
53     char *path;
54 };
55
/** \brief transformation types (rule types) */
enum YAZ_RECORD_CONV_RULE
{
    /** rule that applies an XSLT stylesheet */
    YAZ_RECORD_CONV_RULE_XSLT,
    /** rule that performs a MARC format conversion */
    YAZ_RECORD_CONV_RULE_MARC
};
62
63
64 /** \brief tranformation info (rule info) */
65 struct yaz_record_conv_rule {
66     enum YAZ_RECORD_CONV_RULE which;
67     union {
68 #if YAZ_HAVE_XSLT
69         struct {
70             xsltStylesheetPtr xsp;
71         } xslt;
72 #endif
73         struct {
74             yaz_iconv_t iconv_t;
75             int input_format;
76             int output_format;
77         } marc;
78     } u;
79     struct yaz_record_conv_rule *next;
80 };
81
82 /** \brief reset rules+configuration */
83 static void yaz_record_conv_reset(yaz_record_conv_t p)
84 {
85
86     struct yaz_record_conv_rule *r;
87     for (r = p->rules; r; r = r->next)
88     {
89         if (r->which == YAZ_RECORD_CONV_RULE_MARC)
90         {
91             if (r->u.marc.iconv_t)
92                 yaz_iconv_close(r->u.marc.iconv_t);
93         }
94 #if YAZ_HAVE_XSLT
95         else if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
96         {
97             xsltFreeStylesheet(r->u.xslt.xsp);
98         }
99 #endif
100     }
101     wrbuf_rewind(p->wr_error);
102     nmem_reset(p->nmem);
103
104     p->rules = 0;
105
106     p->rules_p = &p->rules;
107 }
108
109 yaz_record_conv_t yaz_record_conv_create()
110 {
111     yaz_record_conv_t p = (yaz_record_conv_t) xmalloc(sizeof(*p));
112     p->nmem = nmem_create();
113     p->wr_error = wrbuf_alloc();
114     p->rules = 0;
115     p->path = 0;
116
117 #if YAZ_HAVE_EXSLT
118     exsltRegisterAll(); 
119 #endif
120     yaz_record_conv_reset(p);
121     return p;
122 }
123
124 void yaz_record_conv_destroy(yaz_record_conv_t p)
125 {
126     if (p)
127     {
128         yaz_record_conv_reset(p);
129         nmem_destroy(p->nmem);
130         wrbuf_destroy(p->wr_error);
131         xfree(p->path);
132         xfree(p);
133     }
134 }
135
136 /** \brief adds a rule */
137 static struct yaz_record_conv_rule *add_rule(yaz_record_conv_t p,
138                                              enum YAZ_RECORD_CONV_RULE type)
139 {
140     struct yaz_record_conv_rule *r = (struct yaz_record_conv_rule *)
141         nmem_malloc(p->nmem, sizeof(*r));
142     r->which = type;
143     r->next = 0;
144     *p->rules_p = r;
145     p->rules_p = &r->next;
146     return r;
147 }
148
149 /** \brief parse 'xslt' conversion node */
150 static int conv_xslt(yaz_record_conv_t p, const xmlNode *ptr)
151 {
152 #if YAZ_HAVE_XSLT
153     struct _xmlAttr *attr;
154     const char *stylesheet = 0;
155
156     for (attr = ptr->properties; attr; attr = attr->next)
157     {
158         if (!xmlStrcmp(attr->name, BAD_CAST "stylesheet") &&
159             attr->children && attr->children->type == XML_TEXT_NODE)
160             stylesheet = (const char *) attr->children->content;
161         else
162         {
163             wrbuf_printf(p->wr_error, "Bad attribute '%s'"
164                          "Expected stylesheet.", attr->name);
165             return -1;
166         }
167     }
168     if (!stylesheet)
169     {
170         wrbuf_printf(p->wr_error, "Element <xslt>: "
171                      "attribute 'stylesheet' expected");
172         return -1;
173     }
174     else
175     {
176         char fullpath[1024];
177         xsltStylesheetPtr xsp;
178         if (!yaz_filepath_resolve(stylesheet, p->path, 0, fullpath))
179         {
180             wrbuf_printf(p->wr_error, "Element <xslt stylesheet=\"%s\"/>:"
181                          " could not locate stylesheet '%s' with path '%s'",
182                          stylesheet, fullpath, p->path);
183             return -1;
184         }
185         xsp = xsltParseStylesheetFile((xmlChar*) fullpath);
186         if (!xsp)
187         {
188             wrbuf_printf(p->wr_error, "Element <xslt stylesheet=\"%s\"/>:"
189                          " parsing stylesheet '%s' with path '%s' failed,"
190 #if YAZ_HAVE_EXSLT
191                          " EXSLT enabled",
192 #else
193                          " EXSLT not supported",
194 #endif
195                          stylesheet, fullpath, p->path);
196             return -1;
197         }
198         else
199         {
200             struct yaz_record_conv_rule *r = 
201                 add_rule(p, YAZ_RECORD_CONV_RULE_XSLT);
202             r->u.xslt.xsp = xsp;
203         }
204     }
205     return 0;
206 #else
207     wrbuf_printf(p->wr_error, "xslt unsupported."
208                  " YAZ compiled without XSLT support");
209     return -1;
210 #endif
211 }
212
213 /** \brief parse 'marc' conversion node */
214 static int conv_marc(yaz_record_conv_t p, const xmlNode *ptr)
215 {
216     struct _xmlAttr *attr;
217     const char *input_charset = 0;
218     const char *output_charset = 0;
219     const char *input_format = 0;
220     const char *output_format = 0;
221     int input_format_mode = 0;
222     int output_format_mode = 0;
223     struct yaz_record_conv_rule *r;
224     yaz_iconv_t cd = 0;
225
226     for (attr = ptr->properties; attr; attr = attr->next)
227     {
228         if (!xmlStrcmp(attr->name, BAD_CAST "inputcharset") &&
229             attr->children && attr->children->type == XML_TEXT_NODE)
230             input_charset = (const char *) attr->children->content;
231         else if (!xmlStrcmp(attr->name, BAD_CAST "outputcharset") &&
232             attr->children && attr->children->type == XML_TEXT_NODE)
233             output_charset = (const char *) attr->children->content;
234         else if (!xmlStrcmp(attr->name, BAD_CAST "inputformat") &&
235             attr->children && attr->children->type == XML_TEXT_NODE)
236             input_format = (const char *) attr->children->content;
237         else if (!xmlStrcmp(attr->name, BAD_CAST "outputformat") &&
238             attr->children && attr->children->type == XML_TEXT_NODE)
239             output_format = (const char *) attr->children->content;
240         else
241         {
242             wrbuf_printf(p->wr_error, "Element <marc>: expected attributes"
243                          "'inputformat', 'inputcharset', 'outputformat' or"
244                          " 'outputcharset', got attribute '%s'", 
245                          attr->name);
246             return -1;
247         }
248     }
249     if (!input_format)
250     {
251         wrbuf_printf(p->wr_error, "Element <marc>: "
252                      "attribute 'inputformat' required");
253         return -1;
254     }
255     else if (!strcmp(input_format, "marc"))
256     {
257         input_format_mode = YAZ_MARC_ISO2709;
258     }
259     else if (!strcmp(input_format, "xml"))
260     {
261         input_format_mode = YAZ_MARC_MARCXML;
262         /** Libxml2 generates UTF-8 encoding by default .
263             So we convert from UTF-8 to outputcharset (if defined) 
264         */
265         if (!input_charset && output_charset)
266             input_charset = "utf-8";
267     }
268     else
269     {
270         wrbuf_printf(p->wr_error, "Element <marc inputformat='%s'>: "
271                      " Unsupported input format"
272                      " defined by attribute value", 
273                      input_format);
274         return -1;
275     }
276     
277     if (!output_format)
278     {
279         wrbuf_printf(p->wr_error, 
280                      "Element <marc>: attribute 'outputformat' required");
281         return -1;
282     }
283     else if (!strcmp(output_format, "line"))
284     {
285         output_format_mode = YAZ_MARC_LINE;
286     }
287     else if (!strcmp(output_format, "marcxml"))
288     {
289         output_format_mode = YAZ_MARC_MARCXML;
290         if (input_charset && !output_charset)
291             output_charset = "utf-8";
292     }
293     else if (!strcmp(output_format, "marc"))
294     {
295         output_format_mode = YAZ_MARC_ISO2709;
296     }
297     else if (!strcmp(output_format, "marcxchange"))
298     {
299         output_format_mode = YAZ_MARC_XCHANGE;
300         if (input_charset && !output_charset)
301             output_charset = "utf-8";
302     }
303     else
304     {
305         wrbuf_printf(p->wr_error, "Element <marc outputformat='%s'>: "
306                      " Unsupported output format"
307                      " defined by attribute value", 
308                      output_format);
309         return -1;
310     }
311     if (input_charset && output_charset)
312     {
313         cd = yaz_iconv_open(output_charset, input_charset);
314         if (!cd)
315         {
316             wrbuf_printf(p->wr_error, 
317                          "Element <marc inputcharset='%s' outputcharset='%s'>:"
318                          " Unsupported character set mapping"
319                          " defined by attribute values",
320                          input_charset, output_charset);
321             return -1;
322         }
323     }
324     else if (input_charset)
325     {
326         wrbuf_printf(p->wr_error, "Element <marc>: "
327                      "attribute 'outputcharset' missing");
328         return -1;
329     }
330     else if (output_charset)
331     {
332         wrbuf_printf(p->wr_error, "Element <marc>: "
333                      "attribute 'inputcharset' missing");
334         return -1;
335     }
336     r = add_rule(p, YAZ_RECORD_CONV_RULE_MARC);
337     r->u.marc.iconv_t = cd;
338
339     r->u.marc.input_format = input_format_mode;
340     r->u.marc.output_format = output_format_mode;
341     return 0;
342 }
343
344 int yaz_record_conv_configure(yaz_record_conv_t p, const xmlNode *ptr)
345 {
346     yaz_record_conv_reset(p);
347
348     /* parsing element children */
349     for (ptr = ptr->children; ptr; ptr = ptr->next)
350         {
351             if (ptr->type != XML_ELEMENT_NODE)
352                 continue;
353             if (!strcmp((const char *) ptr->name, "xslt"))
354                 {
355                     if (conv_xslt(p, ptr))
356                         return -1;
357                 }
358             else if (!strcmp((const char *) ptr->name, "marc"))
359                 {
360                     if (conv_marc(p, ptr))
361                         return -1;
362                 }
363             else
364                 {
365                     wrbuf_printf(p->wr_error, "Element <backend>: expected "
366                                  "<marc> or <xslt> element, got <%s>"
367                                  , ptr->name);
368                     return -1;
369                 }
370         }
371     return 0;
372 }
373
374 static int yaz_record_conv_record_rule(yaz_record_conv_t p,
375                                        struct yaz_record_conv_rule *r,
376                                        const char *input_record_buf,
377                                        size_t input_record_len,
378                                        WRBUF output_record);
379
380 int yaz_record_conv_opac_record(yaz_record_conv_t p,
381                                 Z_OPACRecord *input_record,
382                                 WRBUF output_record)
383 {
384     int ret = 0;
385     struct yaz_record_conv_rule *r = p->rules;
386     WRBUF res = wrbuf_alloc();
387     yaz_marc_t mt = yaz_marc_create();
388     
389     wrbuf_rewind(p->wr_error);
390     yaz_marc_xml(mt, r->u.marc.output_format);
391     if (r->u.marc.iconv_t)
392         yaz_marc_iconv(mt, r->u.marc.iconv_t);
393     yaz_opac_decode_wrbuf(mt, input_record, res);
394     if (ret != -1)
395     {
396         ret = yaz_record_conv_record_rule(p, 
397                                           r->next,
398                                           wrbuf_buf(res), wrbuf_len(res),
399                                           output_record);
400     }
401     yaz_marc_destroy(mt);
402     wrbuf_destroy(res);
403     return ret;
404 }
405
406 int yaz_record_conv_record(yaz_record_conv_t p,
407                            const char *input_record_buf,
408                            size_t input_record_len,
409                            WRBUF output_record)
410 {
411     return yaz_record_conv_record_rule(p, p->rules,
412                                        input_record_buf,
413                                        input_record_len, output_record);
414 }
415
416 static int yaz_record_conv_record_rule(yaz_record_conv_t p,
417                                        struct yaz_record_conv_rule *r,
418                                        const char *input_record_buf,
419                                        size_t input_record_len,
420                                        WRBUF output_record)
421 {
422     int ret = 0;
423     WRBUF record = output_record; /* pointer transfer */
424     wrbuf_rewind(p->wr_error);
425     
426     wrbuf_write(record, input_record_buf, input_record_len);
427     for (; ret == 0 && r; r = r->next)
428     {
429         if (r->which == YAZ_RECORD_CONV_RULE_MARC)
430         {
431             yaz_marc_t mt = yaz_marc_create();
432
433             yaz_marc_xml(mt, r->u.marc.output_format);
434
435             if (r->u.marc.iconv_t)
436                 yaz_marc_iconv(mt, r->u.marc.iconv_t);
437             if (r->u.marc.input_format == YAZ_MARC_ISO2709)
438             {
439                 int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
440                                                wrbuf_len(record));
441                 if (sz > 0)
442                     ret = 0;
443                 else
444                     ret = -1;
445             }
446             else if (r->u.marc.input_format == YAZ_MARC_MARCXML)
447             {
448                 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
449                                                wrbuf_len(record));
450                 if (!doc)
451                 {
452                     wrbuf_printf(p->wr_error, "xmlParseMemory failed");
453                     ret = -1;
454                 }
455                 else
456                 {
457                     ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc));
458                     if (ret)
459                         wrbuf_printf(p->wr_error, "yaz_marc_read_xml failed");
460                 }
461                 xmlFreeDoc(doc);
462             }
463             else
464             {
465                 wrbuf_printf(p->wr_error, "unsupported input format");
466                 ret = -1;
467             }
468             if (ret == 0)
469             {
470                 wrbuf_rewind(record);
471                 ret = yaz_marc_write_mode(mt, record);
472                 if (ret)
473                     wrbuf_printf(p->wr_error, "yaz_marc_write_mode failed");
474             }
475             yaz_marc_destroy(mt);
476         }
477 #if YAZ_HAVE_XSLT
478         else if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
479         {
480             xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
481                                            wrbuf_len(record));
482             if (!doc)
483             {
484                 wrbuf_printf(p->wr_error, "xmlParseMemory failed");
485                 ret = -1;
486             }
487             else
488             {
489                 xmlDocPtr res = xsltApplyStylesheet(r->u.xslt.xsp, doc, 0);
490                 if (res)
491                 {
492                     xmlChar *out_buf = 0;
493                     int out_len;
494
495 #if YAZ_HAVE_XSLTSAVERESULTTOSTRING
496                     xsltSaveResultToString(&out_buf, &out_len, res,
497                                            r->u.xslt.xsp); 
498 #else
499                     xmlDocDumpFormatMemory (res, &out_buf, &out_len, 1);
500 #endif
501                     if (!out_buf)
502                     {
503                         wrbuf_printf(p->wr_error,
504                                      "xsltSaveResultToString failed");
505                         ret = -1;
506                     }
507                     else
508                     {
509                         wrbuf_rewind(record);
510                         wrbuf_write(record, (const char *) out_buf, out_len);
511                         
512                         xmlFree(out_buf);
513                     }
514                     xmlFreeDoc(res);
515                 }
516                 else
517                 {
518                     wrbuf_printf(p->wr_error, "xsltApplyStylesheet failed");
519                     ret = -1;
520                 }
521                 xmlFreeDoc(doc);
522             }
523         }
524 #endif
525     }
526     return ret;
527 }
528
529 const char *yaz_record_conv_get_error(yaz_record_conv_t p)
530 {
531     return wrbuf_cstr(p->wr_error);
532 }
533
534 void yaz_record_conv_set_path(yaz_record_conv_t p, const char *path)
535 {
536     xfree(p->path);
537     p->path = 0;
538     if (path)
539         p->path = xstrdup(path);
540 }
541 #endif
542
543 /*
544  * Local variables:
545  * c-basic-offset: 4
546  * indent-tabs-mode: nil
547  * End:
548  * vim: shiftwidth=4 tabstop=8 expandtab
549  */
550