Add detail logging of record create/destroy
[yaz-moved-to-github.git] / src / record_conv.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2010 Index Data
3  * See the file LICENSE for details.
4  */
5 /**
6  * \file record_conv.c
7  * \brief Record Conversions utility
8  */
9
10 #if HAVE_CONFIG_H
11 #include <config.h>
12 #endif
13
14 #include <string.h>
15 #include <yaz/yaz-iconv.h>
16 #include <yaz/marcdisp.h>
17 #include <yaz/record_conv.h>
18 #include <yaz/wrbuf.h>
19 #include <yaz/xmalloc.h>
20 #include <yaz/nmem.h>
21 #include <yaz/tpath.h>
22 #include <yaz/z-opac.h>
23
24 #if YAZ_HAVE_XML2
25 #include <libxml/parser.h>
26 #include <libxml/tree.h>
27 #include <libxml/xinclude.h>
28 #if YAZ_HAVE_XSLT
29 #include <libxslt/xsltutils.h>
30 #include <libxslt/transform.h>
31 #endif
32 #if YAZ_HAVE_EXSLT
33 #include <libexslt/exslt.h>
34 #endif
35
36 /** \brief The internal structure for yaz_record_conv_t */
37 struct yaz_record_conv_struct {
38     /** \brief memory for configuration */
39     NMEM nmem;
40
41     /** \brief conversion rules (allocated using NMEM) */
42     struct yaz_record_conv_rule *rules;
43
44     /** \brief pointer to last conversion rule pointer in chain */
45     struct yaz_record_conv_rule **rules_p;
46
47     /** \brief string buffer for error messages */
48     WRBUF wr_error;
49
50     /** \brief path for opening files  */
51     char *path;
52 };
53
/** \brief transformation types (rule types) */
enum YAZ_RECORD_CONV_RULE
{
    /** rule applies an XSLT stylesheet */
    YAZ_RECORD_CONV_RULE_XSLT = 0,
    /** rule converts between MARC representations */
    YAZ_RECORD_CONV_RULE_MARC = 1
};
60
61 /** \brief tranformation info (rule info) */
62 struct yaz_record_conv_rule {
63     enum YAZ_RECORD_CONV_RULE which;
64     union {
65 #if YAZ_HAVE_XSLT
66         struct {
67             xmlDocPtr xsp_doc;
68         } xslt;
69 #endif
70         struct {
71             const char *input_charset;
72             const char *output_charset;
73             int input_format;
74             int output_format;
75         } marc;
76     } u;
77     struct yaz_record_conv_rule *next;
78 };
79
80 /** \brief reset rules+configuration */
81 static void yaz_record_conv_reset(yaz_record_conv_t p)
82 {
83
84     struct yaz_record_conv_rule *r;
85     for (r = p->rules; r; r = r->next)
86     {
87         if (r->which == YAZ_RECORD_CONV_RULE_MARC)
88         {
89             ;
90         }
91 #if YAZ_HAVE_XSLT
92         else if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
93         {
94             xmlFreeDoc(r->u.xslt.xsp_doc);
95         }
96 #endif
97     }
98     wrbuf_rewind(p->wr_error);
99     nmem_reset(p->nmem);
100
101     p->rules = 0;
102
103     p->rules_p = &p->rules;
104 }
105
106 yaz_record_conv_t yaz_record_conv_create()
107 {
108     yaz_record_conv_t p = (yaz_record_conv_t) xmalloc(sizeof(*p));
109     p->nmem = nmem_create();
110     p->wr_error = wrbuf_alloc();
111     p->rules = 0;
112     p->path = 0;
113
114 #if YAZ_HAVE_EXSLT
115     exsltRegisterAll(); 
116 #endif
117     yaz_record_conv_reset(p);
118     return p;
119 }
120
121 void yaz_record_conv_destroy(yaz_record_conv_t p)
122 {
123     if (p)
124     {
125         yaz_record_conv_reset(p);
126         nmem_destroy(p->nmem);
127         wrbuf_destroy(p->wr_error);
128         xfree(p->path);
129         xfree(p);
130     }
131 }
132
133 /** \brief adds a rule */
134 static struct yaz_record_conv_rule *add_rule(yaz_record_conv_t p,
135                                              enum YAZ_RECORD_CONV_RULE type)
136 {
137     struct yaz_record_conv_rule *r = (struct yaz_record_conv_rule *)
138         nmem_malloc(p->nmem, sizeof(*r));
139     r->which = type;
140     r->next = 0;
141     *p->rules_p = r;
142     p->rules_p = &r->next;
143     return r;
144 }
145
146 /** \brief parse 'xslt' conversion node */
147 static int conv_xslt(yaz_record_conv_t p, const xmlNode *ptr)
148 {
149 #if YAZ_HAVE_XSLT
150     struct _xmlAttr *attr;
151     const char *stylesheet = 0;
152
153     for (attr = ptr->properties; attr; attr = attr->next)
154     {
155         if (!xmlStrcmp(attr->name, BAD_CAST "stylesheet") &&
156             attr->children && attr->children->type == XML_TEXT_NODE)
157             stylesheet = (const char *) attr->children->content;
158         else
159         {
160             wrbuf_printf(p->wr_error, "Bad attribute '%s'"
161                          "Expected stylesheet.", attr->name);
162             return -1;
163         }
164     }
165     if (!stylesheet)
166     {
167         wrbuf_printf(p->wr_error, "Element <xslt>: "
168                      "attribute 'stylesheet' expected");
169         return -1;
170     }
171     else
172     {
173         char fullpath[1024];
174         xsltStylesheetPtr xsp;
175         xmlDocPtr xsp_doc;
176         if (!yaz_filepath_resolve(stylesheet, p->path, 0, fullpath))
177         {
178             wrbuf_printf(p->wr_error, "Element <xslt stylesheet=\"%s\"/>:"
179                          " could not locate stylesheet '%s'",
180                          stylesheet, fullpath);
181             if (p->path)
182                 wrbuf_printf(p->wr_error, " with path '%s'", p->path);
183                 
184             return -1;
185         }
186         xsp_doc = xmlParseFile(fullpath);
187         if (!xsp_doc)
188         {
189             wrbuf_printf(p->wr_error, "Element: <xslt stylesheet=\"%s\"/>:"
190                          " xml parse failed: %s", stylesheet, fullpath);
191             if (p->path)
192                 wrbuf_printf(p->wr_error, " with path '%s'", p->path);
193             return -1;
194         }
195         xsp = xsltParseStylesheetDoc(xsp_doc);
196         if (!xsp)
197         {
198             wrbuf_printf(p->wr_error, "Element: <xslt stylesheet=\"%s\"/>:"
199                          " xslt parse failed: %s", stylesheet, fullpath);
200             if (p->path)
201                 wrbuf_printf(p->wr_error, " with path '%s'", p->path);
202             wrbuf_printf(p->wr_error, " ("
203 #if YAZ_HAVE_EXSLT
204                          
205                          "EXSLT enabled"
206 #else
207                          "EXSLT not supported"
208 #endif
209                          ")");
210             return -1;
211         }
212         else
213         {
214             struct yaz_record_conv_rule *r = 
215                 add_rule(p, YAZ_RECORD_CONV_RULE_XSLT);
216             r->u.xslt.xsp_doc = xmlCopyDoc(xsp_doc, 1);
217             xsltFreeStylesheet(xsp); /* will free xsp_doc */
218         }
219     }
220     return 0;
221 #else
222     wrbuf_printf(p->wr_error, "xslt unsupported."
223                  " YAZ compiled without XSLT support");
224     return -1;
225 #endif
226 }
227
228 /** \brief parse 'marc' conversion node */
229 static int conv_marc(yaz_record_conv_t p, const xmlNode *ptr)
230 {
231     struct _xmlAttr *attr;
232     const char *input_charset = 0;
233     const char *output_charset = 0;
234     const char *input_format = 0;
235     const char *output_format = 0;
236     int input_format_mode = 0;
237     int output_format_mode = 0;
238     struct yaz_record_conv_rule *r;
239
240     for (attr = ptr->properties; attr; attr = attr->next)
241     {
242         if (!xmlStrcmp(attr->name, BAD_CAST "inputcharset") &&
243             attr->children && attr->children->type == XML_TEXT_NODE)
244             input_charset = (const char *) attr->children->content;
245         else if (!xmlStrcmp(attr->name, BAD_CAST "outputcharset") &&
246             attr->children && attr->children->type == XML_TEXT_NODE)
247             output_charset = (const char *) attr->children->content;
248         else if (!xmlStrcmp(attr->name, BAD_CAST "inputformat") &&
249             attr->children && attr->children->type == XML_TEXT_NODE)
250             input_format = (const char *) attr->children->content;
251         else if (!xmlStrcmp(attr->name, BAD_CAST "outputformat") &&
252             attr->children && attr->children->type == XML_TEXT_NODE)
253             output_format = (const char *) attr->children->content;
254         else
255         {
256             wrbuf_printf(p->wr_error, "Element <marc>: expected attributes"
257                          "'inputformat', 'inputcharset', 'outputformat' or"
258                          " 'outputcharset', got attribute '%s'", 
259                          attr->name);
260             return -1;
261         }
262     }
263     if (!input_format)
264     {
265         wrbuf_printf(p->wr_error, "Element <marc>: "
266                      "attribute 'inputformat' required");
267         return -1;
268     }
269     else if (!strcmp(input_format, "marc"))
270     {
271         input_format_mode = YAZ_MARC_ISO2709;
272     }
273     else if (!strcmp(input_format, "xml"))
274     {
275         input_format_mode = YAZ_MARC_MARCXML;
276         /** Libxml2 generates UTF-8 encoding by default .
277             So we convert from UTF-8 to outputcharset (if defined) 
278         */
279         if (!input_charset && output_charset)
280             input_charset = "utf-8";
281     }
282     else
283     {
284         wrbuf_printf(p->wr_error, "Element <marc inputformat='%s'>: "
285                      " Unsupported input format"
286                      " defined by attribute value", 
287                      input_format);
288         return -1;
289     }
290     
291     if (!output_format)
292     {
293         wrbuf_printf(p->wr_error, 
294                      "Element <marc>: attribute 'outputformat' required");
295         return -1;
296     }
297     else if (!strcmp(output_format, "line"))
298     {
299         output_format_mode = YAZ_MARC_LINE;
300     }
301     else if (!strcmp(output_format, "marcxml"))
302     {
303         output_format_mode = YAZ_MARC_MARCXML;
304         if (input_charset && !output_charset)
305             output_charset = "utf-8";
306     }
307     else if (!strcmp(output_format, "turbomarc"))
308     {
309         output_format_mode = YAZ_MARC_TURBOMARC;
310         if (input_charset && !output_charset)
311             output_charset = "utf-8";
312     }
313     else if (!strcmp(output_format, "marc"))
314     {
315         output_format_mode = YAZ_MARC_ISO2709;
316     }
317     else if (!strcmp(output_format, "marcxchange"))
318     {
319         output_format_mode = YAZ_MARC_XCHANGE;
320         if (input_charset && !output_charset)
321             output_charset = "utf-8";
322     }
323     else
324     {
325         wrbuf_printf(p->wr_error, "Element <marc outputformat='%s'>: "
326                      " Unsupported output format"
327                      " defined by attribute value", 
328                      output_format);
329         return -1;
330     }
331     if (input_charset && output_charset)
332     {
333         yaz_iconv_t cd = yaz_iconv_open(output_charset, input_charset);
334         if (!cd)
335         {
336             wrbuf_printf(p->wr_error, 
337                          "Element <marc inputcharset='%s' outputcharset='%s'>:"
338                          " Unsupported character set mapping"
339                          " defined by attribute values",
340                          input_charset, output_charset);
341             return -1;
342         }
343         yaz_iconv_close(cd);
344     }
345     else if (input_charset)
346     {
347         wrbuf_printf(p->wr_error, "Element <marc>: "
348                      "attribute 'outputcharset' missing");
349         return -1;
350     }
351     else if (output_charset)
352     {
353         wrbuf_printf(p->wr_error, "Element <marc>: "
354                      "attribute 'inputcharset' missing");
355         return -1;
356     }
357     r = add_rule(p, YAZ_RECORD_CONV_RULE_MARC);
358
359     r->u.marc.input_charset = nmem_strdup(p->nmem, input_charset);
360     r->u.marc.output_charset = nmem_strdup(p->nmem, output_charset);
361     r->u.marc.input_format = input_format_mode;
362     r->u.marc.output_format = output_format_mode;
363     return 0;
364 }
365
366 int yaz_record_conv_configure(yaz_record_conv_t p, const xmlNode *ptr)
367 {
368     yaz_record_conv_reset(p);
369
370     /* parsing element children */
371     for (ptr = ptr->children; ptr; ptr = ptr->next)
372         {
373             if (ptr->type != XML_ELEMENT_NODE)
374                 continue;
375             if (!strcmp((const char *) ptr->name, "xslt"))
376                 {
377                     if (conv_xslt(p, ptr))
378                         return -1;
379                 }
380             else if (!strcmp((const char *) ptr->name, "marc"))
381                 {
382                     if (conv_marc(p, ptr))
383                         return -1;
384                 }
385             else
386                 {
387                     wrbuf_printf(p->wr_error, "Element <backend>: expected "
388                                  "<marc> or <xslt> element, got <%s>"
389                                  , ptr->name);
390                     return -1;
391                 }
392         }
393     return 0;
394 }
395
396 static int yaz_record_conv_record_rule(yaz_record_conv_t p,
397                                        struct yaz_record_conv_rule *r,
398                                        const char *input_record_buf,
399                                        size_t input_record_len,
400                                        WRBUF output_record);
401
402 int yaz_record_conv_opac_record(yaz_record_conv_t p,
403                                 Z_OPACRecord *input_record,
404                                 WRBUF output_record)
405 {
406     int ret = 0;
407     struct yaz_record_conv_rule *r = p->rules;
408     if (!r || r->which != YAZ_RECORD_CONV_RULE_MARC)
409         ret = -1; /* no marc rule so we can't do OPAC */
410     else
411     {
412         WRBUF res = wrbuf_alloc();
413         yaz_marc_t mt = yaz_marc_create();
414         yaz_iconv_t cd = yaz_iconv_open(r->u.marc.output_charset,
415                                         r->u.marc.input_charset);
416         
417         wrbuf_rewind(p->wr_error);
418         yaz_marc_xml(mt, r->u.marc.output_format);
419         
420         yaz_marc_iconv(mt, cd);
421         
422         yaz_opac_decode_wrbuf(mt, input_record, res);
423         if (ret != -1)
424         {
425             ret = yaz_record_conv_record_rule(p, 
426                                               r->next,
427                                               wrbuf_buf(res), wrbuf_len(res),
428                                               output_record);
429         }
430         yaz_marc_destroy(mt);
431         if (cd)
432             yaz_iconv_close(cd);
433         wrbuf_destroy(res);
434     }
435     return ret;
436 }
437
438 int yaz_record_conv_record(yaz_record_conv_t p,
439                            const char *input_record_buf,
440                            size_t input_record_len,
441                            WRBUF output_record)
442 {
443     return yaz_record_conv_record_rule(p, p->rules,
444                                        input_record_buf,
445                                        input_record_len, output_record);
446 }
447
448 static int yaz_record_conv_record_rule(yaz_record_conv_t p,
449                                        struct yaz_record_conv_rule *r,
450                                        const char *input_record_buf,
451                                        size_t input_record_len,
452                                        WRBUF output_record)
453 {
454     int ret = 0;
455     WRBUF record = output_record; /* pointer transfer */
456     wrbuf_rewind(p->wr_error);
457     
458     wrbuf_write(record, input_record_buf, input_record_len);
459     for (; ret == 0 && r; r = r->next)
460     {
461         if (r->which == YAZ_RECORD_CONV_RULE_MARC)
462         {
463             yaz_iconv_t cd = 
464                 yaz_iconv_open(r->u.marc.output_charset,
465                                r->u.marc.input_charset);
466             yaz_marc_t mt = yaz_marc_create();
467
468             yaz_marc_xml(mt, r->u.marc.output_format);
469
470             if (cd)
471                 yaz_marc_iconv(mt, cd);
472             if (r->u.marc.input_format == YAZ_MARC_ISO2709)
473             {
474                 int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
475                                                wrbuf_len(record));
476                 if (sz > 0)
477                     ret = 0;
478                 else
479                     ret = -1;
480             }
481             else if (r->u.marc.input_format == YAZ_MARC_MARCXML ||
482                      r->u.marc.input_format == YAZ_MARC_TURBOMARC)
483             {
484                 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
485                                                wrbuf_len(record));
486                 if (!doc)
487                 {
488                     wrbuf_printf(p->wr_error, "xmlParseMemory failed");
489                     ret = -1;
490                 }
491                 else
492                 {
493                     ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc));
494                     if (ret)
495                         wrbuf_printf(p->wr_error, "yaz_marc_read_xml failed");
496                 }
497                 xmlFreeDoc(doc);
498             }
499             else
500             {
501                 wrbuf_printf(p->wr_error, "unsupported input format");
502                 ret = -1;
503             }
504             if (ret == 0)
505             {
506                 wrbuf_rewind(record);
507                 ret = yaz_marc_write_mode(mt, record);
508                 if (ret)
509                     wrbuf_printf(p->wr_error, "yaz_marc_write_mode failed");
510             }
511             if (cd)
512                 yaz_iconv_close(cd);
513             yaz_marc_destroy(mt);
514         }
515 #if YAZ_HAVE_XSLT
516         else if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
517         {
518             xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
519                                            wrbuf_len(record));
520             if (!doc)
521             {
522                 wrbuf_printf(p->wr_error, "xmlParseMemory failed");
523                 ret = -1;
524             }
525             else
526             {
527                 xmlDocPtr xsp_doc = xmlCopyDoc(r->u.xslt.xsp_doc, 1);
528                 xsltStylesheetPtr xsp = xsltParseStylesheetDoc(xsp_doc);
529                 xmlDocPtr res = xsltApplyStylesheet(xsp, doc, 0);
530                 if (res)
531                 {
532                     xmlChar *out_buf = 0;
533                     int out_len;
534
535 #if YAZ_HAVE_XSLTSAVERESULTTOSTRING
536                     xsltSaveResultToString(&out_buf, &out_len, res, xsp);
537 #else
538                     xmlDocDumpFormatMemory (res, &out_buf, &out_len, 1);
539 #endif
540                     if (!out_buf)
541                     {
542                         wrbuf_printf(p->wr_error,
543                                      "xsltSaveResultToString failed");
544                         ret = -1;
545                     }
546                     else
547                     {
548                         wrbuf_rewind(record);
549                         wrbuf_write(record, (const char *) out_buf, out_len);
550                         
551                         xmlFree(out_buf);
552                     }
553                     xmlFreeDoc(res);
554                 }
555                 else
556                 {
557                     wrbuf_printf(p->wr_error, "xsltApplyStylesheet failed");
558                     ret = -1;
559                 }
560                 xmlFreeDoc(doc);
561                 xsltFreeStylesheet(xsp); /* frees xsp_doc too */
562             }
563         }
564 #endif
565     }
566     return ret;
567 }
568
569 const char *yaz_record_conv_get_error(yaz_record_conv_t p)
570 {
571     return wrbuf_cstr(p->wr_error);
572 }
573
574 void yaz_record_conv_set_path(yaz_record_conv_t p, const char *path)
575 {
576     xfree(p->path);
577     p->path = 0;
578     if (path)
579         p->path = xstrdup(path);
580 }
581 #endif
582
583 /*
584  * Local variables:
585  * c-basic-offset: 4
586  * c-file-style: "Stroustrup"
587  * indent-tabs-mode: nil
588  * End:
589  * vim: shiftwidth=4 tabstop=8 expandtab
590  */
591