record_conv: change construct prototype
[yaz-moved-to-github.git] / src / record_conv.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2012 Index Data
3  * See the file LICENSE for details.
4  */
5 /**
6  * \file record_conv.c
7  * \brief Record Conversions utility
8  */
9
10 #if HAVE_CONFIG_H
11 #include <config.h>
12 #endif
13
14 #include <string.h>
15 #include <yaz/yaz-iconv.h>
16 #include <yaz/marcdisp.h>
17 #include <yaz/record_conv.h>
18 #include <yaz/wrbuf.h>
19 #include <yaz/xmalloc.h>
20 #include <yaz/nmem.h>
21 #include <yaz/tpath.h>
22 #include <yaz/z-opac.h>
23
24 #if YAZ_HAVE_XML2
25 #include <libxml/parser.h>
26 #include <libxml/tree.h>
27 #include <libxml/xinclude.h>
28 #if YAZ_HAVE_XSLT
29 #include <libxslt/xsltutils.h>
30 #include <libxslt/transform.h>
31 #endif
32 #if YAZ_HAVE_EXSLT
33 #include <libexslt/exslt.h>
34 #endif
35
36 /** \brief The internal structure for yaz_record_conv_t */
37 struct yaz_record_conv_struct {
38     /** \brief memory for configuration */
39     NMEM nmem;
40
41     /** \brief conversion rules (allocated using NMEM) */
42     struct yaz_record_conv_rule *rules;
43
44     /** \brief pointer to last conversion rule pointer in chain */
45     struct yaz_record_conv_rule **rules_p;
46
47     /** \brief string buffer for error messages */
48     WRBUF wr_error;
49
50     /** \brief path for opening files  */
51     char *path;
52 };
53
54 struct marc_info {
55     NMEM nmem;
56     const char *input_charset;
57     const char *output_charset;
58     int input_format_mode;
59     int output_format_mode;
60 };
61
/** \brief transformation info (one conversion rule in the chain) */
struct yaz_record_conv_rule {
    /** \brief handlers (construct/convert/destroy) for this rule */
    struct yaz_record_conv_type *type;
    /** \brief handler-private data as returned by type->construct */
    void *info;
    /** \brief following rule in the chain, or 0 for the last one */
    struct yaz_record_conv_rule *next;
};
68
69 /** \brief reset rules+configuration */
70 static void yaz_record_conv_reset(yaz_record_conv_t p)
71 {
72
73     struct yaz_record_conv_rule *r;
74     for (r = p->rules; r; r = r->next)
75     {
76         r->type->destroy(r->info);
77     }
78     wrbuf_rewind(p->wr_error);
79     nmem_reset(p->nmem);
80
81     p->rules = 0;
82
83     p->rules_p = &p->rules;
84 }
85
86 void yaz_record_conv_destroy(yaz_record_conv_t p)
87 {
88     if (p)
89     {
90         yaz_record_conv_reset(p);
91         nmem_destroy(p->nmem);
92         wrbuf_destroy(p->wr_error);
93
94         xfree(p->path);
95         xfree(p);
96     }
97 }
98
99 #if YAZ_HAVE_XSLT
100 static void *construct_xslt(const xmlNode *ptr,
101                             const char *path, WRBUF wr_error)
102 {
103     struct _xmlAttr *attr;
104     const char *stylesheet = 0;
105
106     if (strcmp((const char *) ptr->name, "xslt"))
107         return 0;
108
109     for (attr = ptr->properties; attr; attr = attr->next)
110     {
111         if (!xmlStrcmp(attr->name, BAD_CAST "stylesheet") &&
112             attr->children && attr->children->type == XML_TEXT_NODE)
113             stylesheet = (const char *) attr->children->content;
114         else
115         {
116             wrbuf_printf(wr_error, "Bad attribute '%s'"
117                          "Expected stylesheet.", attr->name);
118             return 0;
119         }
120     }
121     if (!stylesheet)
122     {
123         wrbuf_printf(wr_error, "Element <xslt>: "
124                      "attribute 'stylesheet' expected");
125         return 0;
126     }
127     else
128     {
129         char fullpath[1024];
130         xsltStylesheetPtr xsp;
131         xmlDocPtr xsp_doc;
132         if (!yaz_filepath_resolve(stylesheet, path, 0, fullpath))
133         {
134             wrbuf_printf(wr_error, "Element <xslt stylesheet=\"%s\"/>:"
135                          " could not locate stylesheet '%s'",
136                          stylesheet, stylesheet);
137             if (path)
138                 wrbuf_printf(wr_error, " with path '%s'", path);
139                 
140             return 0;
141         }
142         xsp_doc = xmlParseFile(fullpath);
143         if (!xsp_doc)
144         {
145             wrbuf_printf(wr_error, "Element: <xslt stylesheet=\"%s\"/>:"
146                          " xml parse failed: %s", stylesheet, fullpath);
147             if (path)
148                 wrbuf_printf(wr_error, " with path '%s'", path);
149             return 0;
150         }
151         /* need to copy this before passing it to the processor. It will
152            be encapsulated in the xsp and destroyed by xsltFreeStylesheet */
153         xsp = xsltParseStylesheetDoc(xmlCopyDoc(xsp_doc, 1));
154         if (!xsp)
155         {
156             wrbuf_printf(wr_error, "Element: <xslt stylesheet=\"%s\"/>:"
157                          " xslt parse failed: %s", stylesheet, fullpath);
158             if (path)
159                 wrbuf_printf(wr_error, " with path '%s'", path);
160             wrbuf_printf(wr_error, " ("
161 #if YAZ_HAVE_EXSLT
162                          
163                          "EXSLT enabled"
164 #else
165                          "EXSLT not supported"
166 #endif
167                          ")");
168             xmlFreeDoc(xsp_doc);
169             return 0;
170         }
171         else
172         {
173             xsltFreeStylesheet(xsp);
174             return xsp_doc;
175         }
176     }
177     return 0;
178 }
179
180 static int convert_xslt(void *info, WRBUF record, WRBUF wr_error)
181 {
182     int ret = 0;
183     xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
184                                    wrbuf_len(record));
185     if (!doc)
186     {
187         wrbuf_printf(wr_error, "xmlParseMemory failed");
188         ret = -1;
189     }
190     else
191     {
192         xmlDocPtr xsp_doc = xmlCopyDoc((xmlDocPtr) info, 1);
193         xsltStylesheetPtr xsp = xsltParseStylesheetDoc(xsp_doc);
194         xmlDocPtr res = xsltApplyStylesheet(xsp, doc, 0);
195         if (res)
196         {
197             xmlChar *out_buf = 0;
198             int out_len;
199             
200 #if HAVE_XSLTSAVERESULTTOSTRING
201             xsltSaveResultToString(&out_buf, &out_len, res, xsp);
202 #else
203             xmlDocDumpFormatMemory (res, &out_buf, &out_len, 1);
204 #endif
205             if (!out_buf)
206             {
207                 wrbuf_printf(wr_error,
208                              "xsltSaveResultToString failed");
209                 ret = -1;
210             }
211             else
212             {
213                 wrbuf_rewind(record);
214                 wrbuf_write(record, (const char *) out_buf, out_len);
215                 
216                 xmlFree(out_buf);
217             }
218             xmlFreeDoc(res);
219         }
220         else
221         {
222             wrbuf_printf(wr_error, "xsltApplyStylesheet failed");
223             ret = -1;
224         }
225         xmlFreeDoc(doc);
226         xsltFreeStylesheet(xsp); /* frees xsp_doc too */
227     }
228     return ret;
229 }
230
231 static void destroy_xslt(void *info)
232 {
233     if (info)
234     {
235         xmlDocPtr xsp_doc = info;
236         xmlFreeDoc(xsp_doc);
237     }
238 }
239
240 /* YAZ_HAVE_XSLT */
241 #endif
242
243
244 static void *construct_marc(const xmlNode *ptr,
245                             const char *path, WRBUF wr_error)
246 {
247     NMEM nmem = nmem_create();
248     struct marc_info *info = nmem_malloc(nmem, sizeof(*info));
249     struct _xmlAttr *attr;
250     const char *input_format = 0;
251     const char *output_format = 0;
252
253     if (strcmp((const char *) ptr->name, "marc"))
254     {
255         nmem_destroy(nmem);
256         return 0;
257     }
258
259     info->nmem = nmem;
260     info->input_charset = 0;
261     info->output_charset = 0;
262     info->input_format_mode = 0;
263     info->output_format_mode = 0;
264
265     for (attr = ptr->properties; attr; attr = attr->next)
266     {
267         if (!xmlStrcmp(attr->name, BAD_CAST "inputcharset") &&
268             attr->children && attr->children->type == XML_TEXT_NODE)
269             info->input_charset = (const char *) attr->children->content;
270         else if (!xmlStrcmp(attr->name, BAD_CAST "outputcharset") &&
271             attr->children && attr->children->type == XML_TEXT_NODE)
272             info->output_charset = (const char *) attr->children->content;
273         else if (!xmlStrcmp(attr->name, BAD_CAST "inputformat") &&
274             attr->children && attr->children->type == XML_TEXT_NODE)
275             input_format = (const char *) attr->children->content;
276         else if (!xmlStrcmp(attr->name, BAD_CAST "outputformat") &&
277             attr->children && attr->children->type == XML_TEXT_NODE)
278             output_format = (const char *) attr->children->content;
279         else
280         {
281             wrbuf_printf(wr_error, "Element <marc>: expected attributes"
282                          "'inputformat', 'inputcharset', 'outputformat' or"
283                          " 'outputcharset', got attribute '%s'", 
284                          attr->name);
285             nmem_destroy(info->nmem);
286             return 0;
287         }
288     }
289     if (!input_format)
290     {
291         wrbuf_printf(wr_error, "Element <marc>: "
292                      "attribute 'inputformat' required");
293         nmem_destroy(info->nmem);
294         return 0;
295     }
296     else if (!strcmp(input_format, "marc"))
297     {
298         info->input_format_mode = YAZ_MARC_ISO2709;
299     }
300     else if (!strcmp(input_format, "xml"))
301     {
302         info->input_format_mode = YAZ_MARC_MARCXML;
303         /** Libxml2 generates UTF-8 encoding by default .
304             So we convert from UTF-8 to outputcharset (if defined) 
305         */
306         if (!info->input_charset && info->output_charset)
307             info->input_charset = "utf-8";
308     }
309     else
310     {
311         wrbuf_printf(wr_error, "Element <marc inputformat='%s'>: "
312                      " Unsupported input format"
313                      " defined by attribute value", 
314                      input_format);
315         nmem_destroy(info->nmem);
316         return 0;
317     }
318     
319     if (!output_format)
320     {
321         wrbuf_printf(wr_error, 
322                      "Element <marc>: attribute 'outputformat' required");
323         nmem_destroy(info->nmem);
324         return 0;
325     }
326     else if (!strcmp(output_format, "line"))
327     {
328         info->output_format_mode = YAZ_MARC_LINE;
329     }
330     else if (!strcmp(output_format, "marcxml"))
331     {
332         info->output_format_mode = YAZ_MARC_MARCXML;
333         if (info->input_charset && !info->output_charset)
334             info->output_charset = "utf-8";
335     }
336     else if (!strcmp(output_format, "turbomarc"))
337     {
338         info->output_format_mode = YAZ_MARC_TURBOMARC;
339         if (info->input_charset && !info->output_charset)
340             info->output_charset = "utf-8";
341     }
342     else if (!strcmp(output_format, "marc"))
343     {
344         info->output_format_mode = YAZ_MARC_ISO2709;
345     }
346     else if (!strcmp(output_format, "marcxchange"))
347     {
348         info->output_format_mode = YAZ_MARC_XCHANGE;
349         if (info->input_charset && !info->output_charset)
350             info->output_charset = "utf-8";
351     }
352     else
353     {
354         wrbuf_printf(wr_error, "Element <marc outputformat='%s'>: "
355                      " Unsupported output format"
356                      " defined by attribute value", 
357                      output_format);
358         nmem_destroy(info->nmem);
359         return 0;
360     }
361     if (info->input_charset && info->output_charset)
362     {
363         yaz_iconv_t cd = yaz_iconv_open(info->output_charset,
364                                         info->input_charset);
365         if (!cd)
366         {
367             wrbuf_printf(wr_error, 
368                          "Element <marc inputcharset='%s' outputcharset='%s'>:"
369                          " Unsupported character set mapping"
370                          " defined by attribute values",
371                          info->input_charset, info->output_charset);
372             nmem_destroy(info->nmem);
373             return 0;
374         }
375         yaz_iconv_close(cd);
376     }
377     else if (info->input_charset)
378     {
379         wrbuf_printf(wr_error, "Element <marc>: "
380                      "attribute 'outputcharset' missing");
381         nmem_destroy(info->nmem);
382         return 0;
383     }
384     else if (info->output_charset)
385     {
386         wrbuf_printf(wr_error, "Element <marc>: "
387                      "attribute 'inputcharset' missing");
388         nmem_destroy(info->nmem);
389         return 0;
390     }
391     info->input_charset = nmem_strdup(info->nmem, info->input_charset);
392     info->output_charset = nmem_strdup(info->nmem, info->output_charset);
393     return info;
394 }
395
396 static int convert_marc(void *info, WRBUF record, WRBUF wr_error)
397 {
398     struct marc_info *mi = info;
399     int ret = 0;
400     
401     yaz_iconv_t cd = yaz_iconv_open(mi->output_charset, mi->input_charset);
402     yaz_marc_t mt = yaz_marc_create();
403     
404     yaz_marc_xml(mt, mi->output_format_mode);
405     
406     if (cd)
407         yaz_marc_iconv(mt, cd);
408     if (mi->input_format_mode == YAZ_MARC_ISO2709)
409     {
410         int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
411                                        wrbuf_len(record));
412         if (sz > 0)
413             ret = 0;
414         else
415             ret = -1;
416     }
417     else if (mi->input_format_mode == YAZ_MARC_MARCXML ||
418              mi->input_format_mode == YAZ_MARC_TURBOMARC)
419     {
420         xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
421                                        wrbuf_len(record));
422         if (!doc)
423         {
424             wrbuf_printf(wr_error, "xmlParseMemory failed");
425             ret = -1;
426         }
427         else
428         {
429             ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc));
430             if (ret)
431                 wrbuf_printf(wr_error, "yaz_marc_read_xml failed");
432         }
433         xmlFreeDoc(doc);
434     }
435     else
436     {
437         wrbuf_printf(wr_error, "unsupported input format");
438         ret = -1;
439     }
440     if (ret == 0)
441     {
442         wrbuf_rewind(record);
443         ret = yaz_marc_write_mode(mt, record);
444         if (ret)
445             wrbuf_printf(wr_error, "yaz_marc_write_mode failed");
446     }
447     if (cd)
448         yaz_iconv_close(cd);
449     yaz_marc_destroy(mt);
450     return ret;
451 }
452
453 static void destroy_marc(void *info)
454 {
455     struct marc_info *mi = info;
456     
457     nmem_destroy(mi->nmem);
458 }
459
460 int yaz_record_conv_configure_t(yaz_record_conv_t p, const xmlNode *ptr,
461                                 struct yaz_record_conv_type *types)
462 {
463     struct yaz_record_conv_type bt[2];
464     
465     /* register marc */
466     bt[0].construct = construct_marc;
467     bt[0].convert = convert_marc;
468     bt[0].destroy = destroy_marc;
469
470 #if YAZ_HAVE_XSLT
471     /* register xslt */
472     bt[0].next = &bt[1];
473     bt[1].next = types;
474     bt[1].construct = construct_xslt;
475     bt[1].convert = convert_xslt;
476     bt[1].destroy = destroy_xslt;
477 #else
478     bt[0].next = types;
479 #endif
480     
481     yaz_record_conv_reset(p);
482
483     /* parsing element children */
484     for (ptr = ptr->children; ptr; ptr = ptr->next)
485     {
486         struct yaz_record_conv_type *t;
487         struct yaz_record_conv_rule *r;
488         void *info = 0;
489         if (ptr->type != XML_ELEMENT_NODE)
490             continue;
491         for (t = &bt[0]; t; t = t->next)
492         {
493             wrbuf_rewind(p->wr_error);
494             info = t->construct(ptr, p->path, p->wr_error);
495
496             if (info || wrbuf_len(p->wr_error))
497                 break;
498             /* info== 0 and no error reported , ie not handled by it */
499         }
500         if (!info)
501         {
502             if (wrbuf_len(p->wr_error) == 0)
503                 wrbuf_printf(p->wr_error, "Element <backend>: expected "
504                              "<marc> or <xslt> element, got <%s>"
505                              , ptr->name);
506             return -1;
507         }
508         r = (struct yaz_record_conv_rule *) nmem_malloc(p->nmem, sizeof(*r));
509         r->next = 0;
510         r->info = info;
511         r->type = nmem_malloc(p->nmem, sizeof(*t));
512         memcpy(r->type, t, sizeof(*t));
513         *p->rules_p = r;
514         p->rules_p = &r->next;
515     }
516     return 0;
517 }
518
519 int yaz_record_conv_configure(yaz_record_conv_t p, const xmlNode *ptr)
520 {
521     return yaz_record_conv_configure_t(p, ptr, 0);
522 }
523
524 static int yaz_record_conv_record_rule(yaz_record_conv_t p,
525                                        struct yaz_record_conv_rule *r,
526                                        const char *input_record_buf,
527                                        size_t input_record_len,
528                                        WRBUF output_record)
529 {
530     int ret = 0;
531     WRBUF record = output_record; /* pointer transfer */
532     wrbuf_rewind(p->wr_error);
533     
534     wrbuf_write(record, input_record_buf, input_record_len);
535     for (; ret == 0 && r; r = r->next)
536         ret = r->type->convert(r->info, record, p->wr_error);
537     return ret;
538 }
539
540 int yaz_record_conv_opac_record(yaz_record_conv_t p,
541                                 Z_OPACRecord *input_record,
542                                 WRBUF output_record)
543 {
544     int ret = 0;
545     struct yaz_record_conv_rule *r = p->rules;
546     if (!r || r->type->construct != construct_marc)
547         ret = -1; /* no marc rule so we can't do OPAC */
548     else
549     {
550         struct marc_info *mi = r->info;
551
552         WRBUF res = wrbuf_alloc();
553         yaz_marc_t mt = yaz_marc_create();
554         yaz_iconv_t cd = yaz_iconv_open(mi->output_charset,
555                                         mi->input_charset);
556         
557         wrbuf_rewind(p->wr_error);
558         yaz_marc_xml(mt, mi->output_format_mode);
559         
560         yaz_marc_iconv(mt, cd);
561         
562         yaz_opac_decode_wrbuf(mt, input_record, res);
563         if (ret != -1)
564         {
565             ret = yaz_record_conv_record_rule(p, 
566                                               r->next,
567                                               wrbuf_buf(res), wrbuf_len(res),
568                                               output_record);
569         }
570         yaz_marc_destroy(mt);
571         if (cd)
572             yaz_iconv_close(cd);
573         wrbuf_destroy(res);
574     }
575     return ret;
576 }
577
578 int yaz_record_conv_record(yaz_record_conv_t p,
579                            const char *input_record_buf,
580                            size_t input_record_len,
581                            WRBUF output_record)
582 {
583     return yaz_record_conv_record_rule(p, p->rules,
584                                        input_record_buf,
585                                        input_record_len, output_record);
586 }
587
588 const char *yaz_record_conv_get_error(yaz_record_conv_t p)
589 {
590     return wrbuf_cstr(p->wr_error);
591 }
592
593 void yaz_record_conv_set_path(yaz_record_conv_t p, const char *path)
594 {
595     xfree(p->path);
596     p->path = 0;
597     if (path)
598         p->path = xstrdup(path);
599 }
600
601 yaz_record_conv_t yaz_record_conv_create()
602 {
603     yaz_record_conv_t p = (yaz_record_conv_t) xmalloc(sizeof(*p));
604     p->nmem = nmem_create();
605     p->wr_error = wrbuf_alloc();
606     p->rules = 0;
607     p->path = 0;
608 #if YAZ_HAVE_EXSLT
609     exsltRegisterAll(); 
610 #endif    
611     return p;
612 }
613
614 /* YAZ_HAVE_XML2 */
615 #endif
616
617 /*
618  * Local variables:
619  * c-basic-offset: 4
620  * c-file-style: "Stroustrup"
621  * indent-tabs-mode: nil
622  * End:
623  * vim: shiftwidth=4 tabstop=8 expandtab
624  */
625