8c00ebb88a97fed9e7bc3d14139f01356f9e7c23
[yaz-moved-to-github.git] / src / record_conv.c
1 /*
2  * Copyright (C) 2005-2006, Index Data ApS
3  * See the file LICENSE for details.
4  *
5  * $Id: record_conv.c,v 1.6 2006-05-07 17:45:41 adam Exp $
6  */
7 /**
8  * \file record_conv.c
9  * \brief Record Conversions utility
10  */
11
12 #if HAVE_CONFIG_H
13 #include <config.h>
14 #endif
15
16 #include <string.h>
17 #include <yaz/yaz-iconv.h>
18 #include <yaz/marcdisp.h>
19 #include <yaz/record_conv.h>
20 #include <yaz/wrbuf.h>
21 #include <yaz/xmalloc.h>
22 #include <yaz/nmem.h>
23 #include <yaz/tpath.h>
24
25 #if HAVE_XSLT
26 #include <libxml/parser.h>
27 #include <libxml/tree.h>
28 #include <libxml/xinclude.h>
29 #include <libxslt/xsltutils.h>
30 #include <libxslt/transform.h>
31
32 /** \brief The internal structure for yaz_record_conv_t */
33 struct yaz_record_conv_struct {
34     /** \brief memory for configuration */
35     NMEM nmem;
36
37     /** \brief conversion rules (allocated using NMEM) */
38     struct yaz_record_conv_rule *rules;
39
40     /** \brief pointer to last conversion rule pointer in chain */
41     struct yaz_record_conv_rule **rules_p;
42
43     /** \brief string buffer for error messages */
44     WRBUF wr_error;
45
46     /** \brief path for opening files  */
47     char *path;
48 };
49
/** \brief transformation types (rule types) */
enum YAZ_RECORD_CONV_RULE
{
    /** rule applies an XSLT stylesheet */
    YAZ_RECORD_CONV_RULE_XSLT = 0,
    /** rule performs a MARC format conversion */
    YAZ_RECORD_CONV_RULE_MARC
};
56
57
58 /** \brief tranformation info (rule info) */
59 struct yaz_record_conv_rule {
60     enum YAZ_RECORD_CONV_RULE which;
61     union {
62         struct {
63             xsltStylesheetPtr xsp;
64             int dummy;
65         } xslt;
66         struct {
67             yaz_iconv_t iconv_t;
68             int input_format;
69             int output_format;
70         } marc;
71     } u;
72     struct yaz_record_conv_rule *next;
73 };
74
75 /** \brief reset rules+configuration */
76 static void yaz_record_conv_reset(yaz_record_conv_t p)
77 {
78     struct yaz_record_conv_rule *r;
79     for (r = p->rules; r; r = r->next)
80     {
81         if (r->which == YAZ_RECORD_CONV_RULE_MARC)
82         {
83             if (r->u.marc.iconv_t)
84                 yaz_iconv_close(r->u.marc.iconv_t);
85         }
86         else if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
87         {
88             xsltFreeStylesheet(r->u.xslt.xsp);
89         }
90     }
91     wrbuf_rewind(p->wr_error);
92     nmem_reset(p->nmem);
93
94     p->rules = 0;
95
96     p->rules_p = &p->rules;
97 }
98
99 yaz_record_conv_t yaz_record_conv_create()
100 {
101     yaz_record_conv_t p = xmalloc(sizeof(*p));
102     p->nmem = nmem_create();
103     p->wr_error = wrbuf_alloc();
104     p->rules = 0;
105     p->path = 0;
106
107     yaz_record_conv_reset(p);
108     return p;
109 }
110
111 void yaz_record_conv_destroy(yaz_record_conv_t p)
112 {
113     if (p)
114     {
115         yaz_record_conv_reset(p);
116         nmem_destroy(p->nmem);
117         wrbuf_free(p->wr_error, 1);
118         xfree(p->path);
119         xfree(p);
120     }
121 }
122
123 /** \brief adds a rule */
124 static struct yaz_record_conv_rule *add_rule(yaz_record_conv_t p,
125                                              enum YAZ_RECORD_CONV_RULE type)
126 {
127     struct yaz_record_conv_rule *r = nmem_malloc(p->nmem, sizeof(*r));
128     r->which = type;
129     r->next = 0;
130     *p->rules_p = r;
131     p->rules_p = &r->next;
132     return r;
133 }
134
135 /** \brief parse 'xslt' conversion node */
136 static int conv_xslt(yaz_record_conv_t p, const xmlNode *ptr)
137 {
138     struct _xmlAttr *attr;
139     const char *stylesheet = 0;
140
141     for (attr = ptr->properties; attr; attr = attr->next)
142     {
143         if (!xmlStrcmp(attr->name, BAD_CAST "stylesheet") &&
144             attr->children && attr->children->type == XML_TEXT_NODE)
145             stylesheet = (const char *) attr->children->content;
146         else
147         {
148             wrbuf_printf(p->wr_error, "Bad attribute '%s'"
149                          "Expected stylesheet.", attr->name);
150             return -1;
151         }
152     }
153     if (!stylesheet)
154     {
155         wrbuf_printf(p->wr_error, "Missing attribute 'stylesheet'");
156         return -1;
157     }
158     else
159     {
160         char fullpath[1024];
161         xsltStylesheetPtr xsp;
162         if (!yaz_filepath_resolve(stylesheet, p->path, 0, fullpath))
163         {
164             wrbuf_printf(p->wr_error, "could not locate '%s'. Path=%s",
165                          stylesheet, p->path);
166             return -1;
167         }
168         xsp = xsltParseStylesheetFile((xmlChar*) fullpath);
169         if (!xsp)
170         {
171             wrbuf_printf(p->wr_error, "xsltParseStylesheetFile failed'");
172             return -1;
173         }
174         else
175         {
176             struct yaz_record_conv_rule *r = 
177                 add_rule(p, YAZ_RECORD_CONV_RULE_XSLT);
178             r->u.xslt.xsp = xsp;
179         }
180     }
181     return 0;
182 }
183
184 /** \brief parse 'marc' conversion node */
185 static int conv_marc(yaz_record_conv_t p, const xmlNode *ptr)
186 {
187     struct _xmlAttr *attr;
188     const char *input_charset = 0;
189     const char *output_charset = 0;
190     const char *input_format = 0;
191     const char *output_format = 0;
192     int input_format_mode = 0;
193     int output_format_mode = 0;
194     struct yaz_record_conv_rule *r;
195     yaz_iconv_t cd = 0;
196
197     for (attr = ptr->properties; attr; attr = attr->next)
198     {
199         if (!xmlStrcmp(attr->name, BAD_CAST "inputcharset") &&
200             attr->children && attr->children->type == XML_TEXT_NODE)
201             input_charset = (const char *) attr->children->content;
202         else if (!xmlStrcmp(attr->name, BAD_CAST "outputcharset") &&
203             attr->children && attr->children->type == XML_TEXT_NODE)
204             output_charset = (const char *) attr->children->content;
205         else if (!xmlStrcmp(attr->name, BAD_CAST "inputformat") &&
206             attr->children && attr->children->type == XML_TEXT_NODE)
207             input_format = (const char *) attr->children->content;
208         else if (!xmlStrcmp(attr->name, BAD_CAST "outputformat") &&
209             attr->children && attr->children->type == XML_TEXT_NODE)
210             output_format = (const char *) attr->children->content;
211         else
212         {
213             wrbuf_printf(p->wr_error, "Bad attribute '%s'", attr->name);
214             return -1;
215         }
216     }
217     if (!input_format)
218     {
219         wrbuf_printf(p->wr_error, "Attribute 'inputformat' required");
220         return -1;
221     }
222     else if (!strcmp(input_format, "marc"))
223     {
224         input_format_mode = YAZ_MARC_ISO2709;
225     }
226     else if (!strcmp(input_format, "xml"))
227     {
228         input_format_mode = YAZ_MARC_MARCXML;
229         /** Libxml2 generates UTF-8 encoding by default .
230             So we convert from UTF-8 to outputcharset (if defined) 
231         */
232         if (!input_charset && output_charset)
233             input_charset = "utf-8";
234     }
235     else
236     {
237         wrbuf_printf(p->wr_error, "Bad inputformat: '%s'", input_format);
238         return -1;
239     }
240     
241     if (!output_format)
242     {
243         wrbuf_printf(p->wr_error, "Attribute 'outputformat' required");
244         return -1;
245     }
246     else if (!strcmp(output_format, "line"))
247     {
248         output_format_mode = YAZ_MARC_LINE;
249     }
250     else if (!strcmp(output_format, "marcxml"))
251     {
252         output_format_mode = YAZ_MARC_MARCXML;
253         if (input_charset && !output_charset)
254             output_charset = "utf-8";
255     }
256     else if (!strcmp(output_format, "marc"))
257     {
258         output_format_mode = YAZ_MARC_ISO2709;
259     }
260     else if (!strcmp(output_format, "marcxchange"))
261     {
262         output_format_mode = YAZ_MARC_XCHANGE;
263         if (input_charset && !output_charset)
264             output_charset = "utf-8";
265     }
266     else
267     {
268         wrbuf_printf(p->wr_error, "Bad outputformat: '%s'", input_format);
269         return -1;
270     }
271     if (input_charset && output_charset)
272     {
273         cd = yaz_iconv_open(output_charset, input_charset);
274         if (!cd)
275         {
276             wrbuf_printf(p->wr_error, "Unsupported character set mamping"
277                          " inputcharset=%s outputcharset=%s",
278                          input_charset, output_charset);
279             return -1;
280         }
281     }
282     else if (input_charset)
283     {
284         wrbuf_printf(p->wr_error, "Attribute 'outputcharset' missing");
285         return -1;
286     }
287     else if (output_charset)
288     {
289         wrbuf_printf(p->wr_error, "Attribute 'inputcharset' missing");
290         return -1;
291     }
292     r = add_rule(p, YAZ_RECORD_CONV_RULE_MARC);
293     r->u.marc.iconv_t = cd;
294
295     r->u.marc.input_format = input_format_mode;
296     r->u.marc.output_format = output_format_mode;
297     return 0;
298 }
299
300 int yaz_record_conv_configure(yaz_record_conv_t p, const void *ptr_v)
301 {
302     const xmlNode *ptr = ptr_v; 
303
304     yaz_record_conv_reset(p);
305
306     if (ptr && ptr->type == XML_ELEMENT_NODE &&
307         !strcmp((const char *) ptr->name, "convert"))
308     {
309         for (ptr = ptr->children; ptr; ptr = ptr->next)
310         {
311             if (ptr->type != XML_ELEMENT_NODE)
312                 continue;
313             if (!strcmp((const char *) ptr->name, "xslt"))
314             {
315                 if (conv_xslt(p, ptr))
316                     return -1;
317             }
318             else if (!strcmp((const char *) ptr->name, "marc"))
319             {
320                 if (conv_marc(p, ptr))
321                     return -1;
322             }
323             else
324             {
325                 wrbuf_printf(p->wr_error, "Bad element '%s'."
326                               "Expected marc, xslt, ..", ptr->name);
327                 return -1;
328             }
329         }
330     }
331     else
332     {
333         wrbuf_printf(p->wr_error, "Missing 'convert' element");
334         return -1;
335     }
336     return 0;
337 }
338
339 int yaz_record_conv_record(yaz_record_conv_t p,
340                            const char *input_record_buf,
341                            size_t input_record_len,
342                            WRBUF output_record)
343 {
344     int ret = 0;
345     WRBUF record = output_record; /* pointer transfer */
346     struct yaz_record_conv_rule *r = p->rules;
347     wrbuf_rewind(p->wr_error);
348     
349     wrbuf_write(record, input_record_buf, input_record_len);
350     for (; ret == 0 && r; r = r->next)
351     {
352         if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
353         {
354             xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
355                                            wrbuf_len(record));
356             if (!doc)
357             {
358                 wrbuf_printf(p->wr_error, "xmlParseMemory failed");
359                 ret = -1;
360             }
361             else
362             {
363                 xmlDocPtr res = xsltApplyStylesheet(r->u.xslt.xsp, doc, 0);
364                 if (res)
365                 {
366                     xmlChar *out_buf;
367                     int out_len;
368                     xmlDocDumpFormatMemory (res, &out_buf, &out_len, 1);
369
370                     wrbuf_rewind(record);
371                     wrbuf_write(record, (const char *) out_buf, out_len);
372
373                     xmlFree(out_buf);
374                     xmlFreeDoc(res);
375                 }
376                 else
377                 {
378                     wrbuf_printf(p->wr_error, "xsltApplyStylesheet faailed");
379                     ret = -1;
380                 }
381                 xmlFreeDoc(doc);
382             }
383         }
384         else if (r->which == YAZ_RECORD_CONV_RULE_MARC)
385         {
386             yaz_marc_t mt = yaz_marc_create();
387
388             yaz_marc_xml(mt, r->u.marc.output_format);
389
390             if (r->u.marc.iconv_t)
391                 yaz_marc_iconv(mt, r->u.marc.iconv_t);
392             if (r->u.marc.input_format == YAZ_MARC_ISO2709)
393             {
394                 int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
395                                                wrbuf_len(record));
396                 if (sz > 0)
397                     ret = 0;
398                 else
399                     ret = -1;
400             }
401             else if (r->u.marc.input_format == YAZ_MARC_MARCXML)
402             {
403                 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
404                                                wrbuf_len(record));
405                 if (!doc)
406                 {
407                     wrbuf_printf(p->wr_error, "xmlParseMemory failed");
408                     ret = -1;
409                 }
410                 else
411                 {
412                     ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc));
413                     if (ret)
414                         wrbuf_printf(p->wr_error, "yaz_marc_read_xml failed");
415                 }
416                 xmlFreeDoc(doc);
417             }
418             else
419             {
420                 wrbuf_printf(p->wr_error, "unsupported input format");
421                 ret = -1;
422             }
423             if (ret == 0)
424             {
425                 wrbuf_rewind(record);
426                 ret = yaz_marc_write_mode(mt, record);
427                 if (ret)
428                     wrbuf_printf(p->wr_error, "yaz_marc_write_mode failed");
429             }
430             yaz_marc_destroy(mt);
431         }
432     }
433     return ret;
434 }
435
436 const char *yaz_record_conv_get_error(yaz_record_conv_t p)
437 {
438     return wrbuf_buf(p->wr_error);
439 }
440
441 void yaz_record_conv_set_path(yaz_record_conv_t p, const char *path)
442 {
443     xfree(p->path);
444     p->path = 0;
445     if (path)
446         p->path = xstrdup(path);
447 }
448 #endif
449
450 /*
451  * Local variables:
452  * c-basic-offset: 4
453  * indent-tabs-mode: nil
454  * End:
455  * vim: shiftwidth=4 tabstop=8 expandtab
456  */
457