For Libxml2 and friends, YAZ defines YAZ_HAVE_{XML2,XSLT,EXSLT) in
[yaz-moved-to-github.git] / src / record_conv.c
index 4ceb023..9d8a934 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (C) 2005-2006, Index Data ApS
  * See the file LICENSE for details.
  *
- * $Id: record_conv.c,v 1.1 2006-05-02 20:47:45 adam Exp $
+ * $Id: record_conv.c,v 1.11 2006-07-06 10:17:53 adam Exp $
  */
 /**
  * \file record_conv.c
 #include <config.h>
 #endif
 
-#if HAVE_XML2
-#include <libxml/parser.h>
-#include <libxml/tree.h>
-#endif
-
 #include <string.h>
-
+#include <yaz/yaz-iconv.h>
+#include <yaz/marcdisp.h>
 #include <yaz/record_conv.h>
 #include <yaz/wrbuf.h>
 #include <yaz/xmalloc.h>
 #include <yaz/nmem.h>
+#include <yaz/tpath.h>
+
+#if YAZ_HAVE_XML2
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#include <libxml/xinclude.h>
+#if YAZ_HAVE_XSLT
+#include <libxslt/xsltutils.h>
+#include <libxslt/transform.h>
+#endif
+#if YAZ_HAVE_EXSLT
+#include <libexslt/exslt.h>
+#endif
 
 /** \brief The internal structure for yaz_record_conv_t */
 struct yaz_record_conv_struct {
-    /** memory for configuration */
+    /** \brief memory for configuration */
     NMEM nmem;
 
-    /** conversion rules (allocated using NMEM) */
+    /** \brief conversion rules (allocated using NMEM) */
     struct yaz_record_conv_rule *rules;
 
-    /** pointer to last conversion rule pointer in chain */
+    /** \brief pointer to last conversion rule pointer in chain */
     struct yaz_record_conv_rule **rules_p;
 
-    /** string buffer for error messages */
+    /** \brief string buffer for error messages */
     WRBUF wr_error;
+
+    /** \brief path for opening files  */
+    char *path;
 };
 
 /** \brief tranformation types (rule types) */
 enum YAZ_RECORD_CONV_RULE 
 {
     YAZ_RECORD_CONV_RULE_XSLT,
-    YAZ_RECORD_CONV_RULE_MARC_TO_XML,
-    YAZ_RECORD_CONV_RULE_XML_TO_MARC
+    YAZ_RECORD_CONV_RULE_MARC
 };
 
+
 /** \brief tranformation info (rule info) */
 struct yaz_record_conv_rule {
     enum YAZ_RECORD_CONV_RULE which;
     union {
+#if YAZ_HAVE_XSLT
         struct {
-            const char *stylesheet;
+            xsltStylesheetPtr xsp;
         } xslt;
+#endif
         struct {
-            const char *charset;
-        } marc_to_xml;
-        struct {
-            const char *charset;
-        } xml_to_marc;
+            yaz_iconv_t iconv_t;
+            int input_format;
+            int output_format;
+        } marc;
     } u;
     struct yaz_record_conv_rule *next;
 };
 
+/** \brief reset rules+configuration */
+static void yaz_record_conv_reset(yaz_record_conv_t p)
+{
+    struct yaz_record_conv_rule *r;
+    for (r = p->rules; r; r = r->next)
+    {
+        if (r->which == YAZ_RECORD_CONV_RULE_MARC)
+        {
+            if (r->u.marc.iconv_t)
+                yaz_iconv_close(r->u.marc.iconv_t);
+        }
+#if YAZ_HAVE_XSLT
+        else if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
+        {
+            xsltFreeStylesheet(r->u.xslt.xsp);
+        }
+#endif
+    }
+    wrbuf_rewind(p->wr_error);
+    nmem_reset(p->nmem);
+
+    p->rules = 0;
+
+    p->rules_p = &p->rules;
+}
+
 yaz_record_conv_t yaz_record_conv_create()
 {
     yaz_record_conv_t p = xmalloc(sizeof(*p));
     p->nmem = nmem_create();
     p->wr_error = wrbuf_alloc();
+    p->rules = 0;
+    p->path = 0;
+
+#if YAZ_HAVE_EXSLT
+    exsltRegisterAll(); 
+#endif
+    yaz_record_conv_reset(p);
     return p;
 }
 
@@ -77,13 +123,15 @@ void yaz_record_conv_destroy(yaz_record_conv_t p)
 {
     if (p)
     {
+        yaz_record_conv_reset(p);
         nmem_destroy(p->nmem);
         wrbuf_free(p->wr_error, 1);
+        xfree(p->path);
         xfree(p);
     }
 }
 
-#if HAVE_XML2
+/** \brief adds a rule */
 static struct yaz_record_conv_rule *add_rule(yaz_record_conv_t p,
                                              enum YAZ_RECORD_CONV_RULE type)
 {
@@ -95,16 +143,10 @@ static struct yaz_record_conv_rule *add_rule(yaz_record_conv_t p,
     return r;
 }
 
-static void yaz_record_conv_reset(yaz_record_conv_t p)
-{
-    wrbuf_rewind(p->wr_error);
-    nmem_reset(p->nmem);
-    p->rules = 0;
-    p->rules_p = &p->rules;
-}
-
+/** \brief parse 'xslt' conversion node */
 static int conv_xslt(yaz_record_conv_t p, const xmlNode *ptr)
 {
+#if YAZ_HAVE_XSLT
     struct _xmlAttr *attr;
     const char *stylesheet = 0;
 
@@ -115,75 +157,163 @@ static int conv_xslt(yaz_record_conv_t p, const xmlNode *ptr)
             stylesheet = (const char *) attr->children->content;
         else
         {
-            wrbuf_printf(p->wr_error, "Bad attribute '%s'."
+            wrbuf_printf(p->wr_error, "Bad attribute '%s'"
                          "Expected stylesheet.", attr->name);
             return -1;
         }
     }
-    if (stylesheet)
+    if (!stylesheet)
     {
-        struct yaz_record_conv_rule *r =
-            add_rule(p, YAZ_RECORD_CONV_RULE_XSLT);
-        r->u.xslt.stylesheet = nmem_strdup(p->nmem, stylesheet);
-        return 0;
+        wrbuf_printf(p->wr_error, "Missing attribute 'stylesheet'");
+        return -1;
     }
-    wrbuf_printf(p->wr_error, "Missing attribute 'stylesheet'");
-    return -1;
-}
-
-static int conv_marc_to_xml(yaz_record_conv_t p, const xmlNode *ptr)
-{
-    struct _xmlAttr *attr;
-    const char *charset = 0;
-    struct yaz_record_conv_rule *r;
-
-    for (attr = ptr->properties; attr; attr = attr->next)
+    else
     {
-        if (!xmlStrcmp(attr->name, BAD_CAST "charset") &&
-            attr->children && attr->children->type == XML_TEXT_NODE)
-            charset = (const char *) attr->children->content;
-        else
+        char fullpath[1024];
+        xsltStylesheetPtr xsp;
+        if (!yaz_filepath_resolve(stylesheet, p->path, 0, fullpath))
+        {
+            wrbuf_printf(p->wr_error, "could not locate '%s'. Path=%s",
+                         stylesheet, p->path);
+            return -1;
+        }
+        xsp = xsltParseStylesheetFile((xmlChar*) fullpath);
+        if (!xsp)
         {
-            wrbuf_printf(p->wr_error, "Bad attribute '%s'."
-                         "Expected charset.", attr->name);
+            wrbuf_printf(p->wr_error, "xsltParseStylesheetFile failed'");
             return -1;
         }
+        else
+        {
+            struct yaz_record_conv_rule *r = 
+                add_rule(p, YAZ_RECORD_CONV_RULE_XSLT);
+            r->u.xslt.xsp = xsp;
+        }
     }
-    r = add_rule(p, YAZ_RECORD_CONV_RULE_MARC_TO_XML);
-    if (charset)
-        r->u.marc_to_xml.charset = nmem_strdup(p->nmem, charset);
-    else
-        r->u.marc_to_xml.charset = 0;
     return 0;
+#else
+    wrbuf_printf(p->wr_error, "xslt unsupported."
+                 " YAZ compiled without XSLT support");
+    return -1;
+#endif
 }
 
-static int conv_xml_to_marc(yaz_record_conv_t p, const xmlNode *ptr)
+/** \brief parse 'marc' conversion node */
+static int conv_marc(yaz_record_conv_t p, const xmlNode *ptr)
 {
     struct _xmlAttr *attr;
-    const char *charset = 0;
+    const char *input_charset = 0;
+    const char *output_charset = 0;
+    const char *input_format = 0;
+    const char *output_format = 0;
+    int input_format_mode = 0;
+    int output_format_mode = 0;
     struct yaz_record_conv_rule *r;
+    yaz_iconv_t cd = 0;
 
     for (attr = ptr->properties; attr; attr = attr->next)
     {
-        if (!xmlStrcmp(attr->name, BAD_CAST "charset") &&
+        if (!xmlStrcmp(attr->name, BAD_CAST "inputcharset") &&
+            attr->children && attr->children->type == XML_TEXT_NODE)
+            input_charset = (const char *) attr->children->content;
+        else if (!xmlStrcmp(attr->name, BAD_CAST "outputcharset") &&
+            attr->children && attr->children->type == XML_TEXT_NODE)
+            output_charset = (const char *) attr->children->content;
+        else if (!xmlStrcmp(attr->name, BAD_CAST "inputformat") &&
             attr->children && attr->children->type == XML_TEXT_NODE)
-            charset = (const char *) attr->children->content;
+            input_format = (const char *) attr->children->content;
+        else if (!xmlStrcmp(attr->name, BAD_CAST "outputformat") &&
+            attr->children && attr->children->type == XML_TEXT_NODE)
+            output_format = (const char *) attr->children->content;
         else
         {
-            wrbuf_printf(p->wr_error, "Bad attribute '%s'."
-                         "Expected charset.", attr->name);
+            wrbuf_printf(p->wr_error, "Bad attribute '%s'", attr->name);
             return -1;
         }
     }
-    r = add_rule(p, YAZ_RECORD_CONV_RULE_XML_TO_MARC);
-    if (charset)
-        r->u.xml_to_marc.charset = nmem_strdup(p->nmem, charset);
+    if (!input_format)
+    {
+        wrbuf_printf(p->wr_error, "Attribute 'inputformat' required");
+        return -1;
+    }
+    else if (!strcmp(input_format, "marc"))
+    {
+        input_format_mode = YAZ_MARC_ISO2709;
+    }
+    else if (!strcmp(input_format, "xml"))
+    {
+        input_format_mode = YAZ_MARC_MARCXML;
+        /** Libxml2 generates UTF-8 encoding by default .
+            So we convert from UTF-8 to outputcharset (if defined) 
+        */
+        if (!input_charset && output_charset)
+            input_charset = "utf-8";
+    }
+    else
+    {
+        wrbuf_printf(p->wr_error, "Bad inputformat: '%s'", input_format);
+        return -1;
+    }
+    
+    if (!output_format)
+    {
+        wrbuf_printf(p->wr_error, "Attribute 'outputformat' required");
+        return -1;
+    }
+    else if (!strcmp(output_format, "line"))
+    {
+        output_format_mode = YAZ_MARC_LINE;
+    }
+    else if (!strcmp(output_format, "marcxml"))
+    {
+        output_format_mode = YAZ_MARC_MARCXML;
+        if (input_charset && !output_charset)
+            output_charset = "utf-8";
+    }
+    else if (!strcmp(output_format, "marc"))
+    {
+        output_format_mode = YAZ_MARC_ISO2709;
+    }
+    else if (!strcmp(output_format, "marcxchange"))
+    {
+        output_format_mode = YAZ_MARC_XCHANGE;
+        if (input_charset && !output_charset)
+            output_charset = "utf-8";
+    }
     else
-        r->u.xml_to_marc.charset = 0;
+    {
+        wrbuf_printf(p->wr_error, "Bad outputformat: '%s'", input_format);
+        return -1;
+    }
+    if (input_charset && output_charset)
+    {
+        cd = yaz_iconv_open(output_charset, input_charset);
+        if (!cd)
+        {
+            wrbuf_printf(p->wr_error, "Unsupported character set mamping"
+                         " inputcharset=%s outputcharset=%s",
+                         input_charset, output_charset);
+            return -1;
+        }
+    }
+    else if (input_charset)
+    {
+        wrbuf_printf(p->wr_error, "Attribute 'outputcharset' missing");
+        return -1;
+    }
+    else if (output_charset)
+    {
+        wrbuf_printf(p->wr_error, "Attribute 'inputcharset' missing");
+        return -1;
+    }
+    r = add_rule(p, YAZ_RECORD_CONV_RULE_MARC);
+    r->u.marc.iconv_t = cd;
+
+    r->u.marc.input_format = input_format_mode;
+    r->u.marc.output_format = output_format_mode;
     return 0;
 }
 
-
 int yaz_record_conv_configure(yaz_record_conv_t p, const void *ptr_v)
 {
     const xmlNode *ptr = ptr_v; 
@@ -202,20 +332,26 @@ int yaz_record_conv_configure(yaz_record_conv_t p, const void *ptr_v)
                 if (conv_xslt(p, ptr))
                     return -1;
             }
-            else if (!strcmp((const char *) ptr->name, "marc_to_xml"))
+            else if (!strcmp((const char *) ptr->name, "exslt"))
             {
-                if (conv_marc_to_xml(p, ptr))
+#if YAZ_HAVE_EXSLT
+                if (conv_xslt(p, ptr))
                     return -1;
+#else
+                wrbuf_printf(p->wr_error, "exslt unsupported."
+                             " YAZ compiled without EXSLT support");
+                return -1;
+#endif
             }
-            else if (!strcmp((const char *) ptr->name, "xml_to_marc"))
+            else if (!strcmp((const char *) ptr->name, "marc"))
             {
-                if (conv_xml_to_marc(p, ptr))
+                if (conv_marc(p, ptr))
                     return -1;
             }
             else
             {
                 wrbuf_printf(p->wr_error, "Bad element '%s'."
-                             "Expected xslt, marc_to_xml,...", ptr->name);
+                              "Expected marc, xslt, ..", ptr->name);
                 return -1;
             }
         }
@@ -228,22 +364,133 @@ int yaz_record_conv_configure(yaz_record_conv_t p, const void *ptr_v)
     return 0;
 }
 
-#else
-/* HAVE_XML2 */
-int yaz_record_conv_configure(yaz_record_conv_t p, const void *ptr_v)
+int yaz_record_conv_record(yaz_record_conv_t p,
+                           const char *input_record_buf,
+                           size_t input_record_len,
+                           WRBUF output_record)
 {
+    int ret = 0;
+    WRBUF record = output_record; /* pointer transfer */
+    struct yaz_record_conv_rule *r = p->rules;
     wrbuf_rewind(p->wr_error);
-    wrbuf_printf(p->wr_error, "No XML support for yaz_record_conv");
-    return -1;
-}
+    
+    wrbuf_write(record, input_record_buf, input_record_len);
+    for (; ret == 0 && r; r = r->next)
+    {
+        if (r->which == YAZ_RECORD_CONV_RULE_MARC)
+        {
+            yaz_marc_t mt = yaz_marc_create();
+
+            yaz_marc_xml(mt, r->u.marc.output_format);
+
+            if (r->u.marc.iconv_t)
+                yaz_marc_iconv(mt, r->u.marc.iconv_t);
+            if (r->u.marc.input_format == YAZ_MARC_ISO2709)
+            {
+                int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
+                                               wrbuf_len(record));
+                if (sz > 0)
+                    ret = 0;
+                else
+                    ret = -1;
+            }
+            else if (r->u.marc.input_format == YAZ_MARC_MARCXML)
+            {
+                xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
+                                               wrbuf_len(record));
+                if (!doc)
+                {
+                    wrbuf_printf(p->wr_error, "xmlParseMemory failed");
+                    ret = -1;
+                }
+                else
+                {
+                    ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc));
+                    if (ret)
+                        wrbuf_printf(p->wr_error, "yaz_marc_read_xml failed");
+                }
+                xmlFreeDoc(doc);
+            }
+            else
+            {
+                wrbuf_printf(p->wr_error, "unsupported input format");
+                ret = -1;
+            }
+            if (ret == 0)
+            {
+                wrbuf_rewind(record);
+                ret = yaz_marc_write_mode(mt, record);
+                if (ret)
+                    wrbuf_printf(p->wr_error, "yaz_marc_write_mode failed");
+            }
+            yaz_marc_destroy(mt);
+        }
+#if YAZ_HAVE_XSLT
+        else if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
+        {
+            xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
+                                           wrbuf_len(record));
+            if (!doc)
+            {
+                wrbuf_printf(p->wr_error, "xmlParseMemory failed");
+                ret = -1;
+            }
+            else
+            {
+                xmlDocPtr res = xsltApplyStylesheet(r->u.xslt.xsp, doc, 0);
+                if (res)
+                {
+                    xmlChar *out_buf = 0;
+                    int out_len;
 
+#if YAZ_HAVE_XSLTSAVERESULTTOSTRING
+                    xsltSaveResultToString(&out_buf, &out_len, res,
+                                           r->u.xslt.xsp); 
+#else
+                    xmlDocDumpFormatMemory (res, &out_buf, &out_len, 1);
 #endif
+                    if (!out_buf)
+                    {
+                        wrbuf_printf(p->wr_error,
+                                     "xsltSaveResultToString failed");
+                        ret = -1;
+                    }
+                    else
+                    {
+                        wrbuf_rewind(record);
+                        wrbuf_write(record, (const char *) out_buf, out_len);
+                        
+                        xmlFree(out_buf);
+                    }
+                    xmlFreeDoc(res);
+                }
+                else
+                {
+                    wrbuf_printf(p->wr_error, "xsltApplyStylesheet failed");
+                    ret = -1;
+                }
+                xmlFreeDoc(doc);
+            }
+        }
+#endif
+    }
+    return ret;
+}
 
 const char *yaz_record_conv_get_error(yaz_record_conv_t p)
 {
     return wrbuf_buf(p->wr_error);
 }
 
+void yaz_record_conv_set_path(yaz_record_conv_t p, const char *path)
+{
+    xfree(p->path);
+    p->path = 0;
+    if (path)
+        p->path = xstrdup(path);
+}
+#endif
+
 /*
  * Local variables:
  * c-basic-offset: 4