MARC-in-JSON support for record conversion utility
[yaz-moved-to-github.git] / src / record_conv.c
index a272aee..671342c 100644 (file)
@@ -1,5 +1,5 @@
 /* This file is part of the YAZ toolkit.
- * Copyright (C) 1995-2012 Index Data
+ * Copyright (C) 1995-2013 Index Data
  * See the file LICENSE for details.
  */
 /**
@@ -57,6 +57,7 @@ struct marc_info {
     const char *output_charset;
     int input_format_mode;
     int output_format_mode;
+    const char *leader_spec;
 };
 
 /** \brief tranformation info (rule info) */
@@ -97,11 +98,21 @@ void yaz_record_conv_destroy(yaz_record_conv_t p)
 }
 
 #if YAZ_HAVE_XSLT
+struct xslt_info { /* state kept for one <xslt> rule; built by construct_xslt, released by destroy_xslt */
+    NMEM nmem; /* memory handle that this struct, xsl_parms and the parameter strings are allocated from */
+    xmlDocPtr xsp_doc; /* stylesheet document as parsed by xmlParseFile; it is copied before being handed to the XSLT processor */
+    const char **xsl_parms; /* name/value pointer pairs (values stored wrapped in single quotes), ended by a null entry; passed to xsltApplyStylesheet */
+};
+
 static void *construct_xslt(const xmlNode *ptr,
                             const char *path, WRBUF wr_error)
 {
     struct _xmlAttr *attr;
     const char *stylesheet = 0;
+    struct xslt_info *info = 0;
+    NMEM nmem = 0;
+    int max_parms = 10;
+    int no_parms = 0;
 
     if (strcmp((const char *) ptr->name, "xslt"))
         return 0;
@@ -118,17 +129,77 @@ static void *construct_xslt(const xmlNode *ptr,
             return 0;
         }
     }
+    nmem = nmem_create();
+    info = nmem_malloc(nmem, sizeof(*info));
+    info->nmem = nmem;
+    info->xsl_parms = nmem_malloc(
+        nmem, (2 * max_parms + 1) * sizeof(*info->xsl_parms));
+
+    for (ptr = ptr->children; ptr; ptr = ptr->next)
+    {
+        const char *name = 0;
+        const char *value = 0;
+        char *qvalue = 0;
+        if (ptr->type != XML_ELEMENT_NODE)
+            continue;
+        if (strcmp((const char *) ptr->name, "param"))
+        {
+            wrbuf_printf(wr_error, "Bad element '%s'"
+                         "Expected param.", ptr->name);
+            nmem_destroy(nmem);
+            return 0;
+        }
+        for (attr = ptr->properties; attr; attr = attr->next)
+        {
+            if (!xmlStrcmp(attr->name, BAD_CAST "name") &&
+                attr->children && attr->children->type == XML_TEXT_NODE)
+                name = (const char *) attr->children->content;
+            else if (!xmlStrcmp(attr->name, BAD_CAST "value") &&
+                attr->children && attr->children->type == XML_TEXT_NODE)
+                value = (const char *) attr->children->content;
+            else
+            {
+                wrbuf_printf(wr_error, "Bad attribute '%s'"
+                             "Expected name or value.", attr->name);
+                nmem_destroy(nmem);
+                return 0;
+            }
+        }
+        if (!name || !value)
+        {
+            wrbuf_printf(wr_error, "Missing attributes name or value");
+            nmem_destroy(nmem);
+            return 0;
+        }
+        if (no_parms >= max_parms)
+        {
+            wrbuf_printf(wr_error, "Too many parameters given");
+            nmem_destroy(nmem);
+            return 0;
+        }
+
+        qvalue = nmem_malloc(nmem, strlen(value) + 3);
+        strcpy(qvalue, "\'");
+        strcat(qvalue, value);
+        strcat(qvalue, "\'");
+
+        info->xsl_parms[2 * no_parms] = nmem_strdup(nmem, name);
+        info->xsl_parms[2 * no_parms + 1] = qvalue;
+        no_parms++;
+    }
+
+    info->xsl_parms[2 * no_parms] = '\0';
+
     if (!stylesheet)
     {
         wrbuf_printf(wr_error, "Element <xslt>: "
                      "attribute 'stylesheet' expected");
-        return 0;
+        nmem_destroy(nmem);
     }
     else
     {
         char fullpath[1024];
         xsltStylesheetPtr xsp;
-        xmlDocPtr xsp_doc;
         if (!yaz_filepath_resolve(stylesheet, path, 0, fullpath))
         {
             wrbuf_printf(wr_error, "Element <xslt stylesheet=\"%s\"/>:"
@@ -136,21 +207,23 @@ static void *construct_xslt(const xmlNode *ptr,
                          stylesheet, stylesheet);
             if (path)
                 wrbuf_printf(wr_error, " with path '%s'", path);
-                
+
+            nmem_destroy(nmem);
             return 0;
         }
-        xsp_doc = xmlParseFile(fullpath);
-        if (!xsp_doc)
+        info->xsp_doc = xmlParseFile(fullpath);
+        if (!info->xsp_doc)
         {
             wrbuf_printf(wr_error, "Element: <xslt stylesheet=\"%s\"/>:"
                          " xml parse failed: %s", stylesheet, fullpath);
             if (path)
                 wrbuf_printf(wr_error, " with path '%s'", path);
+            nmem_destroy(nmem);
             return 0;
         }
         /* need to copy this before passing it to the processor. It will
            be encapsulated in the xsp and destroyed by xsltFreeStylesheet */
-        xsp = xsltParseStylesheetDoc(xmlCopyDoc(xsp_doc, 1));
+        xsp = xsltParseStylesheetDoc(xmlCopyDoc(info->xsp_doc, 1));
         if (!xsp)
         {
             wrbuf_printf(wr_error, "Element: <xslt stylesheet=\"%s\"/>:"
@@ -159,27 +232,29 @@ static void *construct_xslt(const xmlNode *ptr,
                 wrbuf_printf(wr_error, " with path '%s'", path);
             wrbuf_printf(wr_error, " ("
 #if YAZ_HAVE_EXSLT
-                         
+
                          "EXSLT enabled"
 #else
                          "EXSLT not supported"
 #endif
                          ")");
-            xmlFreeDoc(xsp_doc);
-            return 0;
+            xmlFreeDoc(info->xsp_doc);
+            nmem_destroy(info->nmem);
         }
         else
         {
             xsltFreeStylesheet(xsp);
-            return xsp_doc;
+            return info;
         }
     }
     return 0;
 }
 
-static int convert_xslt(void *info, WRBUF record, WRBUF wr_error)
+static int convert_xslt(void *vinfo, WRBUF record, WRBUF wr_error)
 {
     int ret = 0;
+    struct xslt_info *info = vinfo;
+
     xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
                                    wrbuf_len(record));
     if (!doc)
@@ -189,14 +264,14 @@ static int convert_xslt(void *info, WRBUF record, WRBUF wr_error)
     }
     else
     {
-        xmlDocPtr xsp_doc = xmlCopyDoc((xmlDocPtr) info, 1);
+        xmlDocPtr xsp_doc = xmlCopyDoc(info->xsp_doc, 1);
         xsltStylesheetPtr xsp = xsltParseStylesheetDoc(xsp_doc);
-        xmlDocPtr res = xsltApplyStylesheet(xsp, doc, 0);
+        xmlDocPtr res = xsltApplyStylesheet(xsp, doc, info->xsl_parms);
         if (res)
         {
             xmlChar *out_buf = 0;
             int out_len;
-            
+
 #if HAVE_XSLTSAVERESULTTOSTRING
             xsltSaveResultToString(&out_buf, &out_len, res, xsp);
 #else
@@ -212,7 +287,7 @@ static int convert_xslt(void *info, WRBUF record, WRBUF wr_error)
             {
                 wrbuf_rewind(record);
                 wrbuf_write(record, (const char *) out_buf, out_len);
-                
+
                 xmlFree(out_buf);
             }
             xmlFreeDoc(res);
@@ -228,12 +303,14 @@ static int convert_xslt(void *info, WRBUF record, WRBUF wr_error)
     return ret;
 }
 
-static void destroy_xslt(void *info)
+static void destroy_xslt(void *vinfo)
 {
+    struct xslt_info *info = vinfo; /* handle returned by construct_xslt; a null handle is tolerated below */
+
     if (info)
     {
-        xmlDocPtr xsp_doc = info;
-        xmlFreeDoc(xsp_doc);
+        xmlFreeDoc(info->xsp_doc); /* release the stylesheet document that construct_xslt parsed */
+        nmem_destroy(info->nmem); /* done last: info itself was allocated from this NMEM, so it must not be used afterwards */
     }
 }
 
@@ -261,6 +338,7 @@ static void *construct_marc(const xmlNode *ptr,
     info->output_charset = 0;
     info->input_format_mode = 0;
     info->output_format_mode = 0;
+    info->leader_spec = 0;
 
     for (attr = ptr->properties; attr; attr = attr->next)
     {
@@ -276,11 +354,15 @@ static void *construct_marc(const xmlNode *ptr,
         else if (!xmlStrcmp(attr->name, BAD_CAST "outputformat") &&
             attr->children && attr->children->type == XML_TEXT_NODE)
             output_format = (const char *) attr->children->content;
+        else if (!xmlStrcmp(attr->name, BAD_CAST "leaderspec") &&
+                 attr->children && attr->children->type == XML_TEXT_NODE)
+            info->leader_spec =
+                nmem_strdup(info->nmem,(const char *) attr->children->content);
         else
         {
             wrbuf_printf(wr_error, "Element <marc>: expected attributes"
                          "'inputformat', 'inputcharset', 'outputformat' or"
-                         " 'outputcharset', got attribute '%s'", 
+                         " 'outputcharset', got attribute '%s'",
                          attr->name);
             nmem_destroy(info->nmem);
             return 0;
@@ -301,24 +383,28 @@ static void *construct_marc(const xmlNode *ptr,
     {
         info->input_format_mode = YAZ_MARC_MARCXML;
         /** Libxml2 generates UTF-8 encoding by default .
-            So we convert from UTF-8 to outputcharset (if defined) 
+            So we convert from UTF-8 to outputcharset (if defined)
         */
         if (!info->input_charset && info->output_charset)
             info->input_charset = "utf-8";
     }
+    else if (!strcmp(input_format, "json"))
+    {
+        info->input_format_mode = YAZ_MARC_JSON;
+    }
     else
     {
         wrbuf_printf(wr_error, "Element <marc inputformat='%s'>: "
                      " Unsupported input format"
-                     " defined by attribute value", 
+                     " defined by attribute value",
                      input_format);
         nmem_destroy(info->nmem);
         return 0;
     }
-    
+
     if (!output_format)
     {
-        wrbuf_printf(wr_error, 
+        wrbuf_printf(wr_error,
                      "Element <marc>: attribute 'outputformat' required");
         nmem_destroy(info->nmem);
         return 0;
@@ -349,11 +435,17 @@ static void *construct_marc(const xmlNode *ptr,
         if (info->input_charset && !info->output_charset)
             info->output_charset = "utf-8";
     }
+    else if (!strcmp(output_format, "json"))
+    {
+        info->output_format_mode = YAZ_MARC_JSON;
+        if (info->input_charset && !info->output_charset)
+            info->output_charset = "utf-8";
+    }
     else
     {
         wrbuf_printf(wr_error, "Element <marc outputformat='%s'>: "
                      " Unsupported output format"
-                     " defined by attribute value", 
+                     " defined by attribute value",
                      output_format);
         nmem_destroy(info->nmem);
         return 0;
@@ -364,7 +456,7 @@ static void *construct_marc(const xmlNode *ptr,
                                         info->input_charset);
         if (!cd)
         {
-            wrbuf_printf(wr_error, 
+            wrbuf_printf(wr_error,
                          "Element <marc inputcharset='%s' outputcharset='%s'>:"
                          " Unsupported character set mapping"
                          " defined by attribute values",
@@ -397,12 +489,14 @@ static int convert_marc(void *info, WRBUF record, WRBUF wr_error)
 {
     struct marc_info *mi = info;
     int ret = 0;
-    
+
     yaz_iconv_t cd = yaz_iconv_open(mi->output_charset, mi->input_charset);
     yaz_marc_t mt = yaz_marc_create();
-    
+
     yaz_marc_xml(mt, mi->output_format_mode);
-    
+    if (mi->leader_spec)
+        yaz_marc_leader_spec(mt, mi->leader_spec);
+
     if (cd)
         yaz_marc_iconv(mt, cd);
     if (mi->input_format_mode == YAZ_MARC_ISO2709)
@@ -453,7 +547,7 @@ static int convert_marc(void *info, WRBUF record, WRBUF wr_error)
 static void destroy_marc(void *info)
 {
     struct marc_info *mi = info;
-    
+
     nmem_destroy(mi->nmem);
 }
 
@@ -461,7 +555,7 @@ int yaz_record_conv_configure_t(yaz_record_conv_t p, const xmlNode *ptr,
                                 struct yaz_record_conv_type *types)
 {
     struct yaz_record_conv_type bt[2];
-    
+
     /* register marc */
     bt[0].construct = construct_marc;
     bt[0].convert = convert_marc;
@@ -477,7 +571,7 @@ int yaz_record_conv_configure_t(yaz_record_conv_t p, const xmlNode *ptr,
 #else
     bt[0].next = types;
 #endif
-    
+
     yaz_record_conv_reset(p);
 
     /* parsing element children */
@@ -530,7 +624,7 @@ static int yaz_record_conv_record_rule(yaz_record_conv_t p,
     int ret = 0;
     WRBUF record = output_record; /* pointer transfer */
     wrbuf_rewind(p->wr_error);
-    
+
     wrbuf_write(record, input_record_buf, input_record_len);
     for (; ret == 0 && r; r = r->next)
         ret = r->type->convert(r->info, record, p->wr_error);
@@ -544,7 +638,10 @@ int yaz_record_conv_opac_record(yaz_record_conv_t p,
     int ret = 0;
     struct yaz_record_conv_rule *r = p->rules;
     if (!r || r->type->construct != construct_marc)
+    {
+        wrbuf_puts(p->wr_error, "Expecting MARC rule as first rule for OPAC");
         ret = -1; /* no marc rule so we can't do OPAC */
+    }
     else
     {
         struct marc_info *mi = r->info;
@@ -553,16 +650,16 @@ int yaz_record_conv_opac_record(yaz_record_conv_t p,
         yaz_marc_t mt = yaz_marc_create();
         yaz_iconv_t cd = yaz_iconv_open(mi->output_charset,
                                         mi->input_charset);
-        
+
         wrbuf_rewind(p->wr_error);
         yaz_marc_xml(mt, mi->output_format_mode);
-        
+
         yaz_marc_iconv(mt, cd);
-        
+
         yaz_opac_decode_wrbuf(mt, input_record, res);
         if (ret != -1)
         {
-            ret = yaz_record_conv_record_rule(p, 
+            ret = yaz_record_conv_record_rule(p,
                                               r->next,
                                               wrbuf_buf(res), wrbuf_len(res),
                                               output_record);
@@ -606,8 +703,8 @@ yaz_record_conv_t yaz_record_conv_create()
     p->rules = 0;
     p->path = 0;
 #if YAZ_HAVE_EXSLT
-    exsltRegisterAll(); 
-#endif    
+    exsltRegisterAll();
+#endif
     return p;
 }