Fix crash in record conv rule select YAZ-812
[yaz-moved-to-github.git] / src / record_conv.c
index e67ef00..2fbe986 100644 (file)
@@ -25,6 +25,8 @@
 #include <libxml/parser.h>
 #include <libxml/tree.h>
 #include <libxml/xinclude.h>
+#include <libxml/xpath.h>
+#include <libxml/xpathInternals.h>
 #if YAZ_HAVE_XSLT
 #include <libxslt/xsltutils.h>
 #include <libxslt/transform.h>
@@ -317,6 +319,97 @@ static void destroy_xslt(void *vinfo)
 /* YAZ_HAVE_XSLT */
 #endif
 
+struct select_info { /* state for one <select> conversion rule */
+    NMEM nmem;        /* memory handle this struct is allocated from */
+    char *xpath_expr; /* copy of the "path" attribute text, or 0 if none */
+};
+
+/* Build state for a <select path="..."/> rule; free it with destroy_select.
+ * Returns 0 if ptr is not <select>, or on a bad attribute (see wr_error). */
+static void *construct_select(const xmlNode *ptr,
+                              const char *path, WRBUF wr_error)
+{
+    struct _xmlAttr *attr;
+    NMEM nmem;
+    struct select_info *info;
+
+    if (strcmp((const char *) ptr->name, "select"))
+        return 0;
+    nmem = nmem_create();
+    info = nmem_malloc(nmem, sizeof(*info));
+    info->nmem = nmem;
+    info->xpath_expr = 0; /* stays 0 if no path attribute is present */
+    for (attr = ptr->properties; attr; attr = attr->next)
+    {
+        if (xmlStrcmp(attr->name, BAD_CAST "path") ||
+            !attr->children || attr->children->type != XML_TEXT_NODE)
+        {
+            wrbuf_printf(wr_error, "Bad attribute '%s'. "
+                         "Expected path.", attr->name);
+            nmem_destroy(nmem); /* frees info and any copied expression */
+            return 0;
+        }
+        info->xpath_expr =
+            nmem_strdup(nmem, (const char *) attr->children->content);
+    }
+    return info;
+}
+
+/* Replace record with the text selected by info->xpath_expr: parse record
+ * as XML, evaluate the XPath, and concatenate the text-node content of each
+ * match (for element matches, of its direct children). Returns -1 if the
+ * record does not parse; otherwise 0, leaving record untouched when there
+ * is no expression or the XPath context/evaluation cannot be set up. */
+static int convert_select(void *vinfo, WRBUF record, WRBUF wr_error)
+{
+    struct select_info *info = vinfo;
+    xmlXPathContextPtr xpathCtx;
+    xmlXPathObjectPtr xpathObj;
+    xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record), wrbuf_len(record));
+
+    if (!doc)
+    {
+        wrbuf_printf(wr_error, "xmlParseMemory failed");
+        return -1;
+    }
+    /* Create the context only when there is an expression to evaluate, so
+     * it can never be left unfreed on the no-expression path. */
+    xpathCtx = info->xpath_expr ? xmlXPathNewContext(doc) : 0;
+    xpathObj = !xpathCtx ? 0 :
+        xmlXPathEvalExpression((const xmlChar *) info->xpath_expr, xpathCtx);
+    if (xpathObj)
+    {
+        xmlNodeSetPtr nodes = xpathObj->nodesetval;
+        int i;
+
+        wrbuf_rewind(record); /* record is rebuilt from the parsed doc */
+        for (i = 0; nodes && i < nodes->nodeNr; i++)
+        {
+            xmlNode *ptr = nodes->nodeTab[i];
+            if (ptr->type == XML_ELEMENT_NODE)
+                ptr = ptr->children;
+            /* non-element matches are walked together with their siblings */
+            for (; ptr; ptr = ptr->next)
+                if (ptr->type == XML_TEXT_NODE)
+                    wrbuf_puts(record, (const char *) ptr->content);
+        }
+        xmlXPathFreeObject(xpathObj);
+    }
+    if (xpathCtx)
+        xmlXPathFreeContext(xpathCtx);
+    xmlFreeDoc(doc);
+    return 0;
+}
+
+/* Free a select rule's state by destroying the NMEM it was allocated from. */
+static void destroy_select(void *vinfo)
+{
+    struct select_info *sel = vinfo;
+    if (sel) /* a NULL vinfo is accepted and ignored */
+        nmem_destroy(sel->nmem);
+}
+
+
 static void *construct_solrmarc(const xmlNode *ptr,
                                 const char *path, WRBUF wr_error)
 {
@@ -519,23 +612,25 @@ static void *construct_marc(const xmlNode *ptr,
 static int convert_marc(void *info, WRBUF record, WRBUF wr_error)
 {
     struct marc_info *mi = info;
+    const char *input_charset = mi->input_charset;
     int ret = 0;
-
-    yaz_iconv_t cd = yaz_iconv_open(mi->output_charset, mi->input_charset);
     yaz_marc_t mt = yaz_marc_create();
 
     yaz_marc_xml(mt, mi->output_format_mode);
     if (mi->leader_spec)
         yaz_marc_leader_spec(mt, mi->leader_spec);
 
-    if (cd)
-        yaz_marc_iconv(mt, cd);
     if (mi->input_format_mode == YAZ_MARC_ISO2709)
     {
         int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
                                        wrbuf_len(record));
         if (sz > 0)
+        {
+            if (yaz_marc_check_marc21_coding(input_charset, wrbuf_buf(record),
+                                             wrbuf_len(record)))
+                input_charset = "utf-8";
             ret = 0;
+        }
         else
             ret = -1;
     }
@@ -564,13 +659,18 @@ static int convert_marc(void *info, WRBUF record, WRBUF wr_error)
     }
     if (ret == 0)
     {
+        yaz_iconv_t cd = yaz_iconv_open(mi->output_charset, input_charset);
+
+        if (cd)
+            yaz_marc_iconv(mt, cd);
+
         wrbuf_rewind(record);
         ret = yaz_marc_write_mode(mt, record);
         if (ret)
             wrbuf_printf(wr_error, "yaz_marc_write_mode failed");
+        if (cd)
+            yaz_iconv_close(cd);
     }
-    if (cd)
-        yaz_iconv_close(cd);
     yaz_marc_destroy(mt);
     return ret;
 }
@@ -585,7 +685,7 @@ static void destroy_marc(void *info)
 int yaz_record_conv_configure_t(yaz_record_conv_t p, const xmlNode *ptr,
                                 struct yaz_record_conv_type *types)
 {
-    struct yaz_record_conv_type bt[3];
+    struct yaz_record_conv_type bt[4];
     size_t i = 0;
 
     /* register marc */
@@ -598,6 +698,11 @@ int yaz_record_conv_configure_t(yaz_record_conv_t p, const xmlNode *ptr,
     bt[i].convert = convert_solrmarc;
     bt[i++].destroy = destroy_solrmarc;
 
+    bt[i-1].next = &bt[i];
+    bt[i].construct = construct_select;
+    bt[i].convert = convert_select;
+    bt[i++].destroy = destroy_select;
+
 #if YAZ_HAVE_XSLT
     /* register xslt */
     bt[i-1].next = &bt[i];
@@ -680,11 +785,15 @@ int yaz_record_conv_opac_record(yaz_record_conv_t p,
     else
     {
         struct marc_info *mi = r->info;
+        const char *input_charset = mi->input_charset;
+        yaz_iconv_t cd;
 
         WRBUF res = wrbuf_alloc();
         yaz_marc_t mt = yaz_marc_create();
-        yaz_iconv_t cd = yaz_iconv_open(mi->output_charset,
-                                        mi->input_charset);
+
+        if (yaz_opac_check_marc21_coding(input_charset, input_record))
+            input_charset = "utf-8";
+        cd = yaz_iconv_open(mi->output_charset, input_charset);
 
         wrbuf_rewind(p->wr_error);
         yaz_marc_xml(mt, mi->output_format_mode);