record_conv: selection by X-Path YAZ-811
[yaz-moved-to-github.git] / src / record_conv.c
index ff95f92..1595faf 100644 (file)
@@ -1,5 +1,5 @@
 /* This file is part of the YAZ toolkit.
- * Copyright (C) 1995-2013 Index Data
+ * Copyright (C) Index Data
  * See the file LICENSE for details.
  */
 /**
@@ -25,6 +25,8 @@
 #include <libxml/parser.h>
 #include <libxml/tree.h>
 #include <libxml/xinclude.h>
+#include <libxml/xpath.h>
+#include <libxml/xpathInternals.h>
 #if YAZ_HAVE_XSLT
 #include <libxslt/xsltutils.h>
 #include <libxslt/transform.h>
@@ -317,6 +319,127 @@ static void destroy_xslt(void *vinfo)
 /* YAZ_HAVE_XSLT */
 #endif
 
+/* State kept for one <select> conversion rule. */
+struct select_info {
+    /* allocator that this struct and xpath_expr are taken from */
+    NMEM nmem;
+    /* copy of the "path" attribute value; 0 when no such attribute is given */
+    char *xpath_expr;
+};
+
+/*
+ * Builds the state for a <select path="..."/> conversion rule.
+ *
+ * ptr       configuration element to inspect
+ * path      not used by this rule
+ * wr_error  receives a message when an unexpected attribute is met
+ *
+ * Returns a struct select_info allocated from its own NMEM, or 0 when ptr
+ * is not a "select" element or when it carries anything but a textual
+ * "path" attribute.
+ */
+static void *construct_select(const xmlNode *ptr,
+                              const char *path, WRBUF wr_error)
+{
+    struct _xmlAttr *attr;
+    NMEM nmem;
+    struct select_info *info;
+
+    if (strcmp((const char *) ptr->name, "select"))
+        return 0;
+
+    nmem = nmem_create();
+    info = nmem_malloc(nmem, sizeof(*info));
+    info->nmem = nmem;
+    info->xpath_expr = 0;
+    for (attr = ptr->properties; attr; attr = attr->next)
+    {
+        if (!xmlStrcmp(attr->name, BAD_CAST "path") &&
+            attr->children && attr->children->type == XML_TEXT_NODE)
+            info->xpath_expr =
+                nmem_strdup(nmem, (const char *) attr->children->content);
+        else
+        {
+            /* "path" is the only attribute accepted; the message names it
+               and keeps the two sentences apart */
+            wrbuf_printf(wr_error, "Bad attribute '%s'. "
+                         "Expected path.", attr->name);
+            nmem_destroy(nmem);
+            return 0;
+        }
+    }
+    return info;
+}
+
+/*
+ * Replaces the content of record with the text selected by the rule's
+ * XPath expression.
+ *
+ * vinfo     struct select_info produced by construct_select
+ * record    XML document on input; selected text on output
+ * wr_error  receives a message when the record cannot be parsed
+ *
+ * For every node in the resulting node set: an element contributes the
+ * content of its direct text children, a text node contributes its own
+ * content, any other node type contributes nothing.
+ *
+ * Returns -1 when the record is not parseable XML, 0 otherwise. The record
+ * is left untouched when no expression is configured, when no XPath
+ * context can be created or when the expression fails to evaluate.
+ */
+static int convert_select(void *vinfo, WRBUF record, WRBUF wr_error)
+{
+    struct select_info *info = vinfo;
+    xmlXPathContextPtr xpathCtx;
+    xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
+                                   wrbuf_len(record));
+    if (!doc)
+    {
+        wrbuf_printf(wr_error, "xmlParseMemory failed");
+        return -1;
+    }
+    xpathCtx = xmlXPathNewContext(doc);
+    if (xpathCtx)
+    {
+        xmlXPathObjectPtr xpathObj = 0;
+
+        if (info->xpath_expr)
+            xpathObj =
+                xmlXPathEvalExpression((const xmlChar *) info->xpath_expr,
+                                       xpathCtx);
+        if (xpathObj)
+        {
+            xmlNodeSetPtr nodes = xpathObj->nodesetval;
+            int i;
+
+            wrbuf_rewind(record);
+            /* nodes is 0 when the result is not a node set */
+            for (i = 0; nodes && i < nodes->nodeNr; i++)
+            {
+                const xmlNode *ptr = nodes->nodeTab[i];
+
+                if (ptr->type == XML_ELEMENT_NODE)
+                {
+                    /* an element may have no children at all, and its
+                       children may be of any type: test each of them */
+                    for (ptr = ptr->children; ptr; ptr = ptr->next)
+                        if (ptr->type == XML_TEXT_NODE && ptr->content)
+                            wrbuf_puts(record, (const char *) ptr->content);
+                }
+                else if (ptr->type == XML_TEXT_NODE && ptr->content)
+                    wrbuf_puts(record, (const char *) ptr->content);
+            }
+            xmlXPathFreeObject(xpathObj);
+        }
+        /* released on every path, also when no expression is configured */
+        xmlXPathFreeContext(xpathCtx);
+    }
+    xmlFreeDoc(doc);
+    return 0;
+}
+
+/*
+ * Releases the state created by construct_select. The struct and its
+ * expression string both come from the NMEM stored in the struct, so
+ * destroying that NMEM is the whole cleanup.
+ */
+static void destroy_select(void *info)
+{
+    struct select_info *si = info;
+
+    if (si)
+        nmem_destroy(si->nmem);
+}
+
+
+/*
+ * Recognises a <solrmarc> conversion rule. The rule keeps no state;
+ * returns 0 for any other element name.
+ */
+static void *construct_solrmarc(const xmlNode *ptr,
+                                const char *path, WRBUF wr_error)
+{
+    /* any non-null pointer will do as the result; it is not used later */
+    if (!strcmp((const char *) ptr->name, "solrmarc"))
+        return wr_error;
+    return 0;
+}
+
+/*
+ * Rewrites record in place: each four-character sequence made of '#', two
+ * characters accepted by atoi_n_check and ';' is replaced by the single
+ * character whose code atoi_n_check produced. Everything else is copied
+ * unchanged.
+ *
+ * info and wr_error are not used. Always returns 0.
+ */
+static int convert_solrmarc(void *info, WRBUF record, WRBUF wr_error)
+{
+    WRBUF w = wrbuf_alloc();
+    const char *buf = wrbuf_buf(record);
+    size_t i, sz = wrbuf_len(record);
+    for (i = 0; i < sz; i++)
+    {
+        int ch;
+        /* written as i + 3 < sz, because sz - 3 wraps around for records
+           shorter than three bytes and would let buf[i+3] read past the
+           end of the buffer */
+        if (buf[i] == '#' && i + 3 < sz && buf[i+3] == ';'
+            && atoi_n_check(buf+i+1, 2, &ch))
+            i += 3;
+        else
+            ch = buf[i];
+        wrbuf_putc(w, ch);
+    }
+    wrbuf_rewind(record);
+    wrbuf_write(record, wrbuf_buf(w), wrbuf_len(w));
+    wrbuf_destroy(w);
+    return 0;
+}
+
+/* Nothing to release: this function intentionally does no work. */
+static void destroy_solrmarc(void *info)
+{
+    (void) info;
+}
 
 static void *construct_marc(const xmlNode *ptr,
                             const char *path, WRBUF wr_error)
@@ -332,7 +455,6 @@ static void *construct_marc(const xmlNode *ptr,
         nmem_destroy(nmem);
         return 0;
     }
-
     info->nmem = nmem;
     info->input_charset = 0;
     info->output_charset = 0;
@@ -388,6 +510,10 @@ static void *construct_marc(const xmlNode *ptr,
         if (!info->input_charset && info->output_charset)
             info->input_charset = "utf-8";
     }
+    else if (!strcmp(input_format, "json"))
+    {
+        info->input_format_mode = YAZ_MARC_JSON;
+    }
     else
     {
         wrbuf_printf(wr_error, "Element <marc inputformat='%s'>: "
@@ -431,6 +557,12 @@ static void *construct_marc(const xmlNode *ptr,
         if (info->input_charset && !info->output_charset)
             info->output_charset = "utf-8";
     }
+    else if (!strcmp(output_format, "json"))
+    {
+        info->output_format_mode = YAZ_MARC_JSON;
+        if (info->input_charset && !info->output_charset)
+            info->output_charset = "utf-8";
+    }
     else
     {
         wrbuf_printf(wr_error, "Element <marc outputformat='%s'>: "
@@ -478,23 +610,25 @@ static void *construct_marc(const xmlNode *ptr,
 static int convert_marc(void *info, WRBUF record, WRBUF wr_error)
 {
     struct marc_info *mi = info;
+    const char *input_charset = mi->input_charset;
     int ret = 0;
-
-    yaz_iconv_t cd = yaz_iconv_open(mi->output_charset, mi->input_charset);
     yaz_marc_t mt = yaz_marc_create();
 
     yaz_marc_xml(mt, mi->output_format_mode);
     if (mi->leader_spec)
         yaz_marc_leader_spec(mt, mi->leader_spec);
 
-    if (cd)
-        yaz_marc_iconv(mt, cd);
     if (mi->input_format_mode == YAZ_MARC_ISO2709)
     {
         int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
                                        wrbuf_len(record));
         if (sz > 0)
+        {
+            if (yaz_marc_check_marc21_coding(input_charset, wrbuf_buf(record),
+                                             wrbuf_len(record)))
+                input_charset = "utf-8";
             ret = 0;
+        }
         else
             ret = -1;
     }
@@ -523,13 +657,18 @@ static int convert_marc(void *info, WRBUF record, WRBUF wr_error)
     }
     if (ret == 0)
     {
+        yaz_iconv_t cd = yaz_iconv_open(mi->output_charset, input_charset);
+
+        if (cd)
+            yaz_marc_iconv(mt, cd);
+
         wrbuf_rewind(record);
         ret = yaz_marc_write_mode(mt, record);
         if (ret)
             wrbuf_printf(wr_error, "yaz_marc_write_mode failed");
+        if (cd)
+            yaz_iconv_close(cd);
     }
-    if (cd)
-        yaz_iconv_close(cd);
     yaz_marc_destroy(mt);
     return ret;
 }
@@ -544,24 +683,33 @@ static void destroy_marc(void *info)
 int yaz_record_conv_configure_t(yaz_record_conv_t p, const xmlNode *ptr,
                                 struct yaz_record_conv_type *types)
 {
-    struct yaz_record_conv_type bt[2];
+    struct yaz_record_conv_type bt[4];
+    size_t i = 0;
 
     /* register marc */
-    bt[0].construct = construct_marc;
-    bt[0].convert = convert_marc;
-    bt[0].destroy = destroy_marc;
+    bt[i].construct = construct_marc;
+    bt[i].convert = convert_marc;
+    bt[i++].destroy = destroy_marc;
+
+    bt[i-1].next = &bt[i];
+    bt[i].construct = construct_solrmarc;
+    bt[i].convert = convert_solrmarc;
+    bt[i++].destroy = destroy_solrmarc;
+
+    bt[i-1].next = &bt[i];
+    bt[i].construct = construct_select;
+    bt[i].convert = convert_select;
+    bt[i++].destroy = destroy_select;
 
 #if YAZ_HAVE_XSLT
     /* register xslt */
-    bt[0].next = &bt[1];
-    bt[1].next = types;
-    bt[1].construct = construct_xslt;
-    bt[1].convert = convert_xslt;
-    bt[1].destroy = destroy_xslt;
-#else
-    bt[0].next = types;
+    bt[i-1].next = &bt[i];
+    bt[i].construct = construct_xslt;
+    bt[i].convert = convert_xslt;
+    bt[i++].destroy = destroy_xslt;
 #endif
 
+    bt[i-1].next = types;
     yaz_record_conv_reset(p);
 
     /* parsing element children */
@@ -635,11 +783,15 @@ int yaz_record_conv_opac_record(yaz_record_conv_t p,
     else
     {
         struct marc_info *mi = r->info;
+        const char *input_charset = mi->input_charset;
+        yaz_iconv_t cd;
 
         WRBUF res = wrbuf_alloc();
         yaz_marc_t mt = yaz_marc_create();
-        yaz_iconv_t cd = yaz_iconv_open(mi->output_charset,
-                                        mi->input_charset);
+
+        if (yaz_opac_check_marc21_coding(input_charset, input_record))
+            input_charset = "utf-8";
+        cd = yaz_iconv_open(mi->output_charset, input_charset);
 
         wrbuf_rewind(p->wr_error);
         yaz_marc_xml(mt, mi->output_format_mode);
@@ -692,9 +844,6 @@ yaz_record_conv_t yaz_record_conv_create()
     p->wr_error = wrbuf_alloc();
     p->rules = 0;
     p->path = 0;
-#if YAZ_HAVE_EXSLT
-    exsltRegisterAll();
-#endif
     return p;
 }