record_conv: selection by X-Path YAZ-811
author Adam Dickmeiss <adam@indexdata.dk>
Thu, 8 Jan 2015 13:52:55 +0000 (14:52 +0100)
committer Adam Dickmeiss <adam@indexdata.dk>
Thu, 8 Jan 2015 13:52:55 +0000 (14:52 +0100)
doc/book.xml
src/record_conv.c
test/test_record_conv.c

index 1c0d40f..462cb7f 100644 (file)
@@ -7393,21 +7393,25 @@ int cql_transform_rpn2cql_wrbuf(cql_transform_t ct,
        </listitem>
       </varlistentry>
       <varlistentry>
-       <term><literal>xslt</literal></term>
+       <term><literal>select</literal></term>
        <listitem>
         <para>
-         The <literal>xslt</literal> element specifies a conversion
-         via &acro.xslt;. The following attributes may be specified:
+         The <literal>select</literal> element selects one or more
+         text nodes and decodes them as XML.
+         The following attributes may be specified:
          <variablelist>
-          <varlistentry><term><literal>stylesheet</literal> (REQUIRED)</term>
+          <varlistentry><term><literal>path</literal> (REQUIRED)</term>
            <listitem>
             <para>
-             Stylesheet file.
+             XPath expression for selecting text nodes.
             </para>
            </listitem>
           </varlistentry>
          </variablelist>
         </para>
+        <para>
+         This conversion is available in YAZ 5.8.0 and later.
+        </para>
        </listitem>
       </varlistentry>
       <varlistentry>
@@ -7425,6 +7429,24 @@ int cql_transform_rpn2cql_wrbuf(cql_transform_t ct,
         </para>
        </listitem>
       </varlistentry>
+      <varlistentry>
+       <term><literal>xslt</literal></term>
+       <listitem>
+        <para>
+         The <literal>xslt</literal> element specifies a conversion
+         via &acro.xslt;. The following attributes may be specified:
+         <variablelist>
+          <varlistentry><term><literal>stylesheet</literal> (REQUIRED)</term>
+           <listitem>
+            <para>
+             Stylesheet file.
+            </para>
+           </listitem>
+          </varlistentry>
+         </variablelist>
+        </para>
+       </listitem>
+      </varlistentry>
      </variablelist>
     </para>
    </sect2>
index 9ab5b71..1595faf 100644 (file)
@@ -25,6 +25,8 @@
 #include <libxml/parser.h>
 #include <libxml/tree.h>
 #include <libxml/xinclude.h>
+#include <libxml/xpath.h>
+#include <libxml/xpathInternals.h>
 #if YAZ_HAVE_XSLT
 #include <libxslt/xsltutils.h>
 #include <libxslt/transform.h>
@@ -317,6 +319,95 @@ static void destroy_xslt(void *vinfo)
 /* YAZ_HAVE_XSLT */
 #endif
 
+/* Configuration for the "select" conversion step. */
+struct select_info {
+    /* pool that this struct and xpath_expr are allocated from */
+    NMEM nmem;
+    /* copy of the "path" attribute value; 0 when no such attribute was given */
+    char *xpath_expr;
+};
+
+/* Builds the state for a "select" conversion step.
+
+   ptr:      configuration element to inspect.
+   path:     not used by this step.
+   wr_error: receives a message when an unsupported attribute is found.
+
+   Returns a select_info allocated from its own NMEM. Returns 0 when the
+   element is not named "select" (wr_error untouched) or when it carries
+   an attribute other than a non-empty "path" (wr_error filled in). */
+static void *construct_select(const xmlNode *ptr,
+                              const char *path, WRBUF wr_error)
+{
+    struct _xmlAttr *attr;
+    NMEM nmem;
+    struct select_info *info;
+
+    if (strcmp((const char *) ptr->name, "select"))
+        return 0;
+
+    nmem = nmem_create();
+    info = nmem_malloc(nmem, sizeof(*info));
+    info->nmem = nmem;
+    info->xpath_expr = 0;
+    for (attr = ptr->properties; attr; attr = attr->next)
+    {
+        if (!xmlStrcmp(attr->name, BAD_CAST "path") &&
+            attr->children && attr->children->type == XML_TEXT_NODE)
+            info->xpath_expr =
+                nmem_strdup(nmem, (const char *) attr->children->content);
+        else
+        {
+            /* the only attribute accepted above is "path" */
+            wrbuf_printf(wr_error, "Bad attribute '%s'. "
+                         "Expected path.", attr->name);
+            /* info lives inside nmem, so this releases everything */
+            nmem_destroy(nmem);
+            return 0;
+        }
+    }
+    return info;
+}
+
+/* Appends the content of ptr to record when ptr is a text node that has
+   content; any other node is ignored. */
+static void select_append_text(WRBUF record, const xmlNode *ptr)
+{
+    if (ptr->type == XML_TEXT_NODE && ptr->content)
+        wrbuf_puts(record, (const char *) ptr->content);
+}
+
+/* Replaces record with the text selected by the configured XPath
+   expression.
+
+   vinfo:    the select_info returned by construct_select.
+   record:   XML document on entry; on a successful XPath evaluation it is
+             rewound and filled with the selected text.
+   wr_error: receives a message when the record cannot be parsed.
+
+   For each selected element the text nodes among its direct children
+   are appended; a selected text node is appended itself. Returns -1 when
+   the record is not parseable XML, 0 otherwise. When no expression is
+   configured or the expression cannot be evaluated, record is left
+   unchanged and 0 is returned. */
+static int convert_select(void *vinfo, WRBUF record, WRBUF wr_error)
+{
+    struct select_info *info = vinfo;
+    xmlXPathContextPtr xpathCtx;
+    xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
+                                   wrbuf_len(record));
+
+    if (!doc)
+    {
+        wrbuf_printf(wr_error, "xmlParseMemory failed");
+        return -1;
+    }
+    xpathCtx = xmlXPathNewContext(doc);
+    if (xpathCtx)
+    {
+        xmlXPathObjectPtr xpathObj = 0;
+
+        if (info->xpath_expr)
+            xpathObj =
+                xmlXPathEvalExpression((const xmlChar *) info->xpath_expr,
+                                       xpathCtx);
+        if (xpathObj)
+        {
+            xmlNodeSetPtr nodes = xpathObj->nodesetval;
+
+            wrbuf_rewind(record);
+            if (nodes)
+            {
+                int i;
+                for (i = 0; i < nodes->nodeNr; i++)
+                {
+                    const xmlNode *ptr = nodes->nodeTab[i];
+
+                    if (ptr->type == XML_ELEMENT_NODE)
+                    {
+                        /* children may be empty or hold non-text nodes */
+                        for (ptr = ptr->children; ptr; ptr = ptr->next)
+                            select_append_text(record, ptr);
+                    }
+                    else
+                        select_append_text(record, ptr);
+                }
+            }
+            xmlXPathFreeObject(xpathObj);
+        }
+        /* freed whether or not an expression was configured */
+        xmlXPathFreeContext(xpathCtx);
+    }
+    xmlFreeDoc(doc);
+    return 0;
+}
+
+/* Releases the state created by construct_select. The select_info and
+   its xpath_expr are both allocated from info->nmem, so destroying that
+   pool frees everything. A null pointer is accepted and ignored. */
+static void destroy_select(void *vinfo)
+{
+    struct select_info *info = vinfo;
+
+    if (info)
+        nmem_destroy(info->nmem);
+}
+
+
 static void *construct_solrmarc(const xmlNode *ptr,
                                 const char *path, WRBUF wr_error)
 {
@@ -592,7 +683,7 @@ static void destroy_marc(void *info)
 int yaz_record_conv_configure_t(yaz_record_conv_t p, const xmlNode *ptr,
                                 struct yaz_record_conv_type *types)
 {
-    struct yaz_record_conv_type bt[3];
+    struct yaz_record_conv_type bt[4];
     size_t i = 0;
 
     /* register marc */
@@ -605,6 +696,11 @@ int yaz_record_conv_configure_t(yaz_record_conv_t p, const xmlNode *ptr,
     bt[i].convert = convert_solrmarc;
     bt[i++].destroy = destroy_solrmarc;
 
+    bt[i-1].next = &bt[i];
+    bt[i].construct = construct_select;
+    bt[i].convert = convert_select;
+    bt[i++].destroy = destroy_select;
+
 #if YAZ_HAVE_XSLT
     /* register xslt */
     bt[i-1].next = &bt[i];
index ff9a2a8..afd966e 100644 (file)
@@ -252,6 +252,14 @@ static void tst_convert1(void)
         "\x30\x30\x30\x30\x30\x31\x30\x30\x30\x31\x37\x30\x30\x30\x31\x33"
         "#30;\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20#30;\x20\x20"
         "#31;\x61\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20#30;#29;";
+    const char *raw_rec = /* raw is xml-string of marcxml_rec */
+        "<raw>&lt;record xmlns=\"http://www.loc.gov/MARC21/slim\">\n"
+        "  &lt;leader>00080nam a22000498a 4500&lt;/leader>\n"
+        "  &lt;controlfield tag=\"001\">   11224466 &lt;/controlfield>\n"
+        "  &lt;datafield tag=\"010\" ind1=\" \" ind2=\" \">\n"
+        "    &lt;subfield code=\"a\">   11224466 &lt;/subfield>\n"
+        "  &lt;/datafield>\n"
+        "&lt;/record>\n</raw>\n";
 
     YAZ_CHECK(conv_configure_test("<backend>"
                                   "<marc"
@@ -328,6 +336,13 @@ static void tst_convert1(void)
                                   "</backend>",
                                   0, &p));
     YAZ_CHECK(conv_convert_test(p, marcxml_rec, marcxml_rec));
+
+    YAZ_CHECK(conv_configure_test("<backend>"
+                                  "<select path=\"/raw\"/>"
+                                  "</backend>",
+                                  0, &p));
+    YAZ_CHECK(conv_convert_test(p, raw_rec, marcxml_rec));
+
     yaz_record_conv_destroy(p);
 }