record_render: base64 decoding of embedded records
author Adam Dickmeiss <adam@indexdata.dk>
Mon, 18 Jun 2012 13:05:26 +0000 (15:05 +0200)
committer Adam Dickmeiss <adam@indexdata.dk>
Mon, 18 Jun 2012 13:05:26 +0000 (15:05 +0200)
include/yaz/record_render.h
src/record_render.c
test/test_embed_record.c

index ad2fa1a..66eacab 100644 (file)
@@ -48,6 +48,10 @@ YAZ_BEGIN_CDECL
     \param len length of returned buffer
     \retval !=0 buffer
     \retval =0 record could not be rendered
     \param len length of returned buffer
     \retval !=0 buffer
     \retval =0 record could not be rendered
+
+    txml; charset=marc-8
+    xml; charset=utf-8
+    xml; charset=utf-8; base64(/rec/my/text(), txml; charset=marc-8)
 */
 YAZ_EXPORT
 const char *yaz_record_render(Z_NamePlusRecord *npr, const char *schema,
 */
 YAZ_EXPORT
 const char *yaz_record_render(Z_NamePlusRecord *npr, const char *schema,
index 7905016..291462b 100644 (file)
 #include <yaz/yaz-iconv.h>
 #include <yaz/proto.h>
 #include <yaz/oid_db.h>
 #include <yaz/yaz-iconv.h>
 #include <yaz/proto.h>
 #include <yaz/oid_db.h>
+#include <yaz/nmem_xml.h>
+#include <yaz/base64.h>
+
+#include <libxml/xpath.h>
+#include <libxml/xpathInternals.h>
 
 static yaz_iconv_t iconv_create_charset(const char *record_charset,
                                         yaz_iconv_t *cd2)
 
 static yaz_iconv_t iconv_create_charset(const char *record_charset,
                                         yaz_iconv_t *cd2)
@@ -222,10 +227,116 @@ static const char *get_record_format(WRBUF wrbuf, int *len,
     return res;
 }
 
     return res;
 }
 
+/*
+ * Render one decoded embedded record according to type_spec and put the
+ * result in the place of the XML node ptr.
+ *
+ * record_buf is wrapped as a USMARC external when atoi_n_check() returns
+ * non-zero for its first 5 characters, otherwise as an XML external. The
+ * rendered text is parsed as XML: if that succeeds a copy of the root
+ * element replaces ptr, otherwise the rendered text is inserted as a plain
+ * text node.
+ *
+ * Returns 0 when ptr was replaced; the caller is then expected to free ptr.
+ * Returns -1 when nothing was rendered or no replacement node could be
+ * built, in which case ptr is left in the tree untouched.
+ * nmem is currently unused.
+ */
+static int replace_node(NMEM nmem, xmlNode *ptr,
+                        const char *type_spec, char *record_buf)
+{
+    int ret = -1;
+    const char *res;
+    int len = 0;
+    int m_len;
+    size_t record_len = strlen(record_buf);
+    WRBUF wrbuf = wrbuf_alloc();
+    ODR odr = odr_createmem(ODR_ENCODE);
+    Z_NamePlusRecord *npr = odr_malloc(odr, sizeof(*npr));
+
+    (void) nmem;
+    npr->which = Z_NamePlusRecord_databaseRecord;
+    if (atoi_n_check(record_buf, 5, &m_len))
+        npr->u.databaseRecord =
+            z_ext_record_usmarc(odr, record_buf, record_len);
+    else
+        npr->u.databaseRecord =
+            z_ext_record_xml(odr, record_buf, record_len);
+
+    res = yaz_record_render(npr, 0, wrbuf, type_spec, &len);
+    /* len == -1 marks the "ext" result, which is a Z_External pointer and
+       not text: it must not reach strlen() or the XML parser */
+    if (res && len != -1)
+    {
+        xmlDoc *doc = xmlParseMemory(res, strlen(res));
+        xmlNode *nptr;
+        if (doc)
+        {
+            nptr = xmlCopyNode(xmlDocGetRootElement(doc), 1);
+            /* only swap when a replacement node really exists, so that a
+               failed copy does not silently drop the original node */
+            if (nptr)
+            {
+                xmlReplaceNode(ptr, nptr);
+                ret = 0;
+            }
+            /* freed only after the replacement, as in the original order;
+               presumably the copy may still refer to this document until
+               it has been moved into the target tree */
+            xmlFreeDoc(doc);
+        }
+        else
+        {
+            /* rendered text is not XML: insert it verbatim as text */
+            nptr = xmlNewText(BAD_CAST res);
+            if (nptr)
+            {
+                xmlReplaceNode(ptr, nptr);
+                ret = 0;
+            }
+        }
+    }
+    wrbuf_destroy(wrbuf);
+    odr_destroy(odr);
+    return ret;
+}
+
+/*
+ * Expand base64-encoded records embedded in an XML record.
+ *
+ * buf/*len is parsed as XML and the XPath expression expr is evaluated
+ * against it. For every text node in the result, the node content is
+ * base64-decoded (scratch memory comes from nmem) and handed to
+ * replace_node() together with type_spec. When replace_node() returns 0
+ * the old text node is freed and its slot in the node set is cleared.
+ * Finally the whole document is serialized into wrbuf.
+ *
+ * Returns the serialized document held by wrbuf with *len updated, or buf
+ * unchanged when there is no text to work on (buf or len is NULL, *len is
+ * negative), when buf does not parse as XML, or when serialization fails.
+ */
+static const char *base64_render(NMEM nmem, WRBUF wrbuf,
+                                 const char *buf, int *len,
+                                 const char *expr, const char *type_spec)
+{
+    xmlDocPtr doc;
+    xmlChar *buf_out = 0;
+    int len_out = 0;
+    xmlXPathContextPtr xpathCtx;
+
+    /* buf is NULL when nothing was rendered, the caller may pass a NULL
+       len, and *len is -1 for the non-textual "ext" result */
+    if (!buf || !len || *len < 0)
+        return buf;
+
+    doc = xmlParseMemory(buf, *len);
+    if (!doc)
+        return buf;
+
+    xpathCtx = xmlXPathNewContext(doc);
+    if (xpathCtx)
+    {
+        xmlXPathObjectPtr xpathObj =
+            xmlXPathEvalExpression((const xmlChar *) expr, xpathCtx);
+        if (xpathObj)
+        {
+            xmlNodeSetPtr nodes = xpathObj->nodesetval;
+            int i;
+            for (i = 0; nodes && i < nodes->nodeNr; i++)
+            {
+                xmlNode *ptr = nodes->nodeTab[i];
+                const char *input;
+                char *output;
+
+                if (ptr->type != XML_TEXT_NODE)
+                    continue;
+                input = nmem_text_node_cdata(ptr, nmem);
+                /* decoded data is never longer than its base64 form */
+                output = nmem_malloc(nmem, strlen(input) + 1);
+                if (yaz_base64decode(input, output) != 0)
+                    continue;
+                if (!replace_node(nmem, ptr, type_spec, output))
+                {
+                    /* replacement OK */
+                    xmlFreeNode(ptr);
+                    /* unset to avoid a bad reference in
+                       xmlXPathFreeObject below */
+                    nodes->nodeTab[i] = 0;
+                }
+            }
+            xmlXPathFreeObject(xpathObj);
+        }
+        xmlXPathFreeContext(xpathCtx);
+    }
+
+    xmlDocDumpMemory(doc, &buf_out, &len_out);
+    if (buf_out)
+    {
+        wrbuf_rewind(wrbuf);
+        wrbuf_write(wrbuf, (const char *) buf_out, len_out);
+        buf = wrbuf_cstr(wrbuf);
+        *len = len_out;
+        xmlFree(buf_out);
+    }
+    xmlFreeDoc(doc);
+    return buf;
+}
+
 const char *yaz_record_render(Z_NamePlusRecord *npr, const char *schema,
                               WRBUF wrbuf,
                               const char *type_spec, int *len)
 {
 const char *yaz_record_render(Z_NamePlusRecord *npr, const char *schema,
                               WRBUF wrbuf,
                               const char *type_spec, int *len)
 {
+    const char *ret = 0;
+    NMEM nmem = 0;
+    char *base64_xpath = 0;
+    char *base64_type_spec = 0;
     size_t i;
     char type[40];
     char charset[40];
     size_t i;
     char type[40];
     char charset[40];
@@ -269,18 +380,47 @@ const char *yaz_record_render(Z_NamePlusRecord *npr, const char *schema,
             }
             format[j] = '\0';
         } 
             }
             format[j] = '\0';
         } 
+        else if (!strncmp(cp + i, "base64", 6))
+        {
+            i = i + 6;
+
+            while (cp[i] == ' ')
+                i++;
+            if (cp[i] == '(')
+            {
+                size_t i0;
+                nmem = nmem_create();
+                i++;
+                while (cp[i] == ' ')
+                    i++;
+                i0 = i;
+                while (cp[i] != ',' && cp[i])
+                    i++;
+                base64_xpath = nmem_strdupn(nmem, cp + i0, i - i0);
+                if (cp[i])
+                    i++;
+                while (cp[i] == ' ')
+                    i++;
+                i0 = i;
+                while (cp[i] != ')' && cp[i])
+                    i++;
+                base64_type_spec = nmem_strdupn(nmem, cp + i0, i - i0);
+                if (cp[i])
+                    i++;
+            }
+        } 
     }
     if (!strcmp(type, "database"))
     {
         if (len)
             *len = (npr->databaseName ? strlen(npr->databaseName) : 0);
     }
     if (!strcmp(type, "database"))
     {
         if (len)
             *len = (npr->databaseName ? strlen(npr->databaseName) : 0);
-        return npr->databaseName;
+        ret = npr->databaseName;
     }
     else if (!strcmp(type, "schema"))
     {
         if (len)
             *len = schema ? strlen(schema) : 0;
     }
     else if (!strcmp(type, "schema"))
     {
         if (len)
             *len = schema ? strlen(schema) : 0;
-        return schema;
+        ret = schema;
     }
     else if (!strcmp(type, "syntax"))
     {
     }
     else if (!strcmp(type, "syntax"))
     {
@@ -294,43 +434,46 @@ const char *yaz_record_render(Z_NamePlusRecord *npr, const char *schema,
             desc = "none";
         if (len)
             *len = strlen(desc);
             desc = "none";
         if (len)
             *len = strlen(desc);
-        return desc;
+        ret = desc;
     }
     if (npr->which != Z_NamePlusRecord_databaseRecord)
     }
     if (npr->which != Z_NamePlusRecord_databaseRecord)
-        return 0;
-
-    /* from now on - we have a database record .. */
-    if (!strcmp(type, "render"))
+        ;
+    else if (!strcmp(type, "render"))
     {
     {
-        return get_record_format(wrbuf, len, npr, YAZ_MARC_LINE, charset, format);
+        ret = get_record_format(wrbuf, len, npr, YAZ_MARC_LINE, charset, format);
     }
     else if (!strcmp(type, "xml"))
     {
     }
     else if (!strcmp(type, "xml"))
     {
-        return get_record_format(wrbuf, len, npr, YAZ_MARC_MARCXML, charset,
-                                 format);
+        ret = get_record_format(wrbuf, len, npr, YAZ_MARC_MARCXML, charset,
+                                format);
     }
     else if (!strcmp(type, "txml"))
     {
     }
     else if (!strcmp(type, "txml"))
     {
-        return get_record_format(wrbuf, len, npr, YAZ_MARC_TURBOMARC, charset,
-                                 format);
+        ret = get_record_format(wrbuf, len, npr, YAZ_MARC_TURBOMARC, charset,
+                                format);
     }
     else if (!strcmp(type, "raw"))
     {
     }
     else if (!strcmp(type, "raw"))
     {
-        return get_record_format(wrbuf, len, npr, YAZ_MARC_ISO2709, charset,
-            format);
+        ret = get_record_format(wrbuf, len, npr, YAZ_MARC_ISO2709, charset,
+                                format);
     }
     else if (!strcmp(type, "ext"))
     {
         if (len) *len = -1;
     }
     else if (!strcmp(type, "ext"))
     {
         if (len) *len = -1;
-        return (const char *) npr->u.databaseRecord;
+        ret = (const char *) npr->u.databaseRecord;
     }
     else if (!strcmp(type, "opac"))
     {
         if (npr->u.databaseRecord->which == Z_External_OPAC)
     }
     else if (!strcmp(type, "opac"))
     {
         if (npr->u.databaseRecord->which == Z_External_OPAC)
-            return get_record_format(wrbuf, len, npr, YAZ_MARC_MARCXML, charset,
-                                     format);
+            ret = get_record_format(wrbuf, len, npr, YAZ_MARC_MARCXML, charset,
+                                    format);
     }
     }
-    return 0;
+
+    if (base64_xpath)
+        ret = base64_render(nmem, wrbuf,
+                            ret, len, base64_xpath, base64_type_spec);
+    nmem_destroy(nmem);
+    return ret;
 }
 
 /*
 }
 
 /*
index b3c5393..aac2987 100644 (file)
 #include <yaz/wrbuf.h>
 #include <string.h>
 #include <yaz/log.h>
 #include <yaz/wrbuf.h>
 #include <string.h>
 #include <yaz/log.h>
+#include <yaz/record_render.h>
 
 #if YAZ_HAVE_XML2
 
 #include <yaz/base64.h>
 #include <yaz/marcdisp.h>
 
 #if YAZ_HAVE_XML2
 
 #include <yaz/base64.h>
 #include <yaz/marcdisp.h>
+#include <yaz/proto.h>
+#include <yaz/prt-ext.h>
 
 #include <libxml/parser.h>
 #include <libxml/tree.h>
 
 
 #include <libxml/parser.h>
 #include <libxml/tree.h>
 
-void test(void)
+void test1(void)
 {
   char base_enc[] = 
     "MDA3NjZuYW0gIDIyMDAyNjU4YSA0NTAwMDAxMDAxMjAwMDAwMDAzMDAwNjAwMDEyMDA1MDAx"
 {
   char base_enc[] = 
     "MDA3NjZuYW0gIDIyMDAyNjU4YSA0NTAwMDAxMDAxMjAwMDAwMDAzMDAwNjAwMDEyMDA1MDAx"
@@ -44,13 +47,14 @@ void test(void)
 
     int marc_size = strlen(bin_marc);
     char out_rec[1000];
 
     int marc_size = strlen(bin_marc);
     char out_rec[1000];
+    yaz_marc_t marc = yaz_marc_create();
+    WRBUF buf = wrbuf_alloc();
+
     yaz_base64decode(base_enc, out_rec);
     YAZ_CHECK(strcmp(out_rec, bin_marc) == 0);
 
     yaz_base64decode(base_enc, out_rec);
     YAZ_CHECK(strcmp(out_rec, bin_marc) == 0);
 
-    yaz_marc_t marc = yaz_marc_create();
     yaz_marc_read_iso2709(marc, out_rec, marc_size);
 
     yaz_marc_read_iso2709(marc, out_rec, marc_size);
 
-    WRBUF buf = wrbuf_alloc();
     yaz_marc_write_marcxml(marc, buf);
 
     yaz_marc_destroy(marc);
     yaz_marc_write_marcxml(marc, buf);
 
     yaz_marc_destroy(marc);
@@ -59,12 +63,166 @@ void test(void)
 }
 #endif
 
 }
 #endif
 
+/*
+ * Render input with yaz_record_render() using type_spec and compare the
+ * result with expected_output.
+ *
+ * input is wrapped as a USMARC external when is_marc is non-zero, otherwise
+ * as an XML external. expected_output may be NULL to state that no result
+ * is expected.
+ *
+ * Returns 1 when the rendered bytes equal expected_output (or both are
+ * absent), 0 otherwise; mismatches are written to the YAZ log.
+ */
+static int test_render(const char *type_spec, int is_marc, const char *input,
+                    const char *expected_output)
+{
+    ODR odr = odr_createmem(ODR_ENCODE);
+    const char *actual_output;
+    int actual_len = 0;
+    int shown_len;
+    int res = 0;
+    WRBUF wrbuf = wrbuf_alloc();
+
+    Z_NamePlusRecord *npr = odr_malloc(odr, sizeof(*npr));
+    npr->which = Z_NamePlusRecord_databaseRecord;
+    if (is_marc)
+        npr->u.databaseRecord = z_ext_record_usmarc(odr, input, strlen(input));
+    else
+        npr->u.databaseRecord = z_ext_record_xml(odr, input, strlen(input));
+
+    actual_output = yaz_record_render(npr, 0, wrbuf, type_spec, &actual_len);
+    /* a negative length means the result is not text; never use it as a
+       printf precision or compare it against a size_t */
+    shown_len = actual_len < 0 ? 0 : actual_len;
+
+    if (actual_output && expected_output)
+    {
+        if (actual_len >= 0 &&
+            strlen(expected_output) == (size_t) actual_len &&
+            !memcmp(expected_output, actual_output, (size_t) actual_len))
+            res = 1;
+        else
+        {
+            yaz_log(YLOG_LOG, "Got result");
+            yaz_log(YLOG_LOG, "%.*s", shown_len, actual_output);
+            yaz_log(YLOG_LOG, "Expected result");
+            yaz_log(YLOG_LOG, "%s", expected_output);
+        }
+    }
+    else if (!actual_output && !expected_output)
+        res = 1;
+    else if (!actual_output && expected_output)
+    {
+        yaz_log(YLOG_LOG, "Got null result, but expected");
+        yaz_log(YLOG_LOG, "%s", expected_output);
+    }
+    else
+    {
+        yaz_log(YLOG_LOG, "Got result, but expected no result");
+        yaz_log(YLOG_LOG, "%.*s", shown_len, actual_output);
+    }
+    wrbuf_destroy(wrbuf);
+    odr_destroy(odr);
+    return res;
+}
+
 int main(int argc, char **argv)
 {
     YAZ_CHECK_INIT(argc, argv);
     YAZ_CHECK_LOG();
 #if YAZ_HAVE_XML2
 int main(int argc, char **argv)
 {
     YAZ_CHECK_INIT(argc, argv);
     YAZ_CHECK_LOG();
 #if YAZ_HAVE_XML2
-    test();
+    test1();
+    YAZ_CHECK(test_render("xml", 0, "<my/>", "<my/>"));
+
+    YAZ_CHECK(test_render(
+                  "xml", 1, 
+                  "\x30\x30\x31\x33\x38\x6E\x61\x6D\x20\x20\x32\x32\x30\x30\x30\x37"
+                  "\x33\x38\x61\x20\x34\x35\x30\x30\x30\x30\x31\x30\x30\x31\x33\x30"
+                  "\x30\x30\x30\x30\x30\x30\x33\x30\x30\x30\x34\x30\x30\x30\x31\x33"
+                  "\x31\x30\x30\x30\x30\x31\x37\x30\x30\x30\x31\x37\x32\x34\x35\x30"
+                  "\x30\x33\x30\x30\x30\x30\x33\x34\x1E\x20\x20\x20\x31\x31\x32\x32"
+                  "\x34\x34\x36\x36\x20\x1E\x44\x4C\x43\x1E\x31\x30\x1F\x61\x4A\x61"
+                  "\x63\x6B\x20\x43\x6F\x6C\x6C\x69\x6E\x73\x1E\x31\x30\x1F\x61\x48"
+                  "\x6F\x77\x20\x74\x6F\x20\x70\x72\x6F\x67\x72\x61\x6D\x20\x61\x20"
+                  "\x63\x6F\x6D\x70\x75\x74\x65\x72\x1E\x1D",
+                  "<record xmlns=\"http://www.loc.gov/MARC21/slim\">\n"
+                  "  <leader>00138nam a22000738a 4500</leader>\n"
+                  "  <controlfield tag=\"001\">   11224466 </controlfield>\n"
+                  "  <controlfield tag=\"003\">DLC</controlfield>\n"
+                  "  <datafield tag=\"100\" ind1=\"1\" ind2=\"0\">\n"
+                  "    <subfield code=\"a\">Jack Collins</subfield>\n"
+                  "  </datafield>\n"
+                  "  <datafield tag=\"245\" ind1=\"1\" ind2=\"0\">\n"
+                  "    <subfield code=\"a\">How to program a computer</subfield>\n"
+                  "  </datafield>\n"
+                  "</record>\n"));
+
+    YAZ_CHECK(test_render("xml", 0, "<my/>", "<my/>"));
+
+    YAZ_CHECK(test_render(
+                  "xml; base64(/my/text(),xml)", 0,
+                  "<my>"
+                  "MDAxMzhuYW0gIDIyMDAwNzM4YSA0NTAwMDAxMDAxMzAwMDAwMDAzMDAwNDAwMDEzMTAwMDAxNzAw"
+                  "MDE3MjQ1MDAzMDAwMDM0HiAgIDExMjI0NDY2IB5ETEMeMTAfYUphY2sgQ29sbGlucx4xMB9hSG93"
+                  "IHRvIHByb2dyYW0gYSBjb21wdXRlch4d"
+                  "</my>",
+                  "<?xml version=\"1.0\"?>\n"
+                  "<my><record xmlns=\"http://www.loc.gov/MARC21/slim\">\n"
+                  "  <leader>00138nam a22000738a 4500</leader>\n"
+                  "  <controlfield tag=\"001\">   11224466 </controlfield>\n"
+                  "  <controlfield tag=\"003\">DLC</controlfield>\n"
+                  "  <datafield tag=\"100\" ind1=\"1\" ind2=\"0\">\n"
+                  "    <subfield code=\"a\">Jack Collins</subfield>\n"
+                  "  </datafield>\n"
+                  "  <datafield tag=\"245\" ind1=\"1\" ind2=\"0\">\n"
+                  "    <subfield code=\"a\">How to program a computer</subfield>\n"
+                  "  </datafield>\n"
+                  "</record></my>\n"));
+
+    YAZ_CHECK(test_render(
+                  "xml; charset=utf-8; base64(/my/text(),xml)", 0,
+                  "<my>"
+                  "MDAxMzhuYW0gIDIyMDAwNzM4YSA0NTAwMDAxMDAxMzAwMDAwMDAzMDAwNDAwMDEzMTAwMDAxNzAw"
+                  "MDE3MjQ1MDAzMDAwMDM0HiAgIDExMjI0NDY2IB5ETEMeMTAfYUphY2sgQ29sbGlucx4xMB9hSG93"
+                  "IHRvIHByb2dyYW0gYSBjb21wdXRlch4d"
+                  "</my>",
+                  "<?xml version=\"1.0\"?>\n"
+                  "<my><record xmlns=\"http://www.loc.gov/MARC21/slim\">\n"
+                  "  <leader>00138nam a22000738a 4500</leader>\n"
+                  "  <controlfield tag=\"001\">   11224466 </controlfield>\n"
+                  "  <controlfield tag=\"003\">DLC</controlfield>\n"
+                  "  <datafield tag=\"100\" ind1=\"1\" ind2=\"0\">\n"
+                  "    <subfield code=\"a\">Jack Collins</subfield>\n"
+                  "  </datafield>\n"
+                  "  <datafield tag=\"245\" ind1=\"1\" ind2=\"0\">\n"
+                  "    <subfield code=\"a\">How to program a computer</subfield>\n"
+                  "  </datafield>\n"
+                  "</record></my>\n"));
+
+    YAZ_CHECK(test_render(
+                  "xml; base64(/my/text(),xml);charset=utf-8", 0,
+                  "<my>"
+                  "MDAxMzhuYW0gIDIyMDAwNzM4YSA0NTAwMDAxMDAxMzAwMDAwMDAzMDAwNDAwMDEzMTAwMDAxNzAw"
+                  "MDE3MjQ1MDAzMDAwMDM0HiAgIDExMjI0NDY2IB5ETEMeMTAfYUphY2sgQ29sbGlucx4xMB9hSG93"
+                  "IHRvIHByb2dyYW0gYSBjb21wdXRlch4d"
+                  "</my>",
+                  "<?xml version=\"1.0\"?>\n"
+                  "<my><record xmlns=\"http://www.loc.gov/MARC21/slim\">\n"
+                  "  <leader>00138nam a22000738a 4500</leader>\n"
+                  "  <controlfield tag=\"001\">   11224466 </controlfield>\n"
+                  "  <controlfield tag=\"003\">DLC</controlfield>\n"
+                  "  <datafield tag=\"100\" ind1=\"1\" ind2=\"0\">\n"
+                  "    <subfield code=\"a\">Jack Collins</subfield>\n"
+                  "  </datafield>\n"
+                  "  <datafield tag=\"245\" ind1=\"1\" ind2=\"0\">\n"
+                  "    <subfield code=\"a\">How to program a computer</subfield>\n"
+                  "  </datafield>\n"
+                  "</record></my>\n"));
+
+    YAZ_CHECK(test_render(
+                  "xml; base64(/my/text(),txml;charset=utf-8)", 0,
+                  "<my>"
+                  "MDAxMzhuYW0gIDIyMDAwNzM4YSA0NTAwMDAxMDAxMzAwMDAwMDAzMDAwNDAwMDEzMTAwMDAxNzAw"
+                  "MDE3MjQ1MDAzMDAwMDM0HiAgIDExMjI0NDY2IB5ETEMeMTAfYUphY2sgQ29sbGlucx4xMB9hSG93"
+                  "IHRvIHByb2dyYW0gYSBjb21wdXRlch4d"
+                  "</my>",
+                  "<?xml version=\"1.0\"?>\n"
+                  "<my><r xmlns=\"http://www.indexdata.com/turbomarc\">\n"
+                  "  <l>00138nam a22000738a 4500</l>\n"
+                  "  <c001>   11224466 </c001>\n"
+                  "  <c003>DLC</c003>\n"
+                  "  <d100 i1=\"1\" i2=\"0\">\n"
+                  "    <sa>Jack Collins</sa>\n"
+                  "  </d100>\n"
+                  "  <d245 i1=\"1\" i2=\"0\">\n"
+                  "    <sa>How to program a computer</sa>\n"
+                  "  </d245>\n"
+                  "</r></my>\n"));
 #endif
     YAZ_CHECK_TERM;
 }
 #endif
     YAZ_CHECK_TERM;
 }