Fix yaz_use_attribute_create (uninit memory)
[yaz-moved-to-github.git] / src / record_render.c
index d4180dd..1291858 100644 (file)
@@ -1,11 +1,14 @@
 /* This file is part of the YAZ toolkit.
- * Copyright (C) 1995-2010 Index Data
+ * Copyright (C) Index Data
  * See the file LICENSE for details.
  */
 /**
  * \file record_render.c
  * \brief Render Z39.50 records (NamePlusRecord)
  */
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
 
 #include <assert.h>
 #include <string.h>
 #include <yaz/yaz-iconv.h>
 #include <yaz/proto.h>
 #include <yaz/oid_db.h>
+#include <yaz/nmem_xml.h>
+#include <yaz/base64.h>
+
+#if YAZ_HAVE_XML2
+#include <libxml/xpath.h>
+#include <libxml/xpathInternals.h>
+#endif
 
-static yaz_iconv_t iconv_create_charset(const char *record_charset)
+static yaz_iconv_t iconv_create_charset(const char *record_charset,
+                                        yaz_iconv_t *cd2)
 {
-    char to[40];
-    char from[40];
+    char charset_buf[40]; /* writable copy of record_charset, split in place as "from1[/from2][,to]" */
     yaz_iconv_t cd = 0;
-
-    *from = '\0';
-    strcpy(to, "UTF-8");
+    char *from_set1 = 0; /* primary source charset; stays 0 when record_charset is NULL or empty */
+    char *from_set2 = 0; /* optional second source charset: the part following '/' */
+    char *to_set = "utf-8"; /* target charset unless a ",to" part overrides it */
     if (record_charset && *record_charset)
     {
-        /* Use "from,to" or just "from" */
-        const char *cp = strchr(record_charset, ',');
-        size_t clen = strlen(record_charset);
-        if (cp && cp[1])
+        char *cp = charset_buf;
+
+        strncpy(charset_buf, record_charset, sizeof(charset_buf)-1); /* input beyond 39 bytes is dropped */
+        charset_buf[sizeof(charset_buf)-1] = '\0'; /* strncpy does not terminate when it truncates */
+
+        from_set1 = cp;
+        while (*cp && *cp != ',' && *cp != '/')
+            cp++;
+        if (*cp == '/')
+        {
+            *cp++ = '\0'; /* terminate from_set1 */
+            from_set2 = cp;
+            while (*cp && *cp != ',')
+                cp++;
+        }
+        if (*cp == ',')
         {
-            strncpy( to, cp+1, sizeof(to)-1);
-            to[sizeof(to)-1] = '\0';
-            clen = cp - record_charset;
+            *cp++ = '\0';  /* terminate from_set1 or from_set2 */
+            to_set = cp; /* rest of the buffer is the target charset; it is empty for input ending in ',' */
+            while (*cp)
+                cp++;
         }
-        if (clen > sizeof(from)-1)
-            clen = sizeof(from)-1;
-        
-        if (clen)
-            strncpy(from, record_charset, clen);
-        from[clen] = '\0';
     }
-    if (*from && *to)
-        cd = yaz_iconv_open(to, from);
+
+    if (from_set1) /* NOTE(review): tests the pointer only, so an empty name (input starting with ',' or '/') is still passed to yaz_iconv_open, unlike the removed *from && *to test - confirm this is intended */
+        cd = yaz_iconv_open(to_set, from_set1);
+    if (cd2) /* second handle is optional; the MARC and string callers in this file pass 0 */
+    {
+        if (from_set2)
+            *cd2 = yaz_iconv_open(to_set, from_set2);
+        else
+            *cd2 = 0; /* always assigned, so a caller can test *cd2 without initializing it first */
+    }
     return cd;
 }
 
@@ -54,7 +79,7 @@ static const char *return_marc_record(WRBUF wrbuf,
                                       const char *buf, int sz,
                                       const char *record_charset)
 {
-    yaz_iconv_t cd = iconv_create_charset(record_charset);
+    yaz_iconv_t cd = iconv_create_charset(record_charset, 0);
     yaz_marc_t mt = yaz_marc_create();
     const char *ret_string = 0;
 
@@ -63,8 +88,7 @@ static const char *return_marc_record(WRBUF wrbuf,
     yaz_marc_xml(mt, marc_type);
     if (yaz_marc_decode_wrbuf(mt, buf, sz, wrbuf) > 0)
     {
-        if (len)
-            *len = wrbuf_len(wrbuf);
+        *len = wrbuf_len(wrbuf);
         ret_string = wrbuf_cstr(wrbuf);
     }
     yaz_marc_destroy(mt);
@@ -79,20 +103,26 @@ static const char *return_opac_record(WRBUF wrbuf,
                                       Z_OPACRecord *opac_rec,
                                       const char *record_charset)
 {
-    yaz_iconv_t cd = iconv_create_charset(record_charset);
+    yaz_iconv_t cd2;
+    yaz_iconv_t cd = iconv_create_charset(record_charset, &cd2);
     yaz_marc_t mt = yaz_marc_create();
 
     if (cd)
         yaz_marc_iconv(mt, cd);
     yaz_marc_xml(mt, marc_type);
 
-    yaz_opac_decode_wrbuf(mt, opac_rec, wrbuf);
+    if (cd2)
+        yaz_opac_decode_wrbuf2(mt, opac_rec, wrbuf, cd2);
+    else
+        yaz_opac_decode_wrbuf(mt, opac_rec, wrbuf);
+
     yaz_marc_destroy(mt);
 
     if (cd)
         yaz_iconv_close(cd);
-    if (len)
-        *len = wrbuf_len(wrbuf);
+    if (cd2)
+        yaz_iconv_close(cd2);
+    *len = wrbuf_len(wrbuf);
     return wrbuf_cstr(wrbuf);
 }
 
@@ -101,7 +131,7 @@ static const char *return_string_record(WRBUF wrbuf,
                                         const char *buf, int sz,
                                         const char *record_charset)
 {
-    yaz_iconv_t cd = iconv_create_charset(record_charset);
+    yaz_iconv_t cd = iconv_create_charset(record_charset, 0);
 
     if (cd)
     {
@@ -112,8 +142,7 @@ static const char *return_string_record(WRBUF wrbuf,
         sz = wrbuf_len(wrbuf);
         yaz_iconv_close(cd);
     }
-    if (len)
-        *len = sz;
+    *len = sz;
     return buf;
 }
 
@@ -138,7 +167,10 @@ static const char *return_record_wrbuf(WRBUF wrbuf, int *len,
                                     charset);
     else if (r->which == Z_External_octet)
     {
-        if (yaz_oid_is_iso2709(oid))
+        if (oid_oidcmp(oid, yaz_oid_recsyn_xml)
+            && oid_oidcmp(oid, yaz_oid_recsyn_application_xml)
+            && oid_oidcmp(oid, yaz_oid_recsyn_mab)
+            && oid_oidcmp(oid, yaz_oid_recsyn_html))
         {
             const char *ret_buf = return_marc_record(
                 wrbuf, marctype, len,
@@ -147,8 +179,8 @@ static const char *return_record_wrbuf(WRBUF wrbuf, int *len,
                 charset);
             if (ret_buf)
                 return ret_buf;
-            /* bad ISO2709. Return fail unless raw (ISO2709) is wanted */
-            if (marctype != YAZ_MARC_ISO2709)
+            /* not ISO2709. Return fail unless raw (ISO2709) is wanted */
+            if (yaz_oid_is_iso2709(oid) && marctype != YAZ_MARC_ISO2709)
                 return 0;
         }
         return return_string_record(wrbuf, len,
@@ -166,7 +198,7 @@ static const char *return_record_wrbuf(WRBUF wrbuf, int *len,
     }
     return 0;
 }
-    
+
 static const char *get_record_format(WRBUF wrbuf, int *len,
                                      Z_NamePlusRecord *npr,
                                      int marctype, const char *charset,
@@ -174,7 +206,7 @@ static const char *get_record_format(WRBUF wrbuf, int *len,
 {
     const char *res = return_record_wrbuf(wrbuf, len, npr, marctype, charset);
 #if YAZ_HAVE_XML2
-    if (*format == '1' && len)
+    if (*format == '1')
     {
         /* try to XML format res */
         xmlDocPtr doc;
@@ -191,21 +223,133 @@ static const char *get_record_format(WRBUF wrbuf, int *len,
             xmlFreeDoc(doc);
             res = wrbuf_cstr(wrbuf);
             *len = wrbuf_len(wrbuf);
-        } 
+        }
     }
 #endif
     return res;
 }
 
+#if YAZ_HAVE_XML2
+static int replace_node(NMEM nmem, xmlNode *ptr,
+                        const char *type_spec, char *record_buf)
+{   /* Render the record held in record_buf with yaz_record_render(),
+     * using type_spec, and put the rendered result in the XML tree at
+     * the position of ptr.
+     *
+     * nmem        not referenced in this function
+     * ptr         node to be replaced; it is not freed here (base64_render
+     *             frees it after a 0 return)
+     * type_spec   type specification handed on to yaz_record_render()
+     * record_buf  record data, treated as a NUL-terminated string
+     *
+     * Returns 0 when ptr was replaced, -1 when rendering gave no result.
+     */
+    int ret = -1;
+    const char *res;
+    int len; /* filled in by yaz_record_render(); not read afterwards */
+    int m_len;
+    WRBUF wrbuf = wrbuf_alloc();
+    ODR odr = odr_createmem(ODR_ENCODE);
+    Z_NamePlusRecord *npr = odr_malloc(odr, sizeof(*npr));
+    npr->which = Z_NamePlusRecord_databaseRecord;
+
+    if (atoi_n_check(record_buf, 5, &m_len)) /* presumably true when the first 5 bytes are digits, i.e. a MARC record length - TODO confirm */
+        npr->u.databaseRecord =
+            z_ext_record_usmarc(odr, record_buf, strlen(record_buf));
+    else
+        npr->u.databaseRecord =
+            z_ext_record_xml(odr, record_buf, strlen(record_buf));
+    res = yaz_record_render(npr, 0, wrbuf, type_spec, &len); /* no schema is passed (0) */
+    if (res)
+    {
+        xmlDoc *doc = xmlParseMemory(res, strlen(res));
+        if (doc)
+        {
+            /* rendered result is XML: insert a deep copy of its root element */
+            xmlNode *nptr = xmlCopyNode(xmlDocGetRootElement(doc), 1);
+            xmlReplaceNode(ptr, nptr);
+            xmlFreeDoc(doc);
+        }
+        else
+        {
+            /* rendered result does not parse as XML: insert it as a text node */
+            xmlNode *nptr = xmlNewText(BAD_CAST res);
+            xmlReplaceNode(ptr, nptr);
+        }
+        ret = 0;
+    }
+    wrbuf_destroy(wrbuf); /* res is not used past this point */
+    odr_destroy(odr);
+    return ret;
+}
+#endif
+
+static const char *base64_render(NMEM nmem, WRBUF wrbuf,
+                                 const char *buf, int *len,
+                                 const char *expr, const char *type_spec)
+{   /* Decode base64 content inside an XML record and render it in place.
+     *
+     * buf/len    record to process; *len is the byte count on entry
+     * expr       XPath expression; only text nodes in its result are handled
+     * type_spec  passed on to replace_node() for rendering the decoded data
+     * nmem       used for the node text and the decode buffer; not
+     *            destroyed here
+     * wrbuf      receives the re-serialized document
+     *
+     * Returns buf unchanged (and leaves *len alone) when buf does not parse
+     * as XML or when built without YAZ_HAVE_XML2. Otherwise returns the
+     * content of wrbuf and stores the new length in *len.
+     */
+#if YAZ_HAVE_XML2
+    xmlDocPtr doc = xmlParseMemory(buf, *len);
+    if (doc)
+    {
+        xmlChar *buf_out;
+        int len_out;
+        xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
+        if (xpathCtx)
+        {
+            xmlXPathObjectPtr xpathObj =
+                xmlXPathEvalExpression((const xmlChar *) expr, xpathCtx);
+            if (xpathObj)
+            {
+                xmlNodeSetPtr nodes = xpathObj->nodesetval;
+                if (nodes)
+                {
+                    int i;
+                    for (i = 0; i < nodes->nodeNr; i++)
+                    {
+                        xmlNode *ptr = nodes->nodeTab[i];
+                        if (ptr->type == XML_TEXT_NODE) /* any other node type in the result is left untouched */
+                        {
+                            const char *input =
+                                nmem_text_node_cdata(ptr, nmem);
+                            char *output = nmem_malloc(
+                                nmem, strlen(input) + 1); /* sized from the encoded text */
+                            if (yaz_base64decode(input, output) == 0) /* 0 is taken as success; nodes that fail to decode are kept as they are */
+                            {
+                                if (!replace_node(nmem, ptr, type_spec, output))
+                                {
+                                    /* replacement OK */
+                                    xmlFreeNode(ptr);
+                                    /* clear the slot so the node set holds no
+                                       reference to the freed node when
+                                       xmlXPathFreeObject runs further down */
+                                    nodes->nodeTab[i] = 0;
+                                }
+                            }
+                        }
+                    }
+                }
+                xmlXPathFreeObject(xpathObj);
+            }
+            xmlXPathFreeContext(xpathCtx);
+        }
+        xmlDocDumpMemory(doc, &buf_out, &len_out); /* serialized again even if no node was replaced */
+        if (buf_out)
+        {
+            wrbuf_rewind(wrbuf); /* buf may point into wrbuf; it has already been parsed into doc */
+            wrbuf_write(wrbuf, (const char *) buf_out, len_out);
+            buf = wrbuf_cstr(wrbuf);
+            *len = len_out;
+        }
+        xmlFreeDoc(doc);
+        xmlFree(buf_out);
+    }
+#endif
+    return buf;
+}
+
 const char *yaz_record_render(Z_NamePlusRecord *npr, const char *schema,
                               WRBUF wrbuf,
                               const char *type_spec, int *len)
 {
+    const char *ret = 0;
+    NMEM nmem = 0;
+    char *base64_xpath = 0;
     size_t i;
     char type[40];
     char charset[40];
     char format[3];
     const char *cp = type_spec;
+    int len0;
+
+    if (!len)
+        len = &len0;
 
     for (i = 0; cp[i] && cp[i] != ';' && cp[i] != ' ' && i < sizeof(type)-1;
          i++)
@@ -226,6 +370,8 @@ const char *yaz_record_render(Z_NamePlusRecord *npr, const char *schema,
         {
             size_t j = 0;
             i = i + 8; /* skip charset= */
+            while (cp[i] == ' ')
+                i++;
             for (j = 0; cp[i] && cp[i] != ';' && cp[i] != ' '; i++)
             {
                 if (j < sizeof(charset)-1)
@@ -235,31 +381,44 @@ const char *yaz_record_render(Z_NamePlusRecord *npr, const char *schema,
         }
         else if (!strncmp(cp + i, "format=", 7))
         {
-            size_t j = 0; 
+            size_t j = 0;
             i = i + 7;
+            while (cp[i] == ' ')
+                i++;
             for (j = 0; cp[i] && cp[i] != ';' && cp[i] != ' '; i++)
             {
                 if (j < sizeof(format)-1)
                     format[j++] = cp[i];
             }
             format[j] = '\0';
-        } 
+        }
+        else if (!strncmp(cp + i, "base64=", 7))
+        {
+            size_t i0;
+            i = i + 7;
+            while (cp[i] == ' ')
+                i++;
+            i0 = i;
+            while (cp[i] && cp[i] != ';')
+                i++;
+
+            nmem = nmem_create();
+            base64_xpath = nmem_strdupn(nmem, cp + i0, i - i0);
+        }
     }
     if (!strcmp(type, "database"))
     {
-        if (len)
-            *len = (npr->databaseName ? strlen(npr->databaseName) : 0);
-        return npr->databaseName;
+        *len = (npr->databaseName ? strlen(npr->databaseName) : 0);
+        ret = npr->databaseName;
     }
     else if (!strcmp(type, "schema"))
     {
-        if (len)
-            *len = schema ? strlen(schema) : 0;
-        return schema;
+        *len = schema ? strlen(schema) : 0;
+        ret = schema;
     }
     else if (!strcmp(type, "syntax"))
     {
-        const char *desc = 0;   
+        const char *desc = 0;
         if (npr->which == Z_NamePlusRecord_databaseRecord)
         {
             Z_External *r = (Z_External *) npr->u.databaseRecord;
@@ -267,45 +426,61 @@ const char *yaz_record_render(Z_NamePlusRecord *npr, const char *schema,
         }
         if (!desc)
             desc = "none";
-        if (len)
-            *len = strlen(desc);
-        return desc;
+        *len = strlen(desc);
+        ret = desc;
     }
     if (npr->which != Z_NamePlusRecord_databaseRecord)
-        return 0;
-
-    /* from now on - we have a database record .. */
-    if (!strcmp(type, "render"))
+        ;
+    else if (!strcmp(type, "render"))
     {
-        return get_record_format(wrbuf, len, npr, YAZ_MARC_LINE, charset, format);
+        ret = get_record_format(wrbuf, len, npr, YAZ_MARC_LINE, charset, format);
     }
     else if (!strcmp(type, "xml"))
     {
-        return get_record_format(wrbuf, len, npr, YAZ_MARC_MARCXML, charset,
-                                 format);
+        ret = get_record_format(wrbuf, len, npr, YAZ_MARC_MARCXML, charset,
+                                format);
     }
     else if (!strcmp(type, "txml"))
     {
-        return get_record_format(wrbuf, len, npr, YAZ_MARC_TURBOMARC, charset,
-                                 format);
+        ret = get_record_format(wrbuf, len, npr, YAZ_MARC_TURBOMARC, charset,
+                                format);
+    }
+    else if (!strcmp(type, "json"))
+    {
+        ret = get_record_format(wrbuf, len, npr, YAZ_MARC_JSON, charset,
+                                format);
     }
     else if (!strcmp(type, "raw"))
     {
-        return get_record_format(wrbuf, len, npr, YAZ_MARC_ISO2709, charset,
-            format);
+        ret = get_record_format(wrbuf, len, npr, YAZ_MARC_ISO2709, charset,
+                                format);
     }
     else if (!strcmp(type, "ext"))
     {
-        if (len) *len = -1;
-        return (const char *) npr->u.databaseRecord;
+        *len = -1;
+        ret = (const char *) npr->u.databaseRecord;
     }
     else if (!strcmp(type, "opac"))
     {
         if (npr->u.databaseRecord->which == Z_External_OPAC)
-            return get_record_format(wrbuf, len, npr, YAZ_MARC_MARCXML, charset,
-                                     format);
+            ret = get_record_format(wrbuf, len, npr, YAZ_MARC_MARCXML, charset,
+                                    format);
     }
-    return 0;
+
+    if (base64_xpath && *len != -1)
+    {
+        char *type_spec = nmem_malloc(nmem,
+                                      strlen(type) + strlen(charset) + 11);
+        strcpy(type_spec, type);
+        if (*charset)
+        {
+            strcat(type_spec, "; charset=");
+            strcat(type_spec, charset);
+        }
+        ret = base64_render(nmem, wrbuf, ret, len, base64_xpath, type_spec);
+    }
+    nmem_destroy(nmem);
+    return ret;
 }
 
 /*