Support solrmarc decoding in record_conv/retrieval module
authorAdam Dickmeiss <adam@indexdata.dk>
Fri, 21 Mar 2014 12:03:26 +0000 (13:03 +0100)
committerAdam Dickmeiss <adam@indexdata.dk>
Fri, 21 Mar 2014 12:03:26 +0000 (13:03 +0100)
doc/tools.xml
src/record_conv.c
test/test_record_conv.c

index 2d9b62b..d66d7a3 100644 (file)
@@ -2402,6 +2402,21 @@ int cql_transform_rpn2cql_wrbuf(cql_transform_t ct,
         </para>
        </listitem>
       </varlistentry>
+
+      <varlistentry><term><literal>solrmarc</literal></term>
+       <listitem>
+        <para>
+         The <literal>solrmarc</literal> decodes solrmarc records.
+         It assumes that the input is pure solrmarc text (no escaping)
+         and will convert all sequences of the form #XX; to a single
+         character of the hexadecimal value as given by XX. The output,
+         presumably, is a valid ISO2709 buffer.
+        </para>
+        <para>
+         This conversion is available in YAZ 5.0.21 and later.
+        </para>
+       </listitem>
+      </varlistentry>
      </variablelist>
     </para>
    </sect2>
index ca1e41e..e67ef00 100644 (file)
@@ -317,6 +317,38 @@ static void destroy_xslt(void *vinfo)
 /* YAZ_HAVE_XSLT */
 #endif
 
+static void *construct_solrmarc(const xmlNode *ptr,
+                                const char *path, WRBUF wr_error)
+{
+    if (strcmp((const char *) ptr->name, "solrmarc"))
+        return 0;
+    return wr_error; /* any non-null ptr will do; we don't use it later*/
+}
+
+static int convert_solrmarc(void *info, WRBUF record, WRBUF wr_error)
+{
+    WRBUF w = wrbuf_alloc();
+    const char *buf = wrbuf_buf(record);
+    size_t i, sz = wrbuf_len(record);
+    for (i = 0; i < sz; i++)
+    {
+        int ch;
+        if (buf[i] == '#' && i < sz - 3 && buf[i+3] == ';'
+            && atoi_n_check(buf+i+1, 2, &ch))
+            i += 3;
+        else
+            ch = buf[i];
+        wrbuf_putc(w, ch);
+    }
+    wrbuf_rewind(record);
+    wrbuf_write(record, wrbuf_buf(w), wrbuf_len(w));
+    wrbuf_destroy(w);
+    return 0;
+}
+
+static void destroy_solrmarc(void *info)
+{
+}
 
 static void *construct_marc(const xmlNode *ptr,
                             const char *path, WRBUF wr_error)
@@ -332,7 +364,6 @@ static void *construct_marc(const xmlNode *ptr,
         nmem_destroy(nmem);
         return 0;
     }
-
     info->nmem = nmem;
     info->input_charset = 0;
     info->output_charset = 0;
@@ -554,24 +585,28 @@ static void destroy_marc(void *info)
 int yaz_record_conv_configure_t(yaz_record_conv_t p, const xmlNode *ptr,
                                 struct yaz_record_conv_type *types)
 {
-    struct yaz_record_conv_type bt[2];
+    struct yaz_record_conv_type bt[3];
+    size_t i = 0;
 
     /* register marc */
-    bt[0].construct = construct_marc;
-    bt[0].convert = convert_marc;
-    bt[0].destroy = destroy_marc;
+    bt[i].construct = construct_marc;
+    bt[i].convert = convert_marc;
+    bt[i++].destroy = destroy_marc;
+
+    bt[i-1].next = &bt[i];
+    bt[i].construct = construct_solrmarc;
+    bt[i].convert = convert_solrmarc;
+    bt[i++].destroy = destroy_solrmarc;
 
 #if YAZ_HAVE_XSLT
     /* register xslt */
-    bt[0].next = &bt[1];
-    bt[1].next = types;
-    bt[1].construct = construct_xslt;
-    bt[1].convert = convert_xslt;
-    bt[1].destroy = destroy_xslt;
-#else
-    bt[0].next = types;
+    bt[i-1].next = &bt[i];
+    bt[i].construct = construct_xslt;
+    bt[i].convert = convert_xslt;
+    bt[i++].destroy = destroy_xslt;
 #endif
 
+    bt[i-1].next = types;
     yaz_record_conv_reset(p);
 
     /* parsing element children */
index a00ac9e..f388ea4 100644 (file)
@@ -246,6 +246,13 @@ static void tst_convert1(void)
         "\x1E\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20\x1E\x20\x20"
         "\x1F\x61\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20\x1E\x1D";
 
+    const char *solrmarc_rec =
+        "\x30\x30\x30\x38\x30\x6E\x61\x6D\x20\x61\x32\x32\x30\x30\x30\x34"
+        "\x39\x38\x61\x20\x34\x35\x30\x30\x30\x30\x31\x30\x30\x31\x33\x30"
+        "\x30\x30\x30\x30\x30\x31\x30\x30\x30\x31\x37\x30\x30\x30\x31\x33"
+        "#30;\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20#30;\x20\x20"
+        "#31;\x61\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20#30;#29;";
+
     YAZ_CHECK(conv_configure_test("<backend>"
                                   "<marc"
                                   " inputcharset=\"utf-8\""
@@ -271,6 +278,18 @@ static void tst_convert1(void)
     YAZ_CHECK(conv_convert_test(p, iso2709_rec, marcxml_rec));
     yaz_record_conv_destroy(p);
 
+    YAZ_CHECK(conv_configure_test("<backend>"
+                                  "<solrmarc/>"
+                                  "<marc"
+                                  " outputcharset=\"utf-8\""
+                                  " inputcharset=\"marc-8\""
+                                  " outputformat=\"marcxml\""
+                                  " inputformat=\"marc\""
+                                  "/>"
+                                  "</backend>",
+                                  0, &p));
+    YAZ_CHECK(conv_convert_test(p, solrmarc_rec, marcxml_rec));
+    yaz_record_conv_destroy(p);
 
     YAZ_CHECK(conv_configure_test("<backend>"
                                   "<xslt stylesheet=\"test_record_conv.xsl\"/>"