From c78f61142413410e88ed2a83627516bb4fa24b33 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Mon, 18 Jun 2012 15:05:26 +0200 Subject: [PATCH] record_render: base64 decoding of embedded records --- include/yaz/record_render.h | 4 + src/record_render.c | 179 ++++++++++++++++++++++++++++++++++++++----- test/test_embed_record.c | 166 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 327 insertions(+), 22 deletions(-) diff --git a/include/yaz/record_render.h b/include/yaz/record_render.h index ad2fa1a..66eacab 100644 --- a/include/yaz/record_render.h +++ b/include/yaz/record_render.h @@ -48,6 +48,10 @@ YAZ_BEGIN_CDECL \param len length of returned buffer \retval !=0 buffer \retval =0 record could not be rendered + + txml; charset=marc-8 + xml; charset=utf-8 + xml; charset=utf-8; base64(/rec/my/text(), txml; charset=marc-8) */ YAZ_EXPORT const char *yaz_record_render(Z_NamePlusRecord *npr, const char *schema, diff --git a/src/record_render.c b/src/record_render.c index 7905016..291462b 100644 --- a/src/record_render.c +++ b/src/record_render.c @@ -19,6 +19,11 @@ #include #include #include +#include +#include + +#include +#include static yaz_iconv_t iconv_create_charset(const char *record_charset, yaz_iconv_t *cd2) @@ -222,10 +227,116 @@ static const char *get_record_format(WRBUF wrbuf, int *len, return res; } +static int replace_node(NMEM nmem, xmlNode *ptr, + const char *type_spec, char *record_buf) +{ + int ret = -1; + const char *res; + int len; + int m_len; + WRBUF wrbuf = wrbuf_alloc(); + ODR odr = odr_createmem(ODR_ENCODE); + Z_NamePlusRecord *npr = odr_malloc(odr, sizeof(*npr)); + npr->which = Z_NamePlusRecord_databaseRecord; + + if (atoi_n_check(record_buf, 5, &m_len)) + npr->u.databaseRecord = + z_ext_record_usmarc(odr, record_buf, strlen(record_buf)); + else + npr->u.databaseRecord = + z_ext_record_xml(odr, record_buf, strlen(record_buf)); + res = yaz_record_render(npr, 0, wrbuf, type_spec, &len); + if (res) + { + xmlDoc *doc = xmlParseMemory(res, strlen(res)); + xmlNode *nptr; + if (doc) + { + nptr = xmlCopyNode(xmlDocGetRootElement(doc), 1); + xmlReplaceNode(ptr, nptr); + xmlFreeDoc(doc); + } + else + { + nptr = xmlNewText(BAD_CAST res); + xmlReplaceNode(ptr, nptr); + } + ret = 0; + } + wrbuf_destroy(wrbuf); + odr_destroy(odr); + return ret; +} + +static const char *base64_render(NMEM nmem, WRBUF wrbuf, + const char *buf, int *len, + const char *expr, const char *type_spec) +{ + xmlDocPtr doc = xmlParseMemory(buf, *len); + if (doc) + { + xmlChar *buf_out; + int len_out; + xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc); + if (xpathCtx) + { + xmlXPathObjectPtr xpathObj = + xmlXPathEvalExpression((const xmlChar *) expr, xpathCtx); + if (xpathObj) + { + xmlNodeSetPtr nodes = xpathObj->nodesetval; + if (nodes) + { + int i; + for (i = 0; i < nodes->nodeNr; i++) + { + xmlNode *ptr = nodes->nodeTab[i]; + if (ptr->type == XML_TEXT_NODE) + { + const char *input = + nmem_text_node_cdata(ptr, nmem); + char *output = nmem_malloc( + nmem, strlen(input) + 1); + if (yaz_base64decode(input, output) == 0) + { + if (!replace_node(nmem, ptr, type_spec, output)) + { + /* replacement OK */ + xmlFreeNode(ptr); + /* unset below to avoid a bad reference in + xmlXPathFreeObject below */ + nodes->nodeTab[i] = 0; + } + } + } + } + } + xmlXPathFreeObject(xpathObj); + } + xmlXPathFreeContext(xpathCtx); + } + xmlDocDumpMemory(doc, &buf_out, &len_out); + if (buf_out) + { + wrbuf_rewind(wrbuf); + wrbuf_write(wrbuf, (const char *) buf_out, len_out); + buf = wrbuf_cstr(wrbuf); + *len = len_out; + } + xmlFreeDoc(doc); + xmlFree(buf_out); + } + return buf; +} + const char *yaz_record_render(Z_NamePlusRecord *npr, const char *schema, WRBUF wrbuf, const char *type_spec, int *len) { + const char *ret = 0; + NMEM nmem = 0; + char *base64_xpath = 0; + char *base64_type_spec = 0; size_t i; char type[40]; char charset[40]; @@ -269,18 +380,47 @@ const char *yaz_record_render(Z_NamePlusRecord *npr, const char *schema, } format[j] = '\0'; } + else if (!strncmp(cp + i, "base64", 6)) + { + i = i + 6; + + while (cp[i] == ' ') + i++; + if (cp[i] == '(') + { + size_t i0; + nmem = nmem_create(); + i++; + while (cp[i] == ' ') + i++; + i0 = i; + while (cp[i] != ',' && cp[i]) + i++; + base64_xpath = nmem_strdupn(nmem, cp + i0, i - i0); + if (cp[i]) + i++; + while (cp[i] == ' ') + i++; + i0 = i; + while (cp[i] != ')' && cp[i]) + i++; + base64_type_spec = nmem_strdupn(nmem, cp + i0, i - i0); + if (cp[i]) + i++; + } + } } if (!strcmp(type, "database")) { if (len) *len = (npr->databaseName ? strlen(npr->databaseName) : 0); - return npr->databaseName; + ret = npr->databaseName; } else if (!strcmp(type, "schema")) { if (len) *len = schema ? strlen(schema) : 0; - return schema; + ret = schema; } else if (!strcmp(type, "syntax")) { @@ -294,43 +434,46 @@ const char *yaz_record_render(Z_NamePlusRecord *npr, const char *schema, desc = "none"; if (len) *len = strlen(desc); - return desc; + ret = desc; } if (npr->which != Z_NamePlusRecord_databaseRecord) - return 0; - - /* from now on - we have a database record .. */ - if (!strcmp(type, "render")) + ; + else if (!strcmp(type, "render")) { - return get_record_format(wrbuf, len, npr, YAZ_MARC_LINE, charset, format); + ret = get_record_format(wrbuf, len, npr, YAZ_MARC_LINE, charset, format); } else if (!strcmp(type, "xml")) { - return get_record_format(wrbuf, len, npr, YAZ_MARC_MARCXML, charset, - format); + ret = get_record_format(wrbuf, len, npr, YAZ_MARC_MARCXML, charset, + format); } else if (!strcmp(type, "txml")) { - return get_record_format(wrbuf, len, npr, YAZ_MARC_TURBOMARC, charset, - format); + ret = get_record_format(wrbuf, len, npr, YAZ_MARC_TURBOMARC, charset, + format); } else if (!strcmp(type, "raw")) { - return get_record_format(wrbuf, len, npr, YAZ_MARC_ISO2709, charset, - format); + ret = get_record_format(wrbuf, len, npr, YAZ_MARC_ISO2709, charset, + format); } else if (!strcmp(type, "ext")) { if (len) *len = -1; - return (const char *) npr->u.databaseRecord; + ret = (const char *) npr->u.databaseRecord; } else if (!strcmp(type, "opac")) { if (npr->u.databaseRecord->which == Z_External_OPAC) - return get_record_format(wrbuf, len, npr, YAZ_MARC_MARCXML, charset, - format); + ret = get_record_format(wrbuf, len, npr, YAZ_MARC_MARCXML, charset, + format); } - return 0; + + if (base64_xpath) + ret = base64_render(nmem, wrbuf, + ret, len, base64_xpath, base64_type_spec); + nmem_destroy(nmem); + return ret; } /* diff --git a/test/test_embed_record.c b/test/test_embed_record.c index b3c5393..aac2987 100644 --- a/test/test_embed_record.c +++ b/test/test_embed_record.c @@ -11,16 +11,19 @@ #include #include #include +#include #if YAZ_HAVE_XML2 #include #include +#include +#include #include #include -void test(void) +void test1(void) { char base_enc[] = "MDA3NjZuYW0gIDIyMDAyNjU4YSA0NTAwMDAxMDAxMjAwMDAwMDAzMDAwNjAwMDEyMDA1MDAx" @@ -44,13 +47,14 @@ void test(void) int marc_size = strlen(bin_marc); char out_rec[1000]; + yaz_marc_t marc = yaz_marc_create(); + WRBUF buf = wrbuf_alloc(); + yaz_base64decode(base_enc, out_rec); YAZ_CHECK(strcmp(out_rec, bin_marc) == 0); - yaz_marc_t marc = yaz_marc_create(); yaz_marc_read_iso2709(marc, out_rec, marc_size); - WRBUF buf = wrbuf_alloc(); yaz_marc_write_marcxml(marc, buf); yaz_marc_destroy(marc); @@ -59,12 +63,166 @@ void test(void) } #endif +static int test_render(const char *type_spec, int is_marc, const char *input, + const char *expected_output) +{ + ODR odr = odr_createmem(ODR_ENCODE); + const char *actual_output; + int actual_len; + int res = 0; + WRBUF wrbuf = wrbuf_alloc(); + + Z_NamePlusRecord *npr = odr_malloc(odr, sizeof(*npr)); + npr->which = Z_NamePlusRecord_databaseRecord; + if (is_marc) + npr->u.databaseRecord = z_ext_record_usmarc(odr, input, strlen(input)); + else + npr->u.databaseRecord = z_ext_record_xml(odr, input, strlen(input)); + + actual_output = yaz_record_render(npr, 0, wrbuf, type_spec, &actual_len); + + if (actual_output && expected_output) + { + if (strlen(expected_output) == actual_len && + !memcmp(expected_output, actual_output, actual_len)) + res = 1; + else + { + yaz_log(YLOG_LOG, "Got result"); + yaz_log(YLOG_LOG, "%.*s", actual_len, actual_output); + yaz_log(YLOG_LOG, "Expected result"); + yaz_log(YLOG_LOG, "%s", expected_output); + } + } + else if (!actual_output && !expected_output) + res = 1; + else if (!actual_output && expected_output) + { + yaz_log(YLOG_LOG, "Got null result, but expected"); + yaz_log(YLOG_LOG, "%s", expected_output); + } + else + { + yaz_log(YLOG_LOG, "Got result, but expected no result"); + yaz_log(YLOG_LOG, "%.*s", actual_len, actual_output); + } + wrbuf_destroy(wrbuf); + odr_destroy(odr); + return res; +} + int main(int argc, char **argv) { YAZ_CHECK_INIT(argc, argv); YAZ_CHECK_LOG(); #if YAZ_HAVE_XML2 - test(); + test1(); + YAZ_CHECK(test_render("xml", 0, "", "")); + + YAZ_CHECK(test_render( + "xml", 1, + "\x30\x30\x31\x33\x38\x6E\x61\x6D\x20\x20\x32\x32\x30\x30\x30\x37" + "\x33\x38\x61\x20\x34\x35\x30\x30\x30\x30\x31\x30\x30\x31\x33\x30" + "\x30\x30\x30\x30\x30\x30\x33\x30\x30\x30\x34\x30\x30\x30\x31\x33" + "\x31\x30\x30\x30\x30\x31\x37\x30\x30\x30\x31\x37\x32\x34\x35\x30" + "\x30\x33\x30\x30\x30\x30\x33\x34\x1E\x20\x20\x20\x31\x31\x32\x32" + "\x34\x34\x36\x36\x20\x1E\x44\x4C\x43\x1E\x31\x30\x1F\x61\x4A\x61" + "\x63\x6B\x20\x43\x6F\x6C\x6C\x69\x6E\x73\x1E\x31\x30\x1F\x61\x48" + "\x6F\x77\x20\x74\x6F\x20\x70\x72\x6F\x67\x72\x61\x6D\x20\x61\x20" + "\x63\x6F\x6D\x70\x75\x74\x65\x72\x1E\x1D", + "\n" + " 00138nam a22000738a 4500\n" + " 11224466 \n" + " DLC\n" + " \n" + " Jack Collins\n" + " \n" + " \n" + " How to program a computer\n" + " \n" + "\n")); + + YAZ_CHECK(test_render("xml", 0, "", "")); + + YAZ_CHECK(test_render( + "xml; base64(/my/text(),xml)", 0, + "" + "MDAxMzhuYW0gIDIyMDAwNzM4YSA0NTAwMDAxMDAxMzAwMDAwMDAzMDAwNDAwMDEzMTAwMDAxNzAw" + "MDE3MjQ1MDAzMDAwMDM0HiAgIDExMjI0NDY2IB5ETEMeMTAfYUphY2sgQ29sbGlucx4xMB9hSG93" + "IHRvIHByb2dyYW0gYSBjb21wdXRlch4d" + "", + "\n" + "\n" + " 00138nam a22000738a 4500\n" + " 11224466 \n" + " DLC\n" + " \n" + " Jack Collins\n" + " \n" + " \n" + " How to program a computer\n" + " \n" + "\n")); + + YAZ_CHECK(test_render( + "xml; charset=utf-8; base64(/my/text(),xml)", 0, + "" + "MDAxMzhuYW0gIDIyMDAwNzM4YSA0NTAwMDAxMDAxMzAwMDAwMDAzMDAwNDAwMDEzMTAwMDAxNzAw" + "MDE3MjQ1MDAzMDAwMDM0HiAgIDExMjI0NDY2IB5ETEMeMTAfYUphY2sgQ29sbGlucx4xMB9hSG93" + "IHRvIHByb2dyYW0gYSBjb21wdXRlch4d" + "", + "\n" + "\n" + " 00138nam a22000738a 4500\n" + " 11224466 \n" + " DLC\n" + " \n" + " Jack Collins\n" + " \n" + " \n" + " How to program a computer\n" + " \n" + "\n")); + + YAZ_CHECK(test_render( + "xml; base64(/my/text(),xml);charset=utf-8", 0, + "" + "MDAxMzhuYW0gIDIyMDAwNzM4YSA0NTAwMDAxMDAxMzAwMDAwMDAzMDAwNDAwMDEzMTAwMDAxNzAw" + "MDE3MjQ1MDAzMDAwMDM0HiAgIDExMjI0NDY2IB5ETEMeMTAfYUphY2sgQ29sbGlucx4xMB9hSG93" + "IHRvIHByb2dyYW0gYSBjb21wdXRlch4d" + "", + "\n" + "\n" + " 00138nam a22000738a 4500\n" + " 11224466 \n" + " DLC\n" + " \n" + " Jack Collins\n" + " \n" + " \n" + " How to program a computer\n" + " \n" + "\n")); + + YAZ_CHECK(test_render( + "xml; base64(/my/text(),txml;charset=utf-8)", 0, + "" + "MDAxMzhuYW0gIDIyMDAwNzM4YSA0NTAwMDAxMDAxMzAwMDAwMDAzMDAwNDAwMDEzMTAwMDAxNzAw" + "MDE3MjQ1MDAzMDAwMDM0HiAgIDExMjI0NDY2IB5ETEMeMTAfYUphY2sgQ29sbGlucx4xMB9hSG93" + "IHRvIHByb2dyYW0gYSBjb21wdXRlch4d" + "", + "\n" + "\n" + " 00138nam a22000738a 4500\n" + " 11224466 \n" + " DLC\n" + " \n" + " Jack Collins\n" + " \n" + " \n" + " How to program a computer\n" + " \n" + "\n")); #endif YAZ_CHECK_TERM; } -- 1.7.10.4