From 80d0b86c41a5934878cb51657e92060909f320f4 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Thu, 8 Jan 2015 14:52:55 +0100 Subject: [PATCH] record_conv: selection by X-Path YAZ-811 --- doc/book.xml | 32 +++++++++++++--- src/record_conv.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++- test/test_record_conv.c | 15 ++++++++ 3 files changed, 139 insertions(+), 6 deletions(-) diff --git a/doc/book.xml b/doc/book.xml index 1c0d40f..462cb7f 100644 --- a/doc/book.xml +++ b/doc/book.xml @@ -7393,21 +7393,25 @@ int cql_transform_rpn2cql_wrbuf(cql_transform_t ct, - xslt + select - The xslt element specifies a conversion - via &acro.xslt;. The following attributes may be specified: + The select selects one or more text nodes + and decodes them as XML. + The following attributes may be specified: - stylesheet (REQUIRED) + path (REQUIRED) - Stylesheet file. + X-Path expression for selecting text nodes. + + This conversion is available in YAZ 5.8.0 and later. + @@ -7425,6 +7429,24 @@ int cql_transform_rpn2cql_wrbuf(cql_transform_t ct, + + xslt + + + The xslt element specifies a conversion + via &acro.xslt;. The following attributes may be specified: + + stylesheet (REQUIRED) + + + Stylesheet file. 
+
+
+
+
+
+
+
diff --git a/src/record_conv.c b/src/record_conv.c
index 9ab5b71..1595faf 100644
--- a/src/record_conv.c
+++ b/src/record_conv.c
@@ -25,6 +25,8 @@
 #include
 #include
 #include
+#include
+#include
 #if YAZ_HAVE_XSLT
 #include
 #include
@@ -317,6 +319,124 @@ static void destroy_xslt(void *vinfo)
 /* YAZ_HAVE_XSLT */
 #endif
 
+/** Settings for the "select" conversion, filled in by construct_select */
+struct select_info {
+    NMEM nmem;        /* holds this struct and xpath_expr */
+    char *xpath_expr; /* value of the path attribute, or 0 if absent */
+};
+
+/**
+ * Creates a select step from a "select" configuration element.
+ *
+ * Returns 0 when ptr is not a "select" element, or when it has an
+ * attribute other than a text-valued "path" (a message is then written
+ * to wr_error). Otherwise returns a struct select_info allocated from
+ * its own NMEM. The path argument is not used.
+ */
+static void *construct_select(const xmlNode *ptr,
+                              const char *path, WRBUF wr_error)
+{
+    if (strcmp((const char *) ptr->name, "select"))
+        return 0;
+    else
+    {
+        struct _xmlAttr *attr;
+        NMEM nmem = nmem_create();
+        struct select_info *info = nmem_malloc(nmem, sizeof(*info));
+
+        info->nmem = nmem;
+        info->xpath_expr = 0;
+        for (attr = ptr->properties; attr; attr = attr->next)
+        {
+            if (!xmlStrcmp(attr->name, BAD_CAST "path") &&
+                attr->children && attr->children->type == XML_TEXT_NODE)
+                info->xpath_expr =
+                    nmem_strdup(nmem, (const char *) attr->children->content);
+            else
+            {
+                wrbuf_printf(wr_error, "Bad attribute '%s'. "
+                             "Expected path.", attr->name);
+                nmem_destroy(nmem);
+                return 0;
+            }
+        }
+        return info;
+    }
+}
+
+/**
+ * Replaces record with the text selected by the XPath expression.
+ *
+ * record is parsed as XML and the expression is evaluated against it.
+ * For each node of the result, the content of its text children (for an
+ * element) or of the node and its following text siblings (for a text
+ * node) is appended to the rewound record. record is left as is when no
+ * expression is configured or evaluation returns no object. Returns 0,
+ * or -1 with a message in wr_error when record does not parse.
+ */
+static int convert_select(void *vinfo, WRBUF record, WRBUF wr_error)
+{
+    int ret = 0;
+    struct select_info *info = vinfo;
+
+    xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
+                                   wrbuf_len(record));
+    if (!doc)
+    {
+        wrbuf_printf(wr_error, "xmlParseMemory failed");
+        ret = -1;
+    }
+    else
+    {
+        xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
+        if (xpathCtx)
+        {
+            xmlXPathObjectPtr xpathObj = 0;
+
+            if (info->xpath_expr)
+                xpathObj = xmlXPathEvalExpression(
+                    (const xmlChar *) info->xpath_expr, xpathCtx);
+            if (xpathObj)
+            {
+                xmlNodeSetPtr nodes = xpathObj->nodesetval;
+                wrbuf_rewind(record);
+                if (nodes)
+                {
+                    int i;
+                    for (i = 0; i < nodes->nodeNr; i++)
+                    {
+                        xmlNode *ptr = nodes->nodeTab[i];
+
+                        if (ptr->type == XML_ELEMENT_NODE)
+                            ptr = ptr->children; /* 0 if element is empty */
+                        else if (ptr->type != XML_TEXT_NODE)
+                            ptr = 0; /* other node kinds yield no text */
+                        for (; ptr; ptr = ptr->next)
+                            if (ptr->type == XML_TEXT_NODE && ptr->content)
+                                wrbuf_puts(record,
+                                           (const char *) ptr->content);
+                    }
+                }
+                xmlXPathFreeObject(xpathObj);
+            }
+            /* released even when no expression is configured */
+            xmlXPathFreeContext(xpathCtx);
+        }
+        xmlFreeDoc(doc);
+    }
+    return ret;
+}
+
+/** Frees a select step; its NMEM holds the struct and the expression */
+static void destroy_select(void *info)
+{
+    struct select_info *sinfo = info;
+
+    if (sinfo)
+        nmem_destroy(sinfo->nmem);
+}
+
+
 static void *construct_solrmarc(const xmlNode *ptr,
                                 const char *path, WRBUF wr_error)
 {
@@ -592,7 +712,7 @@ static void destroy_marc(void *info)
 int yaz_record_conv_configure_t(yaz_record_conv_t p, const xmlNode *ptr,
                                 struct yaz_record_conv_type *types)
 {
-    struct yaz_record_conv_type bt[3];
+    struct yaz_record_conv_type bt[4];
     size_t i = 0;
 
     /* register marc */
@@ -605,6 +725,12 @@ int yaz_record_conv_configure_t(yaz_record_conv_t p, const xmlNode *ptr,
     bt[i].convert = convert_solrmarc;
     bt[i++].destroy = destroy_solrmarc;
 
+    /* register select */
+    bt[i-1].next = &bt[i];
+    bt[i].construct = construct_select;
+    bt[i].convert = convert_select;
+    bt[i++].destroy = destroy_select;
+
 #if YAZ_HAVE_XSLT
     /* register xslt */
     bt[i-1].next = &bt[i];
diff --git a/test/test_record_conv.c b/test/test_record_conv.c index ff9a2a8..afd966e 100644 --- a/test/test_record_conv.c +++ b/test/test_record_conv.c @@ -252,6 +252,14 @@ static void tst_convert1(void) "\x30\x30\x30\x30\x30\x31\x30\x30\x30\x31\x37\x30\x30\x30\x31\x33" "#30;\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20#30;\x20\x20" "#31;\x61\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20#30;#29;"; + const char *raw_rec = /* raw is xml-string of marcxml_rec */ + "<record xmlns=\"http://www.loc.gov/MARC21/slim\">\n" + " <leader>00080nam a22000498a 4500</leader>\n" + " <controlfield tag=\"001\"> 11224466 </controlfield>\n" + " <datafield tag=\"010\" ind1=\" \" ind2=\" \">\n" + " <subfield code=\"a\"> 11224466 </subfield>\n" + " </datafield>\n" + "</record>\n\n"; YAZ_CHECK(conv_configure_test("" "", 0, &p)); YAZ_CHECK(conv_convert_test(p, marcxml_rec, marcxml_rec)); + + YAZ_CHECK(conv_configure_test("" + "