From 800fb31d00e78000dd2229231401ee67d03ed78d Mon Sep 17 00:00:00 2001 From: Marc Cromme Date: Mon, 5 Mar 2007 13:02:11 +0000 Subject: [PATCH] added tests for bug #883 'Need an 'ignore' value for the z:type attribute in the canonical indexing format' resolved bug #883 tested as well on gutenberg collection zebra-setup/gutenberg case closed, see http://bugzilla.indexdata.dk/show_bug.cgi?id=883 --- index/mod_dom.c | 37 ++++++++++++++++++++++++---------- test/xslt/Makefile.am | 9 ++++++--- test/xslt/dom-config-skipped.xml | 12 +++++++++++ test/xslt/dom-index-skipped.xsl | 41 ++++++++++++++++++++++++++++++++++++++ test/xslt/dom1.c | 15 +++++++++++++- test/xslt/marc-col.xml | 4 ++-- test/xslt/zebra-dom.cfg | 10 ++++++++++ 7 files changed, 111 insertions(+), 17 deletions(-) create mode 100644 test/xslt/dom-config-skipped.xml create mode 100644 test/xslt/dom-index-skipped.xsl create mode 100644 test/xslt/zebra-dom.cfg diff --git a/index/mod_dom.c b/index/mod_dom.c index fca04eb..a2e83c7 100644 --- a/index/mod_dom.c +++ b/index/mod_dom.c @@ -1,4 +1,5 @@ -/* $Id: mod_dom.c,v 1.26 2007-03-03 21:39:10 adam Exp $ + +/* $Id: mod_dom.c,v 1.27 2007-03-05 13:02:11 marc Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -1057,7 +1058,6 @@ static void extract_dom_doc_node(struct filter_info *tinfo, RecWord recword; (*extctr->init)(extctr, &recword); - tinfo->record_info_invoked = 0; process_xml_element_node(tinfo, extctr, &recword, (xmlNodePtr)doc); } @@ -1076,6 +1076,14 @@ static int convert_extract_doc(struct filter_info *tinfo, xsltStylesheetPtr last_xsp = 0; xmlDocPtr store_doc = 0; + /* per default do not ingest record */ + tinfo->record_info_invoked = 0; + + /* exit if empty document given */ + if (!doc) + return RECCTRL_EXTRACT_SKIP; + + /* we actually have a document which needs to be processed further */ params[0] = 0; set_param_str(params, "schema", zebra_dom_ns, tinfo->odr_record); @@ -1090,14 +1098,12 @@ static int convert_extract_doc(struct filter_info *tinfo, params, 
&store_doc, &last_xsp); } + /* saving either store doc or original doc in case no store doc exists */ if (last_xsp) xsltSaveResultToString(&buf_out, &len_out, store_doc ? store_doc : doc, last_xsp); else xmlDocDumpMemory(store_doc ? store_doc : doc, &buf_out, &len_out); - - /* if (p->flagShowRecords) - fwrite(buf_out, len_out, 1, stdout); */ (*p->setStoreData)(p, buf_out, len_out); xmlFree(buf_out); @@ -1108,15 +1114,17 @@ static int convert_extract_doc(struct filter_info *tinfo, /* extract conversion */ perform_convert(tinfo, p, tinfo->extract->convert, params, &doc, 0); + /* finally, do the indexing */ - if (doc) + if (doc){ extract_dom_doc_node(tinfo, p, doc); - - if (doc) xmlFreeDoc(doc); - + } + + /* there was nothing to index, so there is no inserted/updated record */ if (tinfo->record_info_invoked == 0) return RECCTRL_EXTRACT_SKIP; + return RECCTRL_EXTRACT_OK; } @@ -1145,13 +1153,20 @@ static int extract_xml_split(struct filter_info *tinfo, { int type = xmlTextReaderNodeType(input->u.xmlreader.reader); int depth = xmlTextReaderDepth(input->u.xmlreader.reader); + if (type == XML_READER_TYPE_ELEMENT && input->u.xmlreader.split_level == depth) { - xmlNodePtr ptr + /* per default do not ingest record */ + tinfo->record_info_invoked = 0; + + xmlNodePtr ptr = xmlTextReaderExpand(input->u.xmlreader.reader); + if (ptr) - { + { + /* we have a new document */ + xmlNodePtr ptr2 = xmlCopyNode(ptr, 1); xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0"); diff --git a/test/xslt/Makefile.am b/test/xslt/Makefile.am index b886d7a..38b6d1f 100644 --- a/test/xslt/Makefile.am +++ b/test/xslt/Makefile.am @@ -1,15 +1,18 @@ -# $Id: Makefile.am,v 1.12 2007-02-18 21:53:22 adam Exp $ +# $Id: Makefile.am,v 1.13 2007-03-05 13:02:11 marc Exp $ check_PROGRAMS = xslt1 xslt2 xslt3 xslt4 xslt5 dom1 TESTS = $(check_PROGRAMS) -EXTRA_DIST=zebra.cfg zebrastaticrank.cfg \ +EXTRA_DIST= \ + zebra.cfg zebrastaticrank.cfg zebra-dom.cfg \ marc-col.xml marc-one.xml marc-col.mrc \ marc-missing-ns.xml \ 
index.xsl id.xsl \ dom-index-element.xsl dom-index-pi.xsl \ marcschema-col.xml marcschema-one.xml snippet.xsl \ - dom-config-col.xml dom-config-one.xml dom-config-marc.xml + dom-config-col.xml dom-config-one.xml dom-config-marc.xml \ + dom-config-skipped.xml + xslt1_SOURCES = xslt1.c xslt2_SOURCES = xslt2.c diff --git a/test/xslt/dom-config-skipped.xml b/test/xslt/dom-config-skipped.xml new file mode 100644 index 0000000..26caa42 --- /dev/null +++ b/test/xslt/dom-config-skipped.xml @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/test/xslt/dom-index-skipped.xsl b/test/xslt/dom-index-skipped.xsl new file mode 100644 index 0000000..1545f0d --- /dev/null +++ b/test/xslt/dom-index-skipped.xsl @@ -0,0 +1,41 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/test/xslt/dom1.c b/test/xslt/dom1.c index efa7edc..236d1c5 100644 --- a/test/xslt/dom1.c +++ b/test/xslt/dom1.c @@ -1,4 +1,4 @@ -/* $Id: dom1.c,v 1.1 2007-02-07 12:08:54 adam Exp $ +/* $Id: dom1.c,v 1.2 2007-03-05 13:02:11 marc Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -61,6 +61,7 @@ void tst(int argc, char **argv) zh = index_some(zs, "dom.bad.xml", "marc-col.xml"); zebra_close(zh); + /* testing XMLREADER input with PI stylesheet */ zh = index_some(zs, "dom.dom-config-col.xml", "marc-col.xml"); YAZ_CHECK(tl_query(zh, "@attr 1=title computer", 3)); YAZ_CHECK(tl_query(zh, "@attr 1=control 11224466", 1)); @@ -68,6 +69,7 @@ void tst(int argc, char **argv) YAZ_CHECK(tl_query_x(zh, "@attr 1=4 computer", 0, 121)); zebra_close(zh); + /* testing XMLREADER input with ELEMENT stylesheet */ zh = index_some(zs, "dom.dom-config-one.xml", "marc-one.xml"); YAZ_CHECK(tl_query(zh, "@attr 1=title computer", 1)); YAZ_CHECK(tl_query(zh, "@attr 1=control 11224466", 1)); @@ -75,6 +77,7 @@ void tst(int argc, char **argv) YAZ_CHECK(tl_query_x(zh, "@attr 1=4 computer", 0, 121)); zebra_close(zh); + /* testing MARC input with ELEMENT stylesheet */ zh = index_some(zs, "dom.dom-config-marc.xml", 
"marc-col.mrc"); YAZ_CHECK(tl_query(zh, "@attr 1=title computer", 3)); YAZ_CHECK(tl_query(zh, "@attr 1=control 11224466", 1)); @@ -82,6 +85,16 @@ void tst(int argc, char **argv) YAZ_CHECK(tl_query_x(zh, "@attr 1=4 computer", 0, 121)); zebra_close(zh); + /* testing XMLREADER input with ELEMENT stylesheet and skipped records */ + zh = index_some(zs, "dom.dom-config-skipped.xml", "marc-col.xml"); + YAZ_CHECK(tl_query(zh, "@attr 1=title computer", 1)); + YAZ_CHECK(tl_query(zh, "@attr 1=control 11224466", 0)); + YAZ_CHECK(tl_query(zh, "@attr 1=control 11224467", 1)); + YAZ_CHECK(tl_query(zh, "@attr 1=control 73090924", 0)); + + zebra_close(zh); + + YAZ_CHECK(tl_close_down(0, zs)); } diff --git a/test/xslt/marc-col.xml b/test/xslt/marc-col.xml index 71e8a77..84b6f67 100644 --- a/test/xslt/marc-col.xml +++ b/test/xslt/marc-col.xml @@ -71,12 +71,12 @@ 3 01369cam 2200265 i 4500 - 73090924 //r82 + 73090924 DLC 19820524000000.0 760609s1974 nyua b 10110 eng - 73090924 //r82 + 73090924 DLC diff --git a/test/xslt/zebra-dom.cfg b/test/xslt/zebra-dom.cfg new file mode 100644 index 0000000..a912991 --- /dev/null +++ b/test/xslt/zebra-dom.cfg @@ -0,0 +1,10 @@ +profilePath: ${srcdir:-.}:${srcdir:-.}/../../tab + +modulePath: ../../index/.libs + +recordType: dom.dom-config-skipped.xml +#recordType: dom.dom-config-col.xml + +perm.anonymous: rw + + -- 1.7.10.4