X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fmod_dom.c;h=b1555d7add25712f74d73edfc626957f16e4faba;hb=cf66499bac7c49c5bdd363a2c927295fa92f547a;hp=e65a3bff764f5ec718c08b5a2079efe2f5409c61;hpb=7a23ff31063e70f55eb387477130a358f0992988;p=idzebra-moved-to-github.git diff --git a/index/mod_dom.c b/index/mod_dom.c index e65a3bf..b1555d7 100644 --- a/index/mod_dom.c +++ b/index/mod_dom.c @@ -1,4 +1,5 @@ -/* $Id: mod_dom.c,v 1.25 2007-03-01 10:35:46 adam Exp $ + +/* $Id: mod_dom.c,v 1.30 2007-03-07 14:18:35 marc Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -348,25 +349,25 @@ static ZEBRA_RES perform_convert(struct filter_info *tinfo, if (last_xsp) *last_xsp = convert->stylesheet_xsp; - xmlFreeDoc(*doc); + if (!res_doc) + break; /* now saving into buffer and re-reading into DOM to avoid annoing XSLT problem with thrown-out indentation text nodes */ - if (res_doc){ - xsltSaveResultToString(&buf_out, &len_out, res_doc, - convert->stylesheet_xsp); - xmlFreeDoc(res_doc); - } + xsltSaveResultToString(&buf_out, &len_out, res_doc, + convert->stylesheet_xsp); + xmlFreeDoc(res_doc); + xmlFreeDoc(*doc); - *doc = xmlParseDoc(buf_out); + *doc = xmlParseMemory((const char *) buf_out, len_out); /* writing debug info out */ - if (extctr->flagShowRecords) - yaz_log(YLOG_LOG, "%s: XSLT %s \n %s", + if (extctr && extctr->flagShowRecords) + yaz_log(YLOG_LOG, "%s: XSLT %s\n %.*s", tinfo->fname ? tinfo->fname : "(none)", convert->stylesheet, - buf_out); + len_out, buf_out); xmlFree(buf_out); } @@ -720,8 +721,6 @@ static void index_value_of(struct filter_info *tinfo, xmlChar *text = xmlNodeGetContent(node); size_t text_len = strlen((const char *)text); - yaz_log(YLOG_LOG, "Indexing :%.*s:", text_len, text); - /* if there is no text, we do not need to proceed */ if (text_len) { @@ -1059,15 +1058,6 @@ static void extract_dom_doc_node(struct filter_info *tinfo, RecWord recword; (*extctr->init)(extctr, &recword); - /* - if (extctr->flagShowRecords) - { - xmlDocDumpMemory(doc, &buf_out, &len_out); - fwrite(buf_out, len_out, 1, stdout); - xmlFree(buf_out); - } - */ - tinfo->record_info_invoked = 0; process_xml_element_node(tinfo, extctr, &recword, (xmlNodePtr)doc); } @@ -1086,6 +1076,14 @@ static int convert_extract_doc(struct filter_info *tinfo, xsltStylesheetPtr last_xsp = 0; xmlDocPtr store_doc = 0; + /* per default do not ingest record */ + tinfo->record_info_invoked = 0; + + /* exit if empty document given */ + if (!doc) + return RECCTRL_EXTRACT_SKIP; + + /* we actuallu have a document which needs to be processed further */ params[0] = 0; set_param_str(params, "schema", zebra_dom_ns, tinfo->odr_record); @@ -1100,14 +1098,12 @@ static int convert_extract_doc(struct filter_info *tinfo, params, &store_doc, &last_xsp); } + /* saving either store doc or original doc in case no store doc exists */ if (last_xsp) xsltSaveResultToString(&buf_out, &len_out, store_doc ? store_doc : doc, last_xsp); else xmlDocDumpMemory(store_doc ? store_doc : doc, &buf_out, &len_out); - - /* if (p->flagShowRecords) - fwrite(buf_out, len_out, 1, stdout); */ (*p->setStoreData)(p, buf_out, len_out); xmlFree(buf_out); @@ -1118,15 +1114,17 @@ static int convert_extract_doc(struct filter_info *tinfo, /* extract conversion */ perform_convert(tinfo, p, tinfo->extract->convert, params, &doc, 0); + /* finally, do the indexing */ - if (doc) + if (doc){ extract_dom_doc_node(tinfo, p, doc); - - if (doc) xmlFreeDoc(doc); - + } + + /* there was nothing to index, so there is no inserted/updated record */ if (tinfo->record_info_invoked == 0) return RECCTRL_EXTRACT_SKIP; + return RECCTRL_EXTRACT_OK; } @@ -1144,8 +1142,9 @@ static int extract_xml_split(struct filter_info *tinfo, p /* I/O handler */, 0 /* URL */, 0 /* encoding */, - XML_PARSE_XINCLUDE| - XML_PARSE_NOENT); + XML_PARSE_XINCLUDE + | XML_PARSE_NOENT + | XML_PARSE_NONET); } if (!input->u.xmlreader.reader) return RECCTRL_EXTRACT_ERROR_GENERIC; @@ -1155,27 +1154,34 @@ static int extract_xml_split(struct filter_info *tinfo, { int type = xmlTextReaderNodeType(input->u.xmlreader.reader); int depth = xmlTextReaderDepth(input->u.xmlreader.reader); + if (type == XML_READER_TYPE_ELEMENT && input->u.xmlreader.split_level == depth) { - xmlNodePtr ptr - = xmlTextReaderExpand(input->u.xmlreader.reader); + xmlNodePtr ptr; + + /* per default do not ingest record */ + tinfo->record_info_invoked = 0; + + ptr = xmlTextReaderExpand(input->u.xmlreader.reader); if (ptr) - { + { + /* we have a new document */ + xmlNodePtr ptr2 = xmlCopyNode(ptr, 1); xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0"); xmlDocSetRootElement(doc, ptr2); /* writing debug info out */ - if (p->flagShowRecords){ + if (p->flagShowRecords) + { xmlChar *buf_out = 0; int len_out = 0; xmlDocDumpMemory(doc, &buf_out, &len_out); - yaz_log(YLOG_LOG, "%s: XMLREADER depth: %i\n%s", + yaz_log(YLOG_LOG, "%s: XMLREADER depth: %i\n%.*s", tinfo->fname ? tinfo->fname : "(none)", - depth, - buf_out); + depth, len_out, buf_out); xmlFree(buf_out); } @@ -1205,7 +1211,9 @@ static int extract_xml_full(struct filter_info *tinfo, p /* I/O handler */, 0 /* URL */, 0 /* encoding */, - XML_PARSE_XINCLUDE|XML_PARSE_NOENT); + XML_PARSE_XINCLUDE + | XML_PARSE_NOENT + | XML_PARSE_NONET); if (!doc) { return RECCTRL_EXTRACT_ERROR_GENERIC; @@ -1364,7 +1372,7 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p) doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */, 0 /* URL */, 0 /* encoding */, - XML_PARSE_XINCLUDE|XML_PARSE_NOENT); + XML_PARSE_XINCLUDE | XML_PARSE_NOENT | XML_PARSE_NONET); if (!doc) { p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;