X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fmod_dom.c;h=5137efdddd977463cf03b5282dd193ecac5ffb68;hb=298a7903c3915135806074286f98e8b3f336e1d3;hp=4ae23d5192b9a2ef9d8b3824145d7ac8a3cf2ed0;hpb=20c1a7cff563f8b371ef6eaec600f7e171cbbb80;p=idzebra-moved-to-github.git diff --git a/index/mod_dom.c b/index/mod_dom.c index 4ae23d5..5137efd 100644 --- a/index/mod_dom.c +++ b/index/mod_dom.c @@ -1,4 +1,4 @@ -/* $Id: mod_dom.c,v 1.10 2007-02-14 16:38:41 marc Exp $ +/* $Id: mod_dom.c,v 1.15 2007-02-15 15:08:41 marc Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -42,6 +42,16 @@ #include #include +/* DOM filter style indexing */ +#define ZEBRA_DOM_NS "http://indexdata.com/zebra-2.0" +static const char *zebra_dom_ns = ZEBRA_DOM_NS; + +/* DOM filter style indexing */ +#define ZEBRA_PI_NAME "zebra-2.0" +static const char *zebra_pi_name = ZEBRA_PI_NAME; + + + struct convert_s { const char *stylesheet; xsltStylesheetPtr stylesheet_xsp; @@ -242,9 +252,10 @@ static ZEBRA_RES parse_convert(struct filter_info *tinfo, xmlNodePtr ptr, ; else yaz_log(YLOG_WARN, "%s: dom filter: " - "bad attribute %s" - " for ", - tinfo->fname, attr->name); + "%s bad attribute @%s, " + "expected @stylesheet", + tinfo->fname, + xmlGetNodePath(ptr), attr->name); if (p->stylesheet) { char tmp_xslt_full_name[1024]; @@ -253,8 +264,7 @@ static ZEBRA_RES parse_convert(struct filter_info *tinfo, xmlNodePtr ptr, NULL, tmp_xslt_full_name)) { - yaz_log(YLOG_WARN, - "%s: dom filter: " + yaz_log(YLOG_WARN, "%s: dom filter: " "stylesheet %s not found in " "path %s", tinfo->fname, @@ -268,8 +278,7 @@ static ZEBRA_RES parse_convert(struct filter_info *tinfo, xmlNodePtr ptr, tmp_xslt_full_name); if (!p->stylesheet_xsp) { - yaz_log(YLOG_WARN, - "%s: dom filter: " + yaz_log(YLOG_WARN, "%s: dom filter: " "could not parse xslt " "stylesheet %s", tinfo->fname, tmp_xslt_full_name); @@ -278,10 +287,9 @@ static ZEBRA_RES parse_convert(struct filter_info *tinfo, xmlNodePtr ptr, } else { - yaz_log(YLOG_WARN, - "%s: dom filter: " - "missing attribute 'stylesheet' " - "for element 'xslt'", tinfo->fname); + yaz_log(YLOG_WARN, "%s: dom filter: " + "%s missing attribute 'stylesheet' ", + tinfo->fname, xmlGetNodePath(ptr)); return ZEBRA_FAIL; } *l = p; @@ -290,8 +298,9 @@ static ZEBRA_RES parse_convert(struct filter_info *tinfo, xmlNodePtr ptr, else { yaz_log(YLOG_LOG, - "%s: dom filter: bad node '%s' for ", - tinfo->fname, ptr->name); + "%s: dom filter: " + "%s bad node '%s'", + tinfo->fname, xmlGetNodePath(ptr), ptr->name); return ZEBRA_FAIL; } @@ -351,18 +360,19 @@ static ZEBRA_RES parse_input(struct filter_info *tinfo, xmlNodePtr ptr, if (attr_content(attr, "charset", &input_charset)) ; else - yaz_log(YLOG_WARN, - "%s: dom filter: bad attribute %s" - " for ", - tinfo->fname, attr->name); + yaz_log(YLOG_WARN, "%s: dom filter: " + "%s bad attribute @%s," + " expected @charset", + tinfo->fname, + xmlGetNodePath(ptr), attr->name); } iconv = yaz_iconv_open("utf-8", input_charset); if (!iconv) { - yaz_log(YLOG_WARN, - "%s: dom filter: unsupported charset " - "'%s' for ", - tinfo->fname, input_charset); + yaz_log(YLOG_WARN, "%s: dom filter: " + "%s unsupported @charset '%s'", + tinfo->fname, xmlGetNodePath(ptr), + input_charset); return ZEBRA_FAIL; } else @@ -396,10 +406,11 @@ static ZEBRA_RES parse_input(struct filter_info *tinfo, xmlNodePtr ptr, if (attr_content(attr, "level", &level_str)) ; else - yaz_log(YLOG_WARN, - "%s: dom filter: bad attribute %s" - " for ", - tinfo->fname, attr->name); + yaz_log(YLOG_WARN, "%s: dom filter: " + "%s bad attribute @%s," + " expected @level", + tinfo->fname, xmlGetNodePath(ptr), + attr->name); } if (level_str) p->u.xmlreader.split_level = atoi(level_str); @@ -411,8 +422,10 @@ static ZEBRA_RES parse_input(struct filter_info *tinfo, xmlNodePtr ptr, } else { - yaz_log(YLOG_WARN, "%s: dom filter: bad input type %s", - tinfo->fname, ptr->name); + yaz_log(YLOG_WARN, "%s: dom filter: " + "%s bad element <%s>," + " expected |", + tinfo->fname, xmlGetNodePath(ptr), ptr->name); return ZEBRA_FAIL; } } @@ -433,13 +446,14 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname) else tinfo->full_name = odr_strdup(tinfo->odr_config, tinfo->fname); - yaz_log(YLOG_LOG, "dom filter: loading config file %s", tinfo->full_name); + yaz_log(YLOG_LOG, "%s dom filter: " + "loading config file %s", tinfo->fname, tinfo->full_name); doc = xmlParseFile(tinfo->full_name); if (!doc) { - yaz_log(YLOG_WARN, - "%s: dom filter: failed to parse config file %s", + yaz_log(YLOG_WARN, "%s: dom filter: " + "failed to parse config file %s", tinfo->fname, tinfo->full_name); return ZEBRA_FAIL; } @@ -450,9 +464,10 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname) if (!ptr || ptr->type != XML_ELEMENT_NODE || XML_STRCMP(ptr->name, "dom")) { - yaz_log(YLOG_WARN, - "%s: dom filter: expected root element ", - tinfo->fname); + yaz_log(YLOG_WARN, "%s: dom filter: " + "%s bad root element <%s>," + " expected root element ", + tinfo->fname, xmlGetNodePath(ptr), ptr->name); return ZEBRA_FAIL; } @@ -480,10 +495,11 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname) if (attr_content(attr, "name", &f->name)) ; else - yaz_log(YLOG_WARN, - "%s: dom filter: bad attribute %s" - " for ", - tinfo->fname, attr->name); + yaz_log(YLOG_WARN, "%s: dom filter: " + "%s bad attribute @%s" + " expected @name", + tinfo->fname, + xmlGetNodePath(ptr),attr->name); } parse_convert(tinfo, ptr->children, &f->convert); @@ -518,17 +534,18 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname) else if (attr_content(attr, "name", &f->name)) ; else - yaz_log(YLOG_WARN, - "%s: dom filter: bad attribute %s" - " for ", - tinfo->fname, attr->name); + yaz_log(YLOG_WARN, "%s: dom filter: " + "%s bad attribute @%s" + " expected @identifier|@name", + tinfo->fname, + xmlGetNodePath(ptr),attr->name); } parse_convert(tinfo, ptr->children, &f->convert); } else if (!XML_STRCMP(ptr->name, "store")) { /* - + @@ -560,17 +577,20 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname) else if (attr_content(attr, "name", &name)) ; else - yaz_log(YLOG_WARN, - "%s: dom filter: bad attribute %s" - " for ", - tinfo->fname, attr->name); + yaz_log(YLOG_WARN, "%s: dom filter: " + "%s bad attribute @%s" + " expected @syntax|@name", + tinfo->fname, + xmlGetNodePath(ptr),attr->name); } parse_input(tinfo, ptr->children, syntax, name); } else { - yaz_log(YLOG_WARN, "%s: dom filter: bad element %s", - tinfo->fname, ptr->name); + yaz_log(YLOG_WARN, "%s: dom filter: " + "%s bad element <%s>," + " expected |||", + tinfo->fname, xmlGetNodePath(ptr), ptr->name); return ZEBRA_FAIL; } } @@ -638,145 +658,6 @@ static int ioclose_ex(void *context) } -/* Alvis style indexing */ -#define ZEBRA_SCHEMA_XSLT_NS "http://indexdata.dk/zebra/xslt/1" -static const char *zebra_xslt_ns = ZEBRA_SCHEMA_XSLT_NS; - -/* Alvis style indexing */ -static void index_cdata(struct filter_info *tinfo, struct recExtractCtrl *ctrl, - xmlNodePtr ptr, RecWord *recWord) -{ - for(; ptr; ptr = ptr->next) - { - index_cdata(tinfo, ctrl, ptr->children, recWord); - if (ptr->type != XML_TEXT_NODE) - continue; - recWord->term_buf = (const char *)ptr->content; - recWord->term_len = XML_STRLEN(ptr->content); - (*ctrl->tokenAdd)(recWord); - } -} - -/* Alvis style indexing */ -static void index_node(struct filter_info *tinfo, struct recExtractCtrl *ctrl, - xmlNodePtr ptr, RecWord *recWord) -{ - for(; ptr; ptr = ptr->next) - { - index_node(tinfo, ctrl, ptr->children, recWord); - if (ptr->type != XML_ELEMENT_NODE || !ptr->ns || - XML_STRCMP(ptr->ns->href, zebra_xslt_ns)) - continue; - if (!XML_STRCMP(ptr->name, "index")) - { - const char *name_str = 0; - const char *type_str = 0; - const char *xpath_str = 0; - struct _xmlAttr *attr; - for (attr = ptr->properties; attr; attr = attr->next) - { - if (attr_content(attr, "name", &name_str)) - ; - else if (attr_content(attr, "xpath", &xpath_str)) - ; - else if (attr_content(attr, "type", &type_str)) - ; - else - yaz_log(YLOG_WARN, - "%s: dom filter: bad attribute %s" - " for ", - tinfo->fname, attr->name); - } - if (name_str) - { - /* save default type */ - int prev_type = recWord->index_type; - - /* type was given */ - if (type_str && *type_str) - recWord->index_type = *type_str; - - recWord->index_name = name_str; - index_cdata(tinfo, ctrl, ptr->children, recWord); - - /* restore it again */ - recWord->index_type = prev_type; - } - } - } -} - -/* Alvis style indexing */ -static void index_record(struct filter_info *tinfo,struct recExtractCtrl *ctrl, - xmlNodePtr ptr, RecWord *recWord) -{ - const char *type_str = "update"; - - if (ptr && ptr->type == XML_ELEMENT_NODE && ptr->ns && - !XML_STRCMP(ptr->ns->href, zebra_xslt_ns) - && !XML_STRCMP(ptr->name, "record")) - { - const char *id_str = 0; - const char *rank_str = 0; - struct _xmlAttr *attr; - for (attr = ptr->properties; attr; attr = attr->next) - { - if (attr_content(attr, "type", &type_str)) - ; - else if (attr_content(attr, "id", &id_str)) - ; - else if (attr_content(attr, "rank", &rank_str)) - ; - else - yaz_log(YLOG_WARN, "%s: dom filter: bad attribute %s" - " for ", - tinfo->fname, attr->name); - } - if (id_str) - sscanf(id_str, "%255s", ctrl->match_criteria); - - if (rank_str) - ctrl->staticrank = atozint(rank_str); - ptr = ptr->children; - } - - if (!strcmp("update", type_str)) - index_node(tinfo, ctrl, ptr, recWord); - else if (!strcmp("delete", type_str)) - yaz_log(YLOG_WARN, "dom filter delete: to be implemented"); - else - yaz_log(YLOG_WARN, "dom filter: unknown record type '%s'", - type_str); -} - - -/* Alvis style indexing */ -static void extract_doc_alvis(struct filter_info *tinfo, - struct recExtractCtrl *recctr, - xmlDocPtr doc) -{ - if (doc){ - RecWord recWord; - xmlChar *buf_out; - int len_out; - xmlNodePtr root_ptr; - - (*recctr->init)(recctr, &recWord); - - if (recctr->flagShowRecords){ - xmlDocDumpMemory(doc, &buf_out, &len_out); - fwrite(buf_out, len_out, 1, stdout); - xmlFree(buf_out); - } - root_ptr = xmlDocGetRootElement(doc); - if (root_ptr) - index_record(tinfo, recctr, root_ptr, &recWord); - else - yaz_log(YLOG_WARN, "No root for index XML record"); - } -} - - /* DOM filter style indexing */ static int attr_content_xml(struct _xmlAttr *attr, const char *name, xmlChar **dst_content) @@ -790,96 +671,120 @@ static int attr_content_xml(struct _xmlAttr *attr, const char *name, return 0; } -/* DOM filter style indexing */ -/* #define ZEBRA_XSLT_NS "http://indexdata.com/zebra-2.0" */ -/* static const char *zebra_xslt_ns = ZEBRA_XSLT_NS; */ /* DOM filter style indexing */ -#define ZEBRA_PI_NAME "zebra-2.0" -static const char *zebra_pi_name = ZEBRA_PI_NAME; - - -/* DOM filter style indexing */ -void index_value_of(struct filter_info *tinfo, - struct recExtractCtrl *recctr, - xmlNodePtr node, - xmlChar * index_p) +static void index_value_of(struct filter_info *tinfo, + struct recExtractCtrl *extctr, + RecWord* recword, + xmlNodePtr node, + xmlChar * index_p) { xmlChar *text = xmlNodeGetContent(node); + size_t text_len = strlen((const char *)text); - xmlChar *look = index_p; - xmlChar *bval; - xmlChar *eval; - xmlChar index[256]; - xmlChar type[256]; + /* if there is no text, we do not need to proceed */ + if (text_len) + { + xmlChar *look = index_p; + xmlChar *bval; + xmlChar *eval; + + xmlChar index[256]; + xmlChar type[256]; - /* parsing all index name/type pairs - may not start with ' ' or ':' */ - while (*look && ' ' != *look && ':' != *look){ + /* assingning text to be indexed */ + recword->term_buf = (const char *)text; + recword->term_len = text_len; + + /* parsing all index name/type pairs */ + /* may not start with ' ' or ':' */ + while (*look && ' ' != *look && ':' != *look){ - /* setting name and type to zero */ - *index = '\0'; - *type = '\0'; + /* setting name and type to zero */ + *index = '\0'; + *type = '\0'; - /* parsing one index name */ - bval = look; - while (*look && ':' != *look && ' ' != *look){ - look++; - } - eval = look; - strncpy((char *)index, (const char *)bval, eval - bval); - index[eval - bval] = '\0'; + /* parsing one index name */ + bval = look; + while (*look && ':' != *look && ' ' != *look){ + look++; + } + eval = look; + strncpy((char *)index, (const char *)bval, eval - bval); + index[eval - bval] = '\0'; - /* parsing one index type, if existing */ - if (':' == *look){ - look++; + /* parsing one index type, if existing */ + if (':' == *look){ + look++; - bval = look; - while (*look && ' ' != *look){ - look++; - } - eval = look; - strncpy((char *)type, (const char *)bval, eval - bval); - type[eval - bval] = '\0'; - } + bval = look; + while (*look && ' ' != *look){ + look++; + } + eval = look; + strncpy((char *)type, (const char *)bval, eval - bval); + type[eval - bval] = '\0'; + } - printf("INDEX '%s:%s' '%s'\n", index, type, text); - - if (*look && ' ' == *look && *(look+1)){ - look++; - } - } + /* actually indexing the text given */ + yaz_log(YLOG_DEBUG, "%s dom filter: " + "INDEX '%s:%s' '%s'", + tinfo->fname, index, type, text); - xmlFree(text); + recword->index_name = (const char *)index; + if (type && *type) + recword->index_type = *type; + (extctr->tokenAdd)(recword); - /* //recWord->term_buf = (const char *)ptr->content; */ - /* //recWord->term_len = XML_STRLEN(ptr->content); */ - /* // if (type_str && *type_str) */ - /* // recWord->index_type = *type_str; /\* type was given *\/ */ - /* // recWord->index_name = name_str; */ - /* // recWord->index_type = prev_type; /\* restore it again *\/ */ + /* eat whitespaces */ + if (*look && ' ' == *look && *(look+1)){ + look++; + } + } + } + + xmlFree(text); } /* DOM filter style indexing */ -void set_record_info(struct filter_info *tinfo, - struct recExtractCtrl *recctr, - xmlChar * id_p, - xmlChar * rank_p, - xmlChar * type_p) +static void set_record_info(struct filter_info *tinfo, + struct recExtractCtrl *extctr, + xmlChar * id_p, + xmlChar * rank_p, + xmlChar * type_p) { - printf("RECORD id=%s rank=%s type=%s\n", id_p, rank_p, type_p); + yaz_log(YLOG_DEBUG, "%s dom filter: " + "RECORD id=%s rank=%s type=%s", + tinfo->fname, id_p, rank_p, type_p); + + if (id_p) + sscanf((const char *)id_p, "%255s", extctr->match_criteria); + + if (rank_p) + extctr->staticrank = atozint((const char *)rank_p); + + /* if (!strcmp("update", type_str)) */ + /* index_node(tinfo, ctrl, ptr, recword); */ + /* else if (!strcmp("delete", type_str)) */ + /* yaz_log(YLOG_WARN, "dom filter delete: to be implemented"); */ + /* else */ + /* yaz_log(YLOG_WARN, "dom filter: unknown record type '%s'", */ + /* type_str); */ + } /* DOM filter style indexing */ -void process_xml_element_zebra_node(struct filter_info *tinfo, - struct recExtractCtrl *recctr, - xmlNodePtr node) +static void process_xml_element_zebra_node(struct filter_info *tinfo, + struct recExtractCtrl *extctr, + RecWord* recword, + xmlNodePtr node) { if (node->type == XML_ELEMENT_NODE - && node->ns && 0 == XML_STRCMP(node->ns->href, zebra_xslt_ns)){ + && node->ns && 0 == XML_STRCMP(node->ns->href, zebra_dom_ns)){ if (0 == XML_STRCMP(node->name, "index")){ xmlChar *index_p = 0; @@ -887,14 +792,12 @@ void process_xml_element_zebra_node(struct filter_info *tinfo, struct _xmlAttr *attr; for (attr = node->properties; attr; attr = attr->next){ if (attr_content_xml(attr, "name", &index_p)){ - index_value_of(tinfo, recctr, node, index_p); + index_value_of(tinfo, extctr, recword,node, index_p); } else - // printf("%s: dom filter: s% bad attribute %s", - // tinfo->fname, xmlGetNodePath(node)), nodeattr->name); - printf("dom filter: %s bad attribute @%s, " - "expected @name\n", - xmlGetNodePath(node), attr->name); + yaz_log(YLOG_WARN,"%s dom filter: " + "%s bad attribute @%s, expected @name", + tinfo->fname, xmlGetNodePath(node), attr->name); } } else if (0 == XML_STRCMP(node->name, "record")){ @@ -909,28 +812,28 @@ void process_xml_element_zebra_node(struct filter_info *tinfo, else if (attr_content_xml(attr, "rank", &rank_p)) ; else if (attr_content_xml(attr, "type", &type_p)) - ; + ; else - // printf("%s: dom filter: s% bad attribute %s", - // tinfo->fname, xmlGetNodePath(node)), nodeattr->name); - printf("dom filter: %s bad attribute @%s," - " expected @id|@rank|@type\n", - xmlGetNodePath(node), attr->name); + yaz_log(YLOG_WARN,"%s dom filter: " + "%s bad attribute @%s," + " expected @id|@rank|@type", + tinfo->fname, xmlGetNodePath(node), attr->name); if (type_p && 0 != strcmp("update", (const char *)type_p)) - printf("dom filter: %s attribute @%s," - " only implemented '@type=\"update\"\n", - xmlGetNodePath(node), attr->name); + yaz_log(YLOG_WARN,"%s dom filter: " + "%s attribute @%s," + " only implemented '@type='update'", + tinfo->fname, xmlGetNodePath(node), attr->name); } - set_record_info(tinfo, recctr, id_p, rank_p, type_p); + set_record_info(tinfo, extctr, id_p, rank_p, type_p); } else { - // printf("%s: dom filter: s% bad attribute %s", - // tinfo->fname, xmlGetNodePath(node)), nodeattr->name); - printf("dom filter: %s bad element <%s>," - " expected | in namespace '%s'\n", - xmlGetNodePath(node), node->name, zebra_xslt_ns); + yaz_log(YLOG_WARN,"%s dom filter: " + "%s bad element <%s>," + " expected | in namespace '%s'", + tinfo->fname, xmlGetNodePath(node), + node->name, zebra_dom_ns); } } @@ -938,13 +841,13 @@ void process_xml_element_zebra_node(struct filter_info *tinfo, /* DOM filter style indexing */ -void process_xml_pi_node(struct filter_info *tinfo, - struct recExtractCtrl *recctr, - xmlNodePtr node, - xmlChar **index_pp) +static void process_xml_pi_node(struct filter_info *tinfo, + struct recExtractCtrl *extctr, + xmlNodePtr node, + xmlChar **index_pp) { - /* printf("PI %s\n", xmlGetNodePath(node)); */ + /* yaz_log(YLOG_DEBUG,"PI %s\n", xmlGetNodePath(node)); */ /* if right PI name, continue parsing PI */ if (0 == strcmp(zebra_pi_name, (const char *)node->name)){ @@ -1000,13 +903,12 @@ void process_xml_pi_node(struct filter_info *tinfo, while (*look && ' ' == *look && *(look+1)) look++; - if (look && '\0' != *look){ - printf ("ERROR %s: content '%s'; can not parse '%s'\n", - xmlGetNodePath(node), pi_p, look); - } else { - /* set_record_info(id, rank, type); */ - set_record_info(tinfo, recctr, id, rank, 0); - } + if (look && '\0' != *look) + yaz_log(YLOG_WARN,"%s dom filter: " + "%s content '%s', can not parse '%s'", + tinfo->fname, xmlGetNodePath(node), pi_p, look); + else + set_record_info(tinfo, extctr, id, rank, 0); } @@ -1020,44 +922,41 @@ void process_xml_pi_node(struct filter_info *tinfo, /* export index instructions to outside */ *index_pp = look; - - /* nor record, neither index */ - } else { - - printf ("ERROR %s: content '%s'; can not parse '%s'\n", - xmlGetNodePath(node), pi_p, look); - } + } + else + yaz_log(YLOG_WARN,"%s dom filter: " + "%s content '%s', can not parse '%s'", + tinfo->fname, xmlGetNodePath(node), pi_p, look); } } /* DOM filter style indexing */ -void process_xml_element_node(struct filter_info *tinfo, - struct recExtractCtrl *recctr, - xmlNodePtr node) +static void process_xml_element_node(struct filter_info *tinfo, + struct recExtractCtrl *extctr, + RecWord* recword, + xmlNodePtr node) { /* remember indexing instruction from PI to next element node */ xmlChar *index_p = 0; - /* printf("ELEM %s\n", xmlGetNodePath(node)); */ - /* check if we are an element node in the special zebra namespace and either set record data or index value-of node content*/ - process_xml_element_zebra_node(tinfo, recctr, node); + process_xml_element_zebra_node(tinfo, extctr, recword, node); /* loop through kid nodes */ for (node = node->children; node; node = node->next) { /* check and set PI record and index index instructions */ if (node->type == XML_PI_NODE){ - process_xml_pi_node(tinfo, recctr, node, &index_p); + process_xml_pi_node(tinfo, extctr, node, &index_p); } else if (node->type == XML_ELEMENT_NODE){ /* if there was a PI index instruction before this element */ if (index_p){ - index_value_of(tinfo, recctr, node, index_p); + index_value_of(tinfo, extctr, recword, node, index_p); index_p = 0; } - process_xml_element_node(tinfo, recctr, node); + process_xml_element_node(tinfo, extctr, recword,node); } else continue; @@ -1066,13 +965,24 @@ void process_xml_element_node(struct filter_info *tinfo, /* DOM filter style indexing */ -void extract_dom_doc_node(struct filter_info *tinfo, - struct recExtractCtrl *recctr, - xmlDocPtr doc) +static void extract_dom_doc_node(struct filter_info *tinfo, + struct recExtractCtrl *extctr, + xmlDocPtr doc) { - /* printf("DOC %s\n", xmlGetNodePath((xmlNodePtr)doc)); */ + xmlChar *buf_out; + int len_out; + + /* only need to do the initialization once, reuse recword for all terms */ + RecWord recword; + (*extctr->init)(extctr, &recword); + + if (extctr->flagShowRecords){ + xmlDocDumpMemory(doc, &buf_out, &len_out); + fwrite(buf_out, len_out, 1, stdout); + xmlFree(buf_out); + } - process_xml_element_node(tinfo, recctr, (xmlNodePtr)doc); + process_xml_element_node(tinfo, extctr, &recword, (xmlNodePtr)doc); } @@ -1084,7 +994,6 @@ static int convert_extract_doc(struct filter_info *tinfo, xmlDocPtr doc) { - /* RecWord recWord; */ xmlChar *buf_out; int len_out; const char *params[10]; @@ -1092,7 +1001,7 @@ static int convert_extract_doc(struct filter_info *tinfo, xmlDocPtr store_doc = 0; params[0] = 0; - set_param_str(params, "schema", zebra_xslt_ns, tinfo->odr_record); + set_param_str(params, "schema", zebra_dom_ns, tinfo->odr_record); /* input conversion */ perform_convert(tinfo, input->convert, params, &doc, 0); @@ -1124,7 +1033,7 @@ static int convert_extract_doc(struct filter_info *tinfo, /* finally, do the indexing */ if (doc){ extract_dom_doc_node(tinfo, p, doc); - extract_doc_alvis(tinfo, p, doc); + /* extract_doc_alvis(tinfo, p, doc); */ xmlFreeDoc(doc); } @@ -1219,8 +1128,9 @@ static int extract_iso2709(struct filter_info *tinfo, { int i; - yaz_log(YLOG_WARN, "MARC: Skipping bad byte %d (0x%02X)", - *buf & 0xff, *buf & 0xff); + yaz_log(YLOG_WARN, "%s dom filter: " + "MARC: Skipping bad byte %d (0x%02X)", + tinfo->fname, *buf & 0xff, *buf & 0xff); for (i = 0; i<4; i++) buf[i] = buf[i+1]; @@ -1230,21 +1140,25 @@ static int extract_iso2709(struct filter_info *tinfo, record_length = atoi_n (buf, 5); if (record_length < 25) { - yaz_log (YLOG_WARN, "MARC record length < 25, is %d", - record_length); + yaz_log (YLOG_WARN, "%s dom filter: " + "MARC record length < 25, is %d", + tinfo->fname, record_length); return RECCTRL_EXTRACT_ERROR_GENERIC; } read_bytes = p->stream->readf(p->stream, buf+5, record_length-5); if (read_bytes < record_length-5) { - yaz_log (YLOG_WARN, "Couldn't read whole MARC record"); + yaz_log (YLOG_WARN, "%s dom filter: " + "Couldn't read whole MARC record", + tinfo->fname); return RECCTRL_EXTRACT_ERROR_GENERIC; } r = yaz_marc_read_iso2709(input->u.marc.handle, buf, record_length); if (r < record_length) { - yaz_log (YLOG_WARN, "Parsing of MARC record failed r=%d length=%d", - r, record_length); + yaz_log (YLOG_WARN, "%s dom filter: " + "Parsing of MARC record failed r=%d length=%d", + tinfo->fname, r, record_length); return RECCTRL_EXTRACT_ERROR_GENERIC; } else @@ -1295,7 +1209,7 @@ static int ioclose_ret(void *context) static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p) { - /* const char *esn = zebra_xslt_ns; */ + /* const char *esn = zebra_dom_ns; */ const char *esn = 0; const char *params[32]; struct filter_info *tinfo = clientData;