X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Fmod_dom.c;h=7fe54c08cafe9d3e4193ad942f1d68d2c1080804;hp=1cbe3dc96c161bbc385cfaf569dc47d52da1debd;hb=e9b6a86cc5ac30d6c6331c46ef3edaa08b99a2a7;hpb=784bb11a4ee525eb88c78c3fc9e08a8b5c2e9939 diff --git a/index/mod_dom.c b/index/mod_dom.c index 1cbe3dc..7fe54c0 100644 --- a/index/mod_dom.c +++ b/index/mod_dom.c @@ -1,5 +1,5 @@ /* This file is part of the Zebra server. - Copyright (C) 1995-2008 Index Data + Copyright (C) Index Data Zebra is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -17,6 +17,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#if HAVE_CONFIG_H +#include +#endif #include #include #include @@ -50,11 +53,26 @@ static const char *zebra_dom_ns = ZEBRA_DOM_NS; #define ZEBRA_PI_NAME "zebra-2.0" static const char *zebra_pi_name = ZEBRA_PI_NAME; +enum convert_type { + convert_xslt_type, + convert_meta_type +}; - -struct convert_s { +struct convert_xslt { const char *stylesheet; xsltStylesheetPtr stylesheet_xsp; +}; + +struct convert_meta { + int dummy; +}; + +struct convert_s { + enum convert_type which; + union { + struct convert_xslt xslt; + struct convert_meta meta; + } u; struct convert_s *next; }; @@ -94,7 +112,7 @@ struct filter_input { } u; struct filter_input *next; }; - + struct filter_info { char *fname; char *full_name; @@ -134,7 +152,7 @@ static void dom_log(int level, struct filter_info *tinfo, xmlNodePtr ptr, yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap); if (ptr) { - yaz_log(level, "%s:%ld: %s", tinfo->fname ? tinfo->fname : "none", + yaz_log(level, "%s:%ld: %s", tinfo->fname ? tinfo->fname : "none", xmlGetLineNo(ptr), buf); } else @@ -185,7 +203,7 @@ static void *filter_init(Res res, RecType recType) tinfo->record_info_invoked = 0; #if YAZ_HAVE_EXSLT - exsltRegisterAll(); + exsltRegisterAll(); #endif return tinfo; @@ -194,7 +212,7 @@ static void *filter_init(Res res, RecType recType) static int attr_content(struct _xmlAttr *attr, const char *name, const char **dst_content) { - if (!XML_STRCMP(attr->name, name) && attr->children + if (!XML_STRCMP(attr->name, name) && attr->children && attr->children->type == XML_TEXT_NODE) { *dst_content = (const char *)(attr->children->content); @@ -205,10 +223,13 @@ static int attr_content(struct _xmlAttr *attr, const char *name, static void destroy_xsp(struct convert_s *c) { - while(c) + while (c) { - if (c->stylesheet_xsp) - xsltFreeStylesheet(c->stylesheet_xsp); + if (c->which == convert_xslt_type) + { + if (c->u.xslt.stylesheet_xsp) + xsltFreeStylesheet(c->u.xslt.stylesheet_xsp); + } c = c->next; } } @@ -269,56 +290,70 @@ static ZEBRA_RES parse_convert(struct filter_info *tinfo, xmlNodePtr ptr, if (!XML_STRCMP(ptr->name, "xslt")) { struct _xmlAttr *attr; - struct convert_s *p - = nmem_malloc(tinfo->nmem_config, sizeof(*p)); - + struct convert_s *p = nmem_malloc(tinfo->nmem_config, sizeof(*p)); + p->next = 0; - p->stylesheet = 0; - p->stylesheet_xsp = 0; - + p->which = convert_xslt_type; + p->u.xslt.stylesheet = 0; + p->u.xslt.stylesheet_xsp = 0; + for (attr = ptr->properties; attr; attr = attr->next) - if (attr_content(attr, "stylesheet", &p->stylesheet)) + if (attr_content(attr, "stylesheet", &p->u.xslt.stylesheet)) ; else { dom_log(YLOG_WARN, tinfo, ptr, "bad attribute @%s", attr->name); } - if (p->stylesheet) + if (p->u.xslt.stylesheet) { char tmp_xslt_full_name[1024]; - if (!yaz_filepath_resolve(p->stylesheet, + if (!yaz_filepath_resolve(p->u.xslt.stylesheet, tinfo->profile_path, - NULL, + NULL, tmp_xslt_full_name)) { dom_log(YLOG_WARN, tinfo, 0, "stylesheet %s not found in " "path %s", - p->stylesheet, + p->u.xslt.stylesheet, tinfo->profile_path); return ZEBRA_FAIL; } - - p->stylesheet_xsp - = xsltParseStylesheetFile((const xmlChar*) + + p->u.xslt.stylesheet_xsp + = xsltParseStylesheetFile((const xmlChar*) tmp_xslt_full_name); - if (!p->stylesheet_xsp) + if (!p->u.xslt.stylesheet_xsp) { dom_log(YLOG_WARN, tinfo, 0, "could not parse xslt stylesheet %s", tmp_xslt_full_name); return ZEBRA_FAIL; } - } - else - { - dom_log(YLOG_WARN, tinfo, ptr, - "missing attribute 'stylesheet' "); - return ZEBRA_FAIL; - } - *l = p; - l = &p->next; + } + else + { + dom_log(YLOG_WARN, tinfo, ptr, + "missing attribute 'stylesheet'"); + return ZEBRA_FAIL; + } + *l = p; + l = &p->next; + } + else if (!XML_STRCMP(ptr->name, "process-meta")) + { + struct _xmlAttr *attr; + struct convert_s *p = nmem_malloc(tinfo->nmem_config, sizeof(*p)); + + p->next = 0; + p->which = convert_meta_type; + + for (attr = ptr->properties; attr; attr = attr->next) + dom_log(YLOG_WARN, tinfo, ptr, + "bad attribute @%s", attr->name); + *l = p; + l = &p->next; } else { @@ -330,8 +365,64 @@ static ZEBRA_RES parse_convert(struct filter_info *tinfo, xmlNodePtr ptr, return ZEBRA_OK; } -static ZEBRA_RES perform_convert(struct filter_info *tinfo, +static int process_meta(struct filter_info *tinfo, xmlDocPtr doc, xmlNodePtr node, + struct recRetrieveCtrl *retctr) +{ + + if (node->type == XML_ELEMENT_NODE && node->ns && node->ns->href && + 0 == XML_STRCMP(node->ns->href, zebra_dom_ns)) + { + if (0 == XML_STRCMP(node->name, "meta")) + { + const char *element_set_name = 0; + + struct _xmlAttr *attr; + for (attr = node->properties; attr; attr = attr->next) + { + if (attr_content(attr, "name", &element_set_name)) + ; + else + { + dom_log(YLOG_WARN, tinfo, node, + "bad attribute @%s, expected @name", attr->name); + } + } + if (element_set_name) + { + WRBUF result = wrbuf_alloc(); + WRBUF addinfo = wrbuf_alloc(); + const Odr_oid *input_format = yaz_oid_recsyn_xml; + const Odr_oid *output_format = 0; + int ret; + + ret = retctr->special_fetch(retctr->handle, + element_set_name, + input_format, &output_format, + result, addinfo); + if (ret == 0) + { + xmlDocPtr sub_doc = + xmlParseMemory(wrbuf_buf(result), wrbuf_len(result)); + if (sub_doc) + { + xmlNodePtr t = xmlDocGetRootElement(sub_doc); + xmlReplaceNode(node, xmlCopyNode(t, 1)); + xmlFreeDoc(sub_doc); + } + } + wrbuf_destroy(result); + wrbuf_destroy(addinfo); + } + } + } + for (node = node->children; node; node = node->next) + process_meta(tinfo, doc, node, retctr); + return 0; +} + +static ZEBRA_RES perform_convert(struct filter_info *tinfo, struct recExtractCtrl *extctr, + struct recRetrieveCtrl *retctr, struct convert_s *convert, const char **params, xmlDocPtr *doc, @@ -339,34 +430,48 @@ static ZEBRA_RES perform_convert(struct filter_info *tinfo, { for (; convert; convert = convert->next) { - xmlChar *buf_out = 0; - int len_out = 0; - xmlDocPtr res_doc = xsltApplyStylesheet(convert->stylesheet_xsp, - *doc, params); - if (last_xsp) - *last_xsp = convert->stylesheet_xsp; - - if (!res_doc) - break; + if (convert->which == convert_xslt_type) + { + xmlChar *buf_out = 0; + int len_out = 0; + xmlDocPtr res_doc = xsltApplyStylesheet(convert->u.xslt.stylesheet_xsp, + *doc, params); + if (last_xsp) + *last_xsp = convert->u.xslt.stylesheet_xsp; + + if (!res_doc) + break; - /* now saving into buffer and re-reading into DOM to avoid annoing - XSLT problem with thrown-out indentation text nodes */ - xsltSaveResultToString(&buf_out, &len_out, res_doc, - convert->stylesheet_xsp); - xmlFreeDoc(res_doc); + /* now saving into buffer and re-reading into DOM to avoid annoing + XSLT problem with thrown-out indentation text nodes */ + xsltSaveResultToString(&buf_out, &len_out, res_doc, + convert->u.xslt.stylesheet_xsp); + xmlFreeDoc(res_doc); - xmlFreeDoc(*doc); + xmlFreeDoc(*doc); - *doc = xmlParseMemory((const char *) buf_out, len_out); + *doc = xmlParseMemory((const char *) buf_out, len_out); - /* writing debug info out */ - if (extctr && extctr->flagShowRecords) - yaz_log(YLOG_LOG, "%s: XSLT %s\n %.*s", - tinfo->fname ? tinfo->fname : "(none)", - convert->stylesheet, - len_out, buf_out); - - xmlFree(buf_out); + /* writing debug info out */ + if (extctr && extctr->flagShowRecords) + yaz_log(YLOG_LOG, "%s: XSLT %s\n %.*s", + tinfo->fname ? tinfo->fname : "(none)", + convert->u.xslt.stylesheet, + len_out, buf_out); + + xmlFree(buf_out); + } + else if (convert->which == convert_meta_type) + { + if (retctr) /* only execute meta on retrieval */ + { + process_meta(tinfo, *doc, xmlDocGetRootElement(*doc), retctr); + + /* last stylesheet absent */ + if (last_xsp) + *last_xsp = 0; + } + } } return ZEBRA_OK; } @@ -395,7 +500,7 @@ static ZEBRA_RES parse_input(struct filter_info *tinfo, xmlNodePtr ptr, yaz_iconv_t iconv = 0; const char *input_charset = "marc-8"; struct _xmlAttr *attr; - + for (attr = ptr->properties; attr; attr = attr->next) { if (attr_content(attr, "inputcharset", &input_charset)) @@ -410,21 +515,21 @@ static ZEBRA_RES parse_input(struct filter_info *tinfo, xmlNodePtr ptr, iconv = yaz_iconv_open("utf-8", input_charset); if (!iconv) { - dom_log(YLOG_WARN, tinfo, ptr, + dom_log(YLOG_WARN, tinfo, ptr, "unsupported @charset '%s'", input_charset); return ZEBRA_FAIL; } else { - struct filter_input *p + struct filter_input *p = new_input(tinfo, DOM_INPUT_MARC); p->u.marc.handle = yaz_marc_create(); p->u.marc.iconv = iconv; - + yaz_marc_iconv(p->u.marc.handle, p->u.marc.iconv); - + ptr = ptr->next; - + parse_convert(tinfo, ptr, &p->convert); } break; @@ -432,7 +537,7 @@ static ZEBRA_RES parse_input(struct filter_info *tinfo, xmlNodePtr ptr, } else if (!XML_STRCMP(ptr->name, "xmlreader")) { - struct filter_input *p + struct filter_input *p = new_input(tinfo, DOM_INPUT_XMLREADER); struct _xmlAttr *attr; const char *level_str = 0; @@ -453,7 +558,7 @@ static ZEBRA_RES parse_input(struct filter_info *tinfo, xmlNodePtr ptr, } if (level_str) p->u.xmlreader.split_level = atoi(level_str); - + ptr = ptr->next; parse_convert(tinfo, ptr, &p->convert); @@ -477,13 +582,13 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname) xmlDocPtr doc; tinfo->fname = nmem_strdup(tinfo->nmem_config, fname); - - if (yaz_filepath_resolve(tinfo->fname, tinfo->profile_path, + + if (yaz_filepath_resolve(tinfo->fname, tinfo->profile_path, NULL, tmp_full_name)) tinfo->full_name = nmem_strdup(tinfo->nmem_config, tmp_full_name); else tinfo->full_name = nmem_strdup(tinfo->nmem_config, tinfo->fname); - + yaz_log(YLOG_LOG, "%s dom filter: " "loading config file %s", tinfo->fname, tinfo->full_name); @@ -495,16 +600,16 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname) tinfo->fname, tinfo->full_name); return ZEBRA_FAIL; } - /* save because we store ptrs to the content */ + /* save because we store ptrs to the content */ tinfo->doc_config = doc; - + ptr = xmlDocGetRootElement(doc); - if (!ptr || ptr->type != XML_ELEMENT_NODE + if (!ptr || ptr->type != XML_ELEMENT_NODE || XML_STRCMP(ptr->name, "dom")) { dom_log(YLOG_WARN, tinfo, ptr, - "bad root element <%s>, expected root element ", - ptr->name); + "bad root element <%s>, expected root element ", + ptr->name); return ZEBRA_FAIL; } @@ -521,7 +626,7 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname) struct _xmlAttr *attr; struct filter_extract *f = nmem_malloc(tinfo->nmem_config, sizeof(*f)); - + tinfo->extract = f; f->name = 0; f->convert = 0; @@ -539,8 +644,8 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname) parse_convert(tinfo, ptr->children, &f->convert); } else if (!XML_STRCMP(ptr->name, "retrieve")) - { - /* + { + /* @@ -550,7 +655,7 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname) struct filter_retrieve **fp = &tinfo->retrieve_list; struct filter_retrieve *f = nmem_malloc(tinfo->nmem_config, sizeof(*f)); - + while (*fp) fp = &(*fp)->next; @@ -562,7 +667,7 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname) for (attr = ptr->properties; attr; attr = attr->next) { - if (attr_content(attr, "identifier", + if (attr_content(attr, "identifier", &f->identifier)) ; else if (attr_content(attr, "name", &f->name)) @@ -586,7 +691,7 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname) */ struct filter_store *f = nmem_malloc(tinfo->nmem_config, sizeof(*f)); - + tinfo->store = f; f->convert = 0; parse_convert(tinfo, ptr->children, &f->convert); @@ -630,7 +735,7 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname) } if (!tinfo->input_list) { - struct filter_input *p + struct filter_input *p = new_input(tinfo, DOM_INPUT_XMLREADER); p->u.xmlreader.split_level = 0; p->u.xmlreader.reader = 0; @@ -647,15 +752,15 @@ static struct filter_retrieve *lookup_retrieve(struct filter_info *tinfo, if (!est) return f; for (; f; f = f->next) - { + { /* find requested schema */ - if (est) - { + if (est) + { if (f->identifier && !strcmp(f->identifier, est)) return f; if (f->name && !strcmp(f->name, est)) return f; - } + } } return 0; } @@ -671,7 +776,7 @@ static ZEBRA_RES filter_config(void *clientData, Res res, const char *args) if (tinfo->fname && !strcmp(args, tinfo->fname)) return ZEBRA_OK; - + tinfo->profile_path = res_get(res, "profilePath"); destroy_dom(tinfo); @@ -699,35 +804,30 @@ static int ioclose_ex(void *context) } -/* DOM filter style indexing */ -static int attr_content_xml(struct _xmlAttr *attr, const char *name, - const char **dst_content) -{ - if (0 == XML_STRCMP(attr->name, name) && attr->children - && attr->children->type == XML_TEXT_NODE) - { - *dst_content = (const char *) (attr->children->content); - return 1; - } - return 0; -} - /* DOM filter style indexing */ -static void index_value_of(struct filter_info *tinfo, +static void index_value_of(struct filter_info *tinfo, struct recExtractCtrl *extctr, - RecWord* recword, - xmlNodePtr node, + RecWord* recword, + xmlNodePtr node, const char *index_p) { if (tinfo->record_info_invoked == 1) { xmlChar *text = xmlNodeGetContent(node); size_t text_len = strlen((const char *)text); - + /* if there is no text, we do not need to proceed */ if (text_len) - { + { + /* keep seqno base so that all text will have + identical seqno's for multiple fields , e.g + .. */ + + zint seqno_base = recword->seqno; + zint seqno_max = recword->seqno; + + const char *look = index_p; const char *bval; const char *eval; @@ -746,7 +846,7 @@ static void index_value_of(struct filter_info *tinfo, /* setting name and type to zero */ *index = '\0'; *type = '\0'; - + /* parsing one index name */ bval = look; while (*look && ':' != *look && ' ' != *look) @@ -756,13 +856,13 @@ static void index_value_of(struct filter_info *tinfo, eval = look; strncpy((char *)index, (const char *)bval, eval - bval); index[eval - bval] = '\0'; - - + + /* parsing one index type, if existing */ if (':' == *look) { look++; - + bval = look; while (*look && ' ' != *look) { @@ -775,51 +875,62 @@ static void index_value_of(struct filter_info *tinfo, /* actually indexing the text given */ + recword->seqno = seqno_base; recword->index_name = (const char *)index; if (*type) recword->index_type = (const char *) type; /* writing debug out */ if (extctr->flagShowRecords) - dom_log(YLOG_LOG, tinfo, 0, - "INDEX '%s:%s' '%s'", + dom_log(YLOG_LOG, tinfo, 0, + "INDEX '%s:%s' '%s'", (const char *) index, - (const char *) type, + (const char *) type, (const char *) text); - + (extctr->tokenAdd)(recword); + if (seqno_max < recword->seqno) + seqno_max = recword->seqno; + /* eat whitespaces */ if (*look && ' ' == *look) { look++; - } + } } + recword->seqno = seqno_max; } - xmlFree(text); + xmlFree(text); } } /* DOM filter style indexing */ -static void set_record_info(struct filter_info *tinfo, - struct recExtractCtrl *extctr, - xmlNodePtr node, - const char * id_p, - const char * rank_p, +static void set_record_info(struct filter_info *tinfo, + struct recExtractCtrl *extctr, + xmlNodePtr node, + const char * id_p, + const char * rank_p, const char * type_p) { /* writing debug info out */ if (extctr && extctr->flagShowRecords) dom_log(YLOG_LOG, tinfo, node, - "RECORD id=%s rank=%s type=%s", + "RECORD id=%s rank=%s type=%s", id_p ? (const char *) id_p : "(null)", rank_p ? (const char *) rank_p : "(null)", type_p ? (const char *) type_p : "(null)"); - + if (id_p && *id_p) - sscanf((const char *)id_p, "%255s", extctr->match_criteria); + { + size_t l = strlen(id_p); + if (l >= sizeof(extctr->match_criteria)) + l = sizeof(extctr->match_criteria)-1; + memcpy(extctr->match_criteria, id_p, l); + extctr->match_criteria[l] = '\0'; + } if (rank_p && *rank_p) extctr->staticrank = atozint((const char *)rank_p); @@ -835,6 +946,8 @@ static void set_record_info(struct filter_info *tinfo, action = action_replace; else if (!strcmp(type_p, "update")) action = action_update; + else if (!strcmp(type_p, "adelete")) + action = action_a_delete; else dom_log(YLOG_WARN, tinfo, node, "bad @type value: %s", type_p); extctr->action = action; @@ -851,25 +964,25 @@ static void set_record_info(struct filter_info *tinfo, /* DOM filter style indexing */ -static void process_xml_element_zebra_node(struct filter_info *tinfo, - struct recExtractCtrl *extctr, - RecWord* recword, +static void process_xml_element_zebra_node(struct filter_info *tinfo, + struct recExtractCtrl *extctr, + RecWord* recword, xmlNodePtr node) { if (node->type == XML_ELEMENT_NODE && node->ns && node->ns->href && 0 == XML_STRCMP(node->ns->href, zebra_dom_ns)) { - if (0 == XML_STRCMP(node->name, "index")) - { + if (0 == XML_STRCMP(node->name, "index")) + { const char *index_p = 0; - struct _xmlAttr *attr; + struct _xmlAttr *attr; for (attr = node->properties; attr; attr = attr->next) { - if (attr_content_xml(attr, "name", &index_p)) + if (attr_content(attr, "name", &index_p)) { index_value_of(tinfo, extctr, recword, node, index_p); - } + } else { dom_log(YLOG_WARN, tinfo, node, @@ -887,11 +1000,11 @@ static void process_xml_element_zebra_node(struct filter_info *tinfo, struct _xmlAttr *attr; for (attr = node->properties; attr; attr = attr->next) { - if (attr_content_xml(attr, "id", &id_p)) + if (attr_content(attr, "id", &id_p)) ; - else if (attr_content_xml(attr, "rank", &rank_p)) + else if (attr_content(attr, "rank", &rank_p)) ; - else if (attr_content_xml(attr, "type", &type_p)) + else if (attr_content(attr, "type", &type_p)) ; else { @@ -901,7 +1014,7 @@ static void process_xml_element_zebra_node(struct filter_info *tinfo, } } set_record_info(tinfo, extctr, node, id_p, rank_p, type_p); - } + } else { dom_log(YLOG_WARN, tinfo, node, @@ -919,9 +1032,6 @@ static int attr_content_pi(const char **c_ptr, const char *name, const char *look = *c_ptr; int ret = 0; - *value = '\0'; - while (*look && ' ' == *look) - look++; if (strlen(look) > name_len) { if (look[name_len] == '=' && !memcmp(look, name, name_len)) @@ -938,15 +1048,13 @@ static int attr_content_pi(const char **c_ptr, const char *name, ret = 1; } } - while (*look && ' ' == *look) - look++; *c_ptr = look; return ret; } /* DOM filter style indexing */ -static void process_xml_pi_node(struct filter_info *tinfo, - struct recExtractCtrl *extctr, +static void process_xml_pi_node(struct filter_info *tinfo, + struct recExtractCtrl *extctr, xmlNodePtr node, const char **index_pp) { @@ -955,45 +1063,54 @@ static void process_xml_pi_node(struct filter_info *tinfo, { xmlChar *pi_p = node->content; const char *look = (const char *) node->content; - + /* parsing PI record instructions */ if (0 == strncmp((const char *)look, "record", 6)) { char id[256]; char rank[256]; char type[256]; - + *id = '\0'; *rank = '\0'; *type = '\0'; look += 6; - while (*look) + for (;;) + { + /* eat whitespace */ + while (' ' == *look) + look++; + if (*look == '\0') + break; if (attr_content_pi(&look, "id", id, sizeof(id))) ; else if (attr_content_pi(&look, "rank", rank, sizeof(rank))) ; else if (attr_content_pi(&look, "type", type, sizeof(type))) + ; + else { dom_log(YLOG_WARN, tinfo, node, "content '%s', can not parse '%s'", pi_p, look); break; } + } set_record_info(tinfo, extctr, node, id, rank, type); - } + } /* parsing index instruction */ else if (0 == strncmp((const char *)look, "index", 5)) { look += 5; - + /* eat whitespace */ while (*look && ' ' == *look) look++; /* export index instructions to outside */ *index_pp = look; - } - else + } + else { dom_log(YLOG_WARN, tinfo, node, "content '%s', can not parse '%s'", @@ -1003,18 +1120,18 @@ static void process_xml_pi_node(struct filter_info *tinfo, } /* DOM filter style indexing */ -static void process_xml_element_node(struct filter_info *tinfo, - struct recExtractCtrl *extctr, - RecWord* recword, +static void process_xml_element_node(struct filter_info *tinfo, + struct recExtractCtrl *extctr, + RecWord* recword, xmlNodePtr node) { /* remember indexing instruction from PI to next element node */ const char *index_p = 0; - /* check if we are an element node in the special zebra namespace + /* check if we are an element node in the special zebra namespace and either set record data or index value-of node content*/ process_xml_element_zebra_node(tinfo, extctr, recword, node); - + /* loop through kid nodes */ for (node = node->children; node; node = node->next) { @@ -1040,8 +1157,8 @@ static void process_xml_element_node(struct filter_info *tinfo, /* DOM filter style indexing */ -static void extract_dom_doc_node(struct filter_info *tinfo, - struct recExtractCtrl *extctr, +static void extract_dom_doc_node(struct filter_info *tinfo, + struct recExtractCtrl *extctr, xmlDocPtr doc) { /* only need to do the initialization once, reuse recword for all terms */ @@ -1052,19 +1169,15 @@ static void extract_dom_doc_node(struct filter_info *tinfo, } - - -static int convert_extract_doc(struct filter_info *tinfo, +static int convert_extract_doc(struct filter_info *tinfo, struct filter_input *input, - struct recExtractCtrl *p, + struct recExtractCtrl *p, xmlDocPtr doc) - { xmlChar *buf_out; int len_out; const char *params[10]; xsltStylesheetPtr last_xsp = 0; - xmlDocPtr store_doc = 0; /* per default do not ingest record */ tinfo->record_info_invoked = 0; @@ -1081,45 +1194,47 @@ static int convert_extract_doc(struct filter_info *tinfo, { xmlChar *buf_out; int len_out; -#if 0 - FILE *outf = fopen("extract.xml", "w"); xmlDocDumpMemory(doc, &buf_out, &len_out); - fwrite(buf_out, 1, len_out, outf); -#endif - yaz_log(YLOG_LOG, "Extract Doc: %.*s", len_out, buf_out); #if 0 + FILE *outf = fopen("extract.xml", "w"); + fwrite(buf_out, 1, len_out, outf); fclose(outf); #endif + yaz_log(YLOG_LOG, "Extract Doc: %.*s", len_out, buf_out); } - /* input conversion */ - perform_convert(tinfo, p, input->convert, params, &doc, 0); + if (p->setStoreData) + { + xmlDocPtr store_doc = 0; + /* input conversion */ + perform_convert(tinfo, p, 0, input->convert, params, &doc, 0); - if (tinfo->store) - { - /* store conversion */ - store_doc = xmlCopyDoc(doc, 1); - perform_convert(tinfo, p, tinfo->store->convert, - params, &store_doc, &last_xsp); - } - - /* saving either store doc or original doc in case no store doc exists */ - if (last_xsp) - xsltSaveResultToString(&buf_out, &len_out, - store_doc ? store_doc : doc, last_xsp); - else - xmlDocDumpMemory(store_doc ? store_doc : doc, &buf_out, &len_out); + if (tinfo->store) + { + /* store conversion */ + store_doc = xmlCopyDoc(doc, 1); + perform_convert(tinfo, p, 0, tinfo->store->convert, + params, &store_doc, &last_xsp); + } - if (p->setStoreData) - (*p->setStoreData)(p, buf_out, len_out); - xmlFree(buf_out); + /* saving either store doc or original doc in case no store doc exists */ + if (last_xsp) + xsltSaveResultToString(&buf_out, &len_out, + store_doc ? store_doc : doc, last_xsp); + else + xmlDocDumpMemory(store_doc ? store_doc : doc, &buf_out, &len_out); + + if (p->setStoreData) + (*p->setStoreData)(p, buf_out, len_out); + xmlFree(buf_out); + if (store_doc) + xmlFreeDoc(store_doc); + } - if (store_doc) - xmlFreeDoc(store_doc); /* extract conversion */ - perform_convert(tinfo, p, tinfo->extract->convert, params, &doc, 0); + perform_convert(tinfo, p, 0, tinfo->extract->convert, params, &doc, 0); /* finally, do the indexing */ @@ -1127,7 +1242,7 @@ static int convert_extract_doc(struct filter_info *tinfo, extract_dom_doc_node(tinfo, p, doc); xmlFreeDoc(doc); } - + /* there was nothing to index, so there is no inserted/updated record */ if (tinfo->record_info_invoked == 0) return RECCTRL_EXTRACT_SKIP; @@ -1147,7 +1262,7 @@ static int extract_xml_split(struct filter_info *tinfo, xmlFreeTextReader(input->u.xmlreader.reader); input->u.xmlreader.reader = xmlReaderForIO(ioread_ex, ioclose_ex, p /* I/O handler */, - 0 /* URL */, + 0 /* URL */, 0 /* encoding */, XML_PARSE_XINCLUDE | XML_PARSE_NOENT @@ -1162,36 +1277,36 @@ static int extract_xml_split(struct filter_info *tinfo, int type = xmlTextReaderNodeType(input->u.xmlreader.reader); int depth = xmlTextReaderDepth(input->u.xmlreader.reader); - if (type == XML_READER_TYPE_ELEMENT && + if (type == XML_READER_TYPE_ELEMENT && input->u.xmlreader.split_level == depth) { xmlNodePtr ptr; /* per default do not ingest record */ tinfo->record_info_invoked = 0; - + ptr = xmlTextReaderExpand(input->u.xmlreader.reader); if (ptr) - { + { /* we have a new document */ xmlNodePtr ptr2 = xmlCopyNode(ptr, 1); xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0"); - + xmlDocSetRootElement(doc, ptr2); - + /* writing debug info out */ if (p->flagShowRecords) { xmlChar *buf_out = 0; int len_out = 0; xmlDocDumpMemory(doc, &buf_out, &len_out); - yaz_log(YLOG_LOG, "%s: XMLREADER level: %i\n%.*s", + yaz_log(YLOG_LOG, "%s: XMLREADER level: %i\n%.*s", tinfo->fname ? tinfo->fname : "(none)", - depth, len_out, buf_out); + depth, len_out, buf_out); xmlFree(buf_out); } - + return convert_extract_doc(tinfo, input, p, doc); } else @@ -1208,13 +1323,13 @@ static int extract_xml_split(struct filter_info *tinfo, return RECCTRL_EXTRACT_EOF; } -static int extract_xml_full(struct filter_info *tinfo, +static int extract_xml_full(struct filter_info *tinfo, struct filter_input *input, struct recExtractCtrl *p) { if (p->first_record) /* only one record per stream */ { - xmlDocPtr doc = xmlReadIO(ioread_ex, ioclose_ex, + xmlDocPtr doc = xmlReadIO(ioread_ex, ioclose_ex, p /* I/O handler */, 0 /* URL */, 0 /* encoding */, @@ -1248,7 +1363,7 @@ static int extract_iso2709(struct filter_info *tinfo, dom_log(YLOG_WARN, tinfo, 0, "MARC: Skipping bad byte %d (0x%02X)", *buf & 0xff, *buf & 0xff); - for (i = 0; i<4; i++) + for (i = 0; i < 4; i++) buf[i] = buf[i+1]; if (p->stream->readf(p->stream, buf+4, 1) != 1) @@ -1280,11 +1395,11 @@ static int extract_iso2709(struct filter_info *tinfo, { xmlDocPtr rdoc; xmlNode *root_ptr; - yaz_marc_write_xml(input->u.marc.handle, &root_ptr, + yaz_marc_write_xml(input->u.marc.handle, &root_ptr, "http://www.loc.gov/MARC21/slim", 0, 0); rdoc = xmlNewDoc((const xmlChar*) "1.0"); xmlDocSetRootElement(rdoc, root_ptr); - return convert_extract_doc(tinfo, input, p, rdoc); + return convert_extract_doc(tinfo, input, p, rdoc); } return RECCTRL_EXTRACT_OK; } @@ -1296,7 +1411,7 @@ static int filter_extract(void *clientData, struct recExtractCtrl *p) if (!input) return RECCTRL_EXTRACT_ERROR_GENERIC; - + nmem_reset(tinfo->nmem_record); if (p->setStoreData == 0) @@ -1318,7 +1433,8 @@ static int filter_extract(void *clientData, struct recExtractCtrl *p) static int ioread_ret(void *context, char *buffer, int len) { struct recRetrieveCtrl *p = context; - return p->stream->readf(p->stream, buffer, len); + int r = p->stream->readf(p->stream, buffer, len); + return r; } static int ioclose_ret(void *context) @@ -1326,7 +1442,7 @@ static int ioclose_ret(void *context) return 0; } -static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p) +static int filter_retrieve(void *clientData, struct recRetrieveCtrl *p) { /* const char *esn = zebra_dom_ns; */ const char *esn = 0; @@ -1343,7 +1459,7 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p) { esn = p->comp->u.simple->u.generic; } - else if (p->comp->which == Z_RecordComp_complex + else if (p->comp->which == Z_RecordComp_complex && p->comp->u.complex->generic->elementSpec && p->comp->u.complex->generic->elementSpec->which == Z_ElementSpec_elementSetName) @@ -1356,7 +1472,7 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p) { p->diagnostic = YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_; - p->addinfo = odr_strdup(p->odr, esn); + p->addinfo = odr_strdup_null(p->odr, esn); return 0; } @@ -1392,7 +1508,7 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p) } /* retrieve conversion */ - perform_convert(tinfo, 0, retrieve->convert, params, &doc, &last_xsp); + perform_convert(tinfo, 0, p, retrieve->convert, params, &doc, &last_xsp); if (!doc) { p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS; @@ -1406,7 +1522,7 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p) if (last_xsp) xsltSaveResultToString(&buf_out, &len_out, doc, last_xsp); else - xmlDocDumpMemory(doc, &buf_out, &len_out); + xmlDocDumpMemory(doc, &buf_out, &len_out); p->output_format = yaz_oid_recsyn_xml; p->rec_len = len_out; @@ -1422,13 +1538,13 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p) if (last_xsp) xsltSaveResultToString(&buf_out, &len_out, doc, last_xsp); else - xmlDocDumpMemory(doc, &buf_out, &len_out); - + xmlDocDumpMemory(doc, &buf_out, &len_out); + p->output_format = yaz_oid_recsyn_sutrs; p->rec_len = len_out; p->rec_buf = odr_malloc(p->odr, p->rec_len); memcpy(p->rec_buf, buf_out, p->rec_len); - + xmlFree(buf_out); } else @@ -1450,7 +1566,7 @@ static struct recType filter_type = { }; RecType -#ifdef IDZEBRA_STATIC_DOM +#if IDZEBRA_STATIC_DOM idzebra_filter_dom #else idzebra_filter @@ -1463,6 +1579,7 @@ idzebra_filter /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab