X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Fmod_dom.c;h=3340ec454832c68936461c46e570f0e3248c92f5;hp=131c09af86ef814defb92dba4007b0cb9d8f3be9;hb=426d07a60c57c3555934655a78437cf4677c65c8;hpb=02e8762b23b8043382edeeb0eed93e2519471570 diff --git a/index/mod_dom.c b/index/mod_dom.c index 131c09a..3340ec4 100644 --- a/index/mod_dom.c +++ b/index/mod_dom.c @@ -1,23 +1,19 @@ +/* This file is part of the Zebra server. + Copyright (C) 1994-2009 Index Data -/* $Id: mod_dom.c,v 1.29 2007-03-06 12:09:44 adam Exp $ - Copyright (C) 1995-2007 - Index Data ApS +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. - This file is part of the Zebra server. +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. - Zebra is free software; you can redistribute it and/or modify it under - the terms of the GNU General Public License as published by the Free - Software Foundation; either version 2, or (at your option) any later - version. - - Zebra is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -44,6 +40,7 @@ #include #include +#include /* DOM filter style indexing */ #define ZEBRA_DOM_NS "http://indexdata.com/zebra-2.0" @@ -53,11 +50,26 @@ static const char *zebra_dom_ns = ZEBRA_DOM_NS; #define ZEBRA_PI_NAME "zebra-2.0" static const char *zebra_pi_name = ZEBRA_PI_NAME; +enum convert_type { + convert_xslt_type, + convert_meta_type +}; - -struct convert_s { +struct convert_xslt { const char *stylesheet; xsltStylesheetPtr stylesheet_xsp; +}; + +struct convert_meta { + int dummy; +}; + +struct convert_s { + enum convert_type which; + union { + struct convert_xslt xslt; + struct convert_meta meta; + } u; struct convert_s *next; }; @@ -86,14 +98,14 @@ struct filter_input { int type; union { struct { + xmlTextReaderPtr reader; + int split_level; + } xmlreader; + struct { const char *input_charset; yaz_marc_t handle; yaz_iconv_t iconv; } marc; - struct { - xmlTextReaderPtr reader; - int split_level; - } xmlreader; } u; struct filter_input *next; }; @@ -102,8 +114,8 @@ struct filter_info { char *fname; char *full_name; const char *profile_path; - ODR odr_record; - ODR odr_config; + NMEM nmem_record; + NMEM nmem_config; xmlDocPtr doc_config; struct filter_extract *extract; struct filter_retrieve *retrieve_list; @@ -149,9 +161,9 @@ static void dom_log(int level, struct filter_info *tinfo, xmlNodePtr ptr, static void set_param_str(const char **params, const char *name, - const char *value, ODR odr) + const char *value, NMEM nmem) { - char *quoted = odr_malloc(odr, 3 + strlen(value)); + char *quoted = nmem_malloc(nmem, 3 + strlen(value)); sprintf(quoted, "'%s'", value); while (*params) params++; @@ -161,9 +173,9 @@ static void set_param_str(const char **params, const char *name, } static void set_param_int(const char **params, const char *name, - zint value, ODR odr) + zint value, NMEM nmem) { - char *quoted = odr_malloc(odr, 30); /* 25 digits enough for 2^64 */ + char *quoted = nmem_malloc(nmem, 30); /* 25 digits enough for 2^64 */ while (*params) params++; sprintf(quoted, "'" ZINT_FORMAT "'", value); @@ -178,8 +190,8 @@ static void *filter_init(Res res, RecType recType) tinfo->fname = 0; tinfo->full_name = 0; tinfo->profile_path = 0; - tinfo->odr_record = odr_createmem(ODR_ENCODE); - tinfo->odr_config = odr_createmem(ODR_ENCODE); + tinfo->nmem_record = nmem_create(); + tinfo->nmem_config = nmem_create(); tinfo->extract = 0; tinfo->retrieve_list = 0; tinfo->input_list = 0; @@ -208,10 +220,13 @@ static int attr_content(struct _xmlAttr *attr, const char *name, static void destroy_xsp(struct convert_s *c) { - while(c) + while (c) { - if (c->stylesheet_xsp) - xsltFreeStylesheet(c->stylesheet_xsp); + if (c->which == convert_xslt_type) + { + if (c->u.xslt.stylesheet_xsp) + xsltFreeStylesheet(c->u.xslt.stylesheet_xsp); + } c = c->next; } } @@ -261,7 +276,7 @@ static void destroy_dom(struct filter_info *tinfo) xmlFreeDoc(tinfo->doc_config); tinfo->doc_config = 0; } - odr_reset(tinfo->odr_config); + nmem_reset(tinfo->nmem_config); } static ZEBRA_RES parse_convert(struct filter_info *tinfo, xmlNodePtr ptr, @@ -272,25 +287,25 @@ static ZEBRA_RES parse_convert(struct filter_info *tinfo, xmlNodePtr ptr, if (!XML_STRCMP(ptr->name, "xslt")) { struct _xmlAttr *attr; - struct convert_s *p - = odr_malloc(tinfo->odr_config, sizeof(*p)); + struct convert_s *p = nmem_malloc(tinfo->nmem_config, sizeof(*p)); p->next = 0; - p->stylesheet = 0; - p->stylesheet_xsp = 0; + p->which = convert_xslt_type; + p->u.xslt.stylesheet = 0; + p->u.xslt.stylesheet_xsp = 0; for (attr = ptr->properties; attr; attr = attr->next) - if (attr_content(attr, "stylesheet", &p->stylesheet)) + if (attr_content(attr, "stylesheet", &p->u.xslt.stylesheet)) ; else { dom_log(YLOG_WARN, tinfo, ptr, "bad attribute @%s", attr->name); } - if (p->stylesheet) + if (p->u.xslt.stylesheet) { char tmp_xslt_full_name[1024]; - if (!yaz_filepath_resolve(p->stylesheet, + if (!yaz_filepath_resolve(p->u.xslt.stylesheet, tinfo->profile_path, NULL, tmp_xslt_full_name)) @@ -298,30 +313,44 @@ static ZEBRA_RES parse_convert(struct filter_info *tinfo, xmlNodePtr ptr, dom_log(YLOG_WARN, tinfo, 0, "stylesheet %s not found in " "path %s", - p->stylesheet, + p->u.xslt.stylesheet, tinfo->profile_path); return ZEBRA_FAIL; } - p->stylesheet_xsp + p->u.xslt.stylesheet_xsp = xsltParseStylesheetFile((const xmlChar*) tmp_xslt_full_name); - if (!p->stylesheet_xsp) + if (!p->u.xslt.stylesheet_xsp) { dom_log(YLOG_WARN, tinfo, 0, "could not parse xslt stylesheet %s", tmp_xslt_full_name); return ZEBRA_FAIL; } - } - else - { - dom_log(YLOG_WARN, tinfo, ptr, - "missing attribute 'stylesheet' "); - return ZEBRA_FAIL; - } - *l = p; - l = &p->next; + } + else + { + dom_log(YLOG_WARN, tinfo, ptr, + "missing attribute 'stylesheet'"); + return ZEBRA_FAIL; + } + *l = p; + l = &p->next; + } + else if (!XML_STRCMP(ptr->name, "process-meta")) + { + struct _xmlAttr *attr; + struct convert_s *p = nmem_malloc(tinfo->nmem_config, sizeof(*p)); + + p->next = 0; + p->which = convert_meta_type; + + for (attr = ptr->properties; attr; attr = attr->next) + dom_log(YLOG_WARN, tinfo, ptr, + "bad attribute @%s", attr->name); + *l = p; + l = &p->next; } else { @@ -333,8 +362,64 @@ static ZEBRA_RES parse_convert(struct filter_info *tinfo, xmlNodePtr ptr, return ZEBRA_OK; } +static int process_meta(struct filter_info *tinfo, xmlDocPtr doc, xmlNodePtr node, + struct recRetrieveCtrl *retctr) +{ + + if (node->type == XML_ELEMENT_NODE && node->ns && node->ns->href && + 0 == XML_STRCMP(node->ns->href, zebra_dom_ns)) + { + if (0 == XML_STRCMP(node->name, "meta")) + { + const char *element_set_name = 0; + + struct _xmlAttr *attr; + for (attr = node->properties; attr; attr = attr->next) + { + if (attr_content(attr, "name", &element_set_name)) + ; + else + { + dom_log(YLOG_WARN, tinfo, node, + "bad attribute @%s, expected @name", attr->name); + } + } + if (element_set_name) + { + WRBUF result = wrbuf_alloc(); + WRBUF addinfo = wrbuf_alloc(); + const Odr_oid *input_format = yaz_oid_recsyn_xml; + const Odr_oid *output_format = 0; + int ret; + + ret = retctr->special_fetch(retctr->handle, + element_set_name, + input_format, &output_format, + result, addinfo); + if (ret == 0) + { + xmlDocPtr sub_doc = + xmlParseMemory(wrbuf_buf(result), wrbuf_len(result)); + if (sub_doc) + { + xmlNodePtr t = xmlDocGetRootElement(sub_doc); + xmlReplaceNode(node, xmlCopyNode(t, 1)); + xmlFreeDoc(sub_doc); + } + } + wrbuf_destroy(result); + wrbuf_destroy(addinfo); + } + } + } + for (node = node->children; node; node = node->next) + process_meta(tinfo, doc, node, retctr); + return 0; +} + static ZEBRA_RES perform_convert(struct filter_info *tinfo, struct recExtractCtrl *extctr, + struct recRetrieveCtrl *retctr, struct convert_s *convert, const char **params, xmlDocPtr *doc, @@ -342,34 +427,48 @@ static ZEBRA_RES perform_convert(struct filter_info *tinfo, { for (; convert; convert = convert->next) { - xmlChar *buf_out = 0; - int len_out = 0; - xmlDocPtr res_doc = xsltApplyStylesheet(convert->stylesheet_xsp, - *doc, params); - if (last_xsp) - *last_xsp = convert->stylesheet_xsp; - - if (!res_doc) - break; - - /* now saving into buffer and re-reading into DOM to avoid annoing - XSLT problem with thrown-out indentation text nodes */ - xsltSaveResultToString(&buf_out, &len_out, res_doc, - convert->stylesheet_xsp); - xmlFreeDoc(res_doc); - - xmlFreeDoc(*doc); - - *doc = xmlParseMemory((const char *) buf_out, len_out); + if (convert->which == convert_xslt_type) + { + xmlChar *buf_out = 0; + int len_out = 0; + xmlDocPtr res_doc = xsltApplyStylesheet(convert->u.xslt.stylesheet_xsp, + *doc, params); + if (last_xsp) + *last_xsp = convert->u.xslt.stylesheet_xsp; + + if (!res_doc) + break; + + /* now saving into buffer and re-reading into DOM to avoid annoing + XSLT problem with thrown-out indentation text nodes */ + xsltSaveResultToString(&buf_out, &len_out, res_doc, + convert->u.xslt.stylesheet_xsp); + xmlFreeDoc(res_doc); + + xmlFreeDoc(*doc); + + *doc = xmlParseMemory((const char *) buf_out, len_out); + + /* writing debug info out */ + if (extctr && extctr->flagShowRecords) + yaz_log(YLOG_LOG, "%s: XSLT %s\n %.*s", + tinfo->fname ? tinfo->fname : "(none)", + convert->u.xslt.stylesheet, + len_out, buf_out); + + xmlFree(buf_out); + } + else if (convert->which == convert_meta_type) + { + if (retctr) /* only execute meta on retrieval */ + { + process_meta(tinfo, *doc, xmlDocGetRootElement(*doc), retctr); - /* writing debug info out */ - if (extctr && extctr->flagShowRecords) - yaz_log(YLOG_LOG, "%s: XSLT %s\n %.*s", - tinfo->fname ? tinfo->fname : "(none)", - convert->stylesheet, - len_out, buf_out); - - xmlFree(buf_out); + /* last stylesheet absent */ + if (last_xsp) + *last_xsp = 0; + } + } } return ZEBRA_OK; } @@ -380,7 +479,7 @@ static struct filter_input *new_input(struct filter_info *tinfo, int type) struct filter_input **np = &tinfo->input_list; for (;*np; np = &(*np)->next) ; - p = *np = odr_malloc(tinfo->odr_config, sizeof(*p)); + p = *np = nmem_malloc(tinfo->nmem_config, sizeof(*p)); p->next = 0; p->syntax = 0; p->name = 0; @@ -479,13 +578,13 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname) xmlNodePtr ptr; xmlDocPtr doc; - tinfo->fname = odr_strdup(tinfo->odr_config, fname); + tinfo->fname = nmem_strdup(tinfo->nmem_config, fname); if (yaz_filepath_resolve(tinfo->fname, tinfo->profile_path, NULL, tmp_full_name)) - tinfo->full_name = odr_strdup(tinfo->odr_config, tmp_full_name); + tinfo->full_name = nmem_strdup(tinfo->nmem_config, tmp_full_name); else - tinfo->full_name = odr_strdup(tinfo->odr_config, tinfo->fname); + tinfo->full_name = nmem_strdup(tinfo->nmem_config, tinfo->fname); yaz_log(YLOG_LOG, "%s dom filter: " "loading config file %s", tinfo->fname, tinfo->full_name); @@ -523,7 +622,7 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname) */ struct _xmlAttr *attr; struct filter_extract *f = - odr_malloc(tinfo->odr_config, sizeof(*f)); + nmem_malloc(tinfo->nmem_config, sizeof(*f)); tinfo->extract = f; f->name = 0; @@ -552,7 +651,7 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname) struct _xmlAttr *attr; struct filter_retrieve **fp = &tinfo->retrieve_list; struct filter_retrieve *f = - odr_malloc(tinfo->odr_config, sizeof(*f)); + nmem_malloc(tinfo->nmem_config, sizeof(*f)); while (*fp) fp = &(*fp)->next; @@ -588,7 +687,7 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname) */ struct filter_store *f = - odr_malloc(tinfo->odr_config, sizeof(*f)); + nmem_malloc(tinfo->nmem_config, sizeof(*f)); tinfo->store = f; f->convert = 0; @@ -631,6 +730,13 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname) return ZEBRA_FAIL; } } + if (!tinfo->input_list) + { + struct filter_input *p + = new_input(tinfo, DOM_INPUT_XMLREADER); + p->u.xmlreader.split_level = 0; + p->u.xmlreader.reader = 0; + } return ZEBRA_OK; } @@ -678,8 +784,8 @@ static void filter_destroy(void *clientData) { struct filter_info *tinfo = clientData; destroy_dom(tinfo); - odr_destroy(tinfo->odr_config); - odr_destroy(tinfo->odr_record); + nmem_destroy(tinfo->nmem_config); + nmem_destroy(tinfo->nmem_record); xfree(tinfo); } @@ -695,26 +801,13 @@ static int ioclose_ex(void *context) } -/* DOM filter style indexing */ -static int attr_content_xml(struct _xmlAttr *attr, const char *name, - xmlChar **dst_content) -{ - if (0 == XML_STRCMP(attr->name, name) && attr->children - && attr->children->type == XML_TEXT_NODE) - { - *dst_content = (attr->children->content); - return 1; - } - return 0; -} - /* DOM filter style indexing */ static void index_value_of(struct filter_info *tinfo, struct recExtractCtrl *extctr, RecWord* recword, xmlNodePtr node, - xmlChar * index_p) + const char *index_p) { if (tinfo->record_info_invoked == 1) { @@ -724,9 +817,17 @@ static void index_value_of(struct filter_info *tinfo, /* if there is no text, we do not need to proceed */ if (text_len) { - xmlChar *look = index_p; - xmlChar *bval; - xmlChar *eval; + /* keep seqno base so that all text will have + identical seqno's for multiple fields , e.g + .. */ + + zint seqno_base = recword->seqno; + zint seqno_max = recword->seqno; + + + const char *look = index_p; + const char *bval; + const char *eval; xmlChar index[256]; xmlChar type[256]; @@ -770,36 +871,32 @@ static void index_value_of(struct filter_info *tinfo, } /* actually indexing the text given */ - dom_log(YLOG_DEBUG, tinfo, 0, - "INDEX '%s:%s' '%s'", - index ? (const char *) index : "null", - type ? (const char *) type : "null", - text ? (const char *) text : "null"); + recword->seqno = seqno_base; recword->index_name = (const char *)index; - if (type && *type) - recword->index_type = *type; + if (*type) + recword->index_type = (const char *) type; /* writing debug out */ if (extctr->flagShowRecords) dom_log(YLOG_LOG, tinfo, 0, "INDEX '%s:%s' '%s'", - index ? (const char *) index : "null", - type ? (const char *) type : "null", - text ? (const char *) text : "null"); + (const char *) index, + (const char *) type, + (const char *) text); - /* actually indexing the text given */ - recword->index_name = (const char *)index; - if (type && *type) - recword->index_type = *type; (extctr->tokenAdd)(recword); + if (seqno_max < recword->seqno) + seqno_max = recword->seqno; + /* eat whitespaces */ - if (*look && ' ' == *look && *(look+1)) + if (*look && ' ' == *look) { look++; } } + recword->seqno = seqno_max; } xmlFree(text); } @@ -810,33 +907,41 @@ static void index_value_of(struct filter_info *tinfo, static void set_record_info(struct filter_info *tinfo, struct recExtractCtrl *extctr, xmlNodePtr node, - xmlChar * id_p, - xmlChar * rank_p, - xmlChar * type_p) + const char * id_p, + const char * rank_p, + const char * type_p) { - /* writing debug info out */ - if (extctr->flagShowRecords) - dom_log(YLOG_LOG, tinfo, 0, + if (extctr && extctr->flagShowRecords) + dom_log(YLOG_LOG, tinfo, node, "RECORD id=%s rank=%s type=%s", id_p ? (const char *) id_p : "(null)", rank_p ? (const char *) rank_p : "(null)", type_p ? (const char *) type_p : "(null)"); - if (id_p) + if (id_p && *id_p) sscanf((const char *)id_p, "%255s", extctr->match_criteria); - if (rank_p) + if (rank_p && *rank_p) extctr->staticrank = atozint((const char *)rank_p); - /* if (!strcmp("update", type_str)) */ - /* index_node(tinfo, ctrl, ptr, recword); */ - /* else if (!strcmp("delete", type_str)) */ - /* dom_log(YLOG_WARN, tinfo, ptr, "dom filter delete: to be implemented"); */ - /* else */ - /* dom_log(YLOG_WARN, tinfo, ptr, "dom filter: unknown record type '%s'", */ - /* type_str); */ + if (type_p && *type_p) + { + enum zebra_recctrl_action_t action = action_update; + if (!strcmp(type_p, "insert")) + action = action_insert; + else if (!strcmp(type_p, "delete")) + action = action_delete; + else if (!strcmp(type_p, "replace")) + action = action_replace; + else if (!strcmp(type_p, "update")) + action = action_update; + else + dom_log(YLOG_WARN, tinfo, node, "bad @type value: %s", type_p); + extctr->action = action; + } + if (tinfo->record_info_invoked == 1) { /* warn about multiple only once */ @@ -856,16 +961,16 @@ static void process_xml_element_zebra_node(struct filter_info *tinfo, if (node->type == XML_ELEMENT_NODE && node->ns && node->ns->href && 0 == XML_STRCMP(node->ns->href, zebra_dom_ns)) { - if (0 == XML_STRCMP(node->name, "index")) - { - xmlChar *index_p = 0; + if (0 == XML_STRCMP(node->name, "index")) + { + const char *index_p = 0; struct _xmlAttr *attr; for (attr = node->properties; attr; attr = attr->next) { - if (attr_content_xml(attr, "name", &index_p)) + if (attr_content(attr, "name", &index_p)) { - index_value_of(tinfo, extctr, recword,node, index_p); + index_value_of(tinfo, extctr, recword, node, index_p); } else { @@ -877,18 +982,18 @@ static void process_xml_element_zebra_node(struct filter_info *tinfo, } else if (0 == XML_STRCMP(node->name, "record")) { - xmlChar *id_p = 0; - xmlChar *rank_p = 0; - xmlChar *type_p = 0; + const char *id_p = 0; + const char *rank_p = 0; + const char *type_p = 0; struct _xmlAttr *attr; for (attr = node->properties; attr; attr = attr->next) { - if (attr_content_xml(attr, "id", &id_p)) + if (attr_content(attr, "id", &id_p)) ; - else if (attr_content_xml(attr, "rank", &rank_p)) + else if (attr_content(attr, "rank", &rank_p)) ; - else if (attr_content_xml(attr, "type", &type_p)) + else if (attr_content(attr, "type", &type_p)) ; else { @@ -896,13 +1001,6 @@ static void process_xml_element_zebra_node(struct filter_info *tinfo, "bad attribute @%s, expected @id|@rank|@type", attr->name); } - - if (type_p && 0 != strcmp("update", (const char *)type_p)) - { - dom_log(YLOG_WARN, tinfo, node, - "attribute @%s, only implemented '@type='update'", - attr->name); - } } set_record_info(tinfo, extctr, node, id_p, rank_p, type_p); } @@ -916,80 +1014,74 @@ static void process_xml_element_zebra_node(struct filter_info *tinfo, } } +static int attr_content_pi(const char **c_ptr, const char *name, + char *value, size_t value_max) +{ + size_t name_len = strlen(name); + const char *look = *c_ptr; + int ret = 0; + + *value = '\0'; + while (*look && ' ' == *look) + look++; + if (strlen(look) > name_len) + { + if (look[name_len] == '=' && !memcmp(look, name, name_len)) + { + size_t i = 0; + look += name_len+1; + while (*look && ' ' != *look) + { + if (i < value_max-1) + value[i++] = *look; + look++; + } + value[i] = '\0'; + ret = 1; + } + } + while (*look && ' ' == *look) + look++; + *c_ptr = look; + return ret; +} /* DOM filter style indexing */ static void process_xml_pi_node(struct filter_info *tinfo, struct recExtractCtrl *extctr, xmlNodePtr node, - xmlChar **index_pp) + const char **index_pp) { /* if right PI name, continue parsing PI */ if (0 == strcmp(zebra_pi_name, (const char *)node->name)) { xmlChar *pi_p = node->content; - xmlChar *look = pi_p; + const char *look = (const char *) node->content; - xmlChar *bval; - xmlChar *eval; - /* parsing PI record instructions */ if (0 == strncmp((const char *)look, "record", 6)) { - xmlChar id[256]; - xmlChar rank[256]; - xmlChar type[256]; - + char id[256]; + char rank[256]; + char type[256]; + *id = '\0'; *rank = '\0'; *type = '\0'; - look += 6; - - /* eat whitespace */ - while (*look && ' ' == *look && *(look+1)) - look++; - - /* parse possible id */ - if (*look && 0 == strncmp((const char *)look, "id=", 3)) - { - look += 3; - bval = look; - while (*look && ' ' != *look) - look++; - eval = look; - strncpy((char *)id, (const char *)bval, eval - bval); - id[eval - bval] = '\0'; - } - - /* eat whitespace */ - while (*look && ' ' == *look && *(look+1)) - look++; - - /* parse possible rank */ - if (*look && 0 == strncmp((const char *)look, "rank=", 5)) - { - look += 6; - bval = look; - while (*look && ' ' != *look) - look++; - eval = look; - strncpy((char *)rank, (const char *)bval, eval - bval); - rank[eval - bval] = '\0'; - } - - /* eat whitespace */ - while (*look && ' ' == *look && *(look+1)) - look++; - - if (look && '\0' != *look) - { - dom_log(YLOG_WARN, tinfo, node, - "content '%s', can not parse '%s'", - pi_p, look); - } - else - set_record_info(tinfo, extctr, node, id, rank, 0); - + while (*look) + if (attr_content_pi(&look, "id", id, sizeof(id))) + ; + else if (attr_content_pi(&look, "rank", rank, sizeof(rank))) + ; + else if (attr_content_pi(&look, "type", type, sizeof(type))) + { + dom_log(YLOG_WARN, tinfo, node, + "content '%s', can not parse '%s'", + pi_p, look); + break; + } + set_record_info(tinfo, extctr, node, id, rank, type); } /* parsing index instruction */ else if (0 == strncmp((const char *)look, "index", 5)) @@ -997,7 +1089,7 @@ static void process_xml_pi_node(struct filter_info *tinfo, look += 5; /* eat whitespace */ - while (*look && ' ' == *look && *(look+1)) + while (*look && ' ' == *look) look++; /* export index instructions to outside */ @@ -1019,7 +1111,7 @@ static void process_xml_element_node(struct filter_info *tinfo, xmlNodePtr node) { /* remember indexing instruction from PI to next element node */ - xmlChar *index_p = 0; + const char *index_p = 0; /* check if we are an element node in the special zebra namespace and either set record data or index value-of node content*/ @@ -1062,19 +1154,15 @@ static void extract_dom_doc_node(struct filter_info *tinfo, } - - static int convert_extract_doc(struct filter_info *tinfo, struct filter_input *input, struct recExtractCtrl *p, xmlDocPtr doc) - { xmlChar *buf_out; int len_out; const char *params[10]; xsltStylesheetPtr last_xsp = 0; - xmlDocPtr store_doc = 0; /* per default do not ingest record */ tinfo->record_info_invoked = 0; @@ -1085,34 +1173,53 @@ static int convert_extract_doc(struct filter_info *tinfo, /* we actuallu have a document which needs to be processed further */ params[0] = 0; - set_param_str(params, "schema", zebra_dom_ns, tinfo->odr_record); - - /* input conversion */ - perform_convert(tinfo, p, input->convert, params, &doc, 0); + set_param_str(params, "schema", zebra_dom_ns, tinfo->nmem_record); - if (tinfo->store) + if (p && p->flagShowRecords) { - /* store conversion */ - store_doc = xmlCopyDoc(doc, 1); - perform_convert(tinfo, p, tinfo->store->convert, - params, &store_doc, &last_xsp); + xmlChar *buf_out; + int len_out; + xmlDocDumpMemory(doc, &buf_out, &len_out); +#if 0 + FILE *outf = fopen("extract.xml", "w"); + fwrite(buf_out, 1, len_out, outf); + fclose(outf); +#endif + yaz_log(YLOG_LOG, "Extract Doc: %.*s", len_out, buf_out); } - - /* saving either store doc or original doc in case no store doc exists */ - if (last_xsp) - xsltSaveResultToString(&buf_out, &len_out, - store_doc ? store_doc : doc, last_xsp); - else - xmlDocDumpMemory(store_doc ? store_doc : doc, &buf_out, &len_out); - (*p->setStoreData)(p, buf_out, len_out); - xmlFree(buf_out); + if (p->setStoreData) + { + xmlDocPtr store_doc = 0; + + /* input conversion */ + perform_convert(tinfo, p, 0, input->convert, params, &doc, 0); + + if (tinfo->store) + { + /* store conversion */ + store_doc = xmlCopyDoc(doc, 1); + perform_convert(tinfo, p, 0, tinfo->store->convert, + params, &store_doc, &last_xsp); + } + + /* saving either store doc or original doc in case no store doc exists */ + if (last_xsp) + xsltSaveResultToString(&buf_out, &len_out, + store_doc ? store_doc : doc, last_xsp); + else + xmlDocDumpMemory(store_doc ? store_doc : doc, &buf_out, &len_out); + + if (p->setStoreData) + (*p->setStoreData)(p, buf_out, len_out); + xmlFree(buf_out); + if (store_doc) + xmlFreeDoc(store_doc); + } - if (store_doc) - xmlFreeDoc(store_doc); /* extract conversion */ - perform_convert(tinfo, p, tinfo->extract->convert, params, &doc, 0); + perform_convert(tinfo, p, 0, tinfo->extract->convert, params, &doc, 0); /* finally, do the indexing */ @@ -1142,8 +1249,9 @@ static int extract_xml_split(struct filter_info *tinfo, p /* I/O handler */, 0 /* URL */, 0 /* encoding */, - XML_PARSE_XINCLUDE| - XML_PARSE_NOENT); + XML_PARSE_XINCLUDE + | XML_PARSE_NOENT + | XML_PARSE_NONET); } if (!input->u.xmlreader.reader) return RECCTRL_EXTRACT_ERROR_GENERIC; @@ -1164,7 +1272,7 @@ static int extract_xml_split(struct filter_info *tinfo, ptr = xmlTextReaderExpand(input->u.xmlreader.reader); if (ptr) - { + { /* we have a new document */ xmlNodePtr ptr2 = xmlCopyNode(ptr, 1); @@ -1178,7 +1286,7 @@ static int extract_xml_split(struct filter_info *tinfo, xmlChar *buf_out = 0; int len_out = 0; xmlDocDumpMemory(doc, &buf_out, &len_out); - yaz_log(YLOG_LOG, "%s: XMLREADER depth: %i\n%.*s", + yaz_log(YLOG_LOG, "%s: XMLREADER level: %i\n%.*s", tinfo->fname ? tinfo->fname : "(none)", depth, len_out, buf_out); xmlFree(buf_out); @@ -1210,7 +1318,9 @@ static int extract_xml_full(struct filter_info *tinfo, p /* I/O handler */, 0 /* URL */, 0 /* encoding */, - XML_PARSE_XINCLUDE|XML_PARSE_NOENT); + XML_PARSE_XINCLUDE + | XML_PARSE_NOENT + | XML_PARSE_NONET); if (!doc) { return RECCTRL_EXTRACT_ERROR_GENERIC; @@ -1270,7 +1380,8 @@ static int extract_iso2709(struct filter_info *tinfo, { xmlDocPtr rdoc; xmlNode *root_ptr; - yaz_marc_write_xml(input->u.marc.handle, &root_ptr, 0, 0, 0); + yaz_marc_write_xml(input->u.marc.handle, &root_ptr, + "http://www.loc.gov/MARC21/slim", 0, 0); rdoc = xmlNewDoc((const xmlChar*) "1.0"); xmlDocSetRootElement(rdoc, root_ptr); return convert_extract_doc(tinfo, input, p, rdoc); @@ -1285,8 +1396,11 @@ static int filter_extract(void *clientData, struct recExtractCtrl *p) if (!input) return RECCTRL_EXTRACT_ERROR_GENERIC; + + nmem_reset(tinfo->nmem_record); - odr_reset(tinfo->odr_record); + if (p->setStoreData == 0) + return extract_xml_full(tinfo, input, p); switch(input->type) { case DOM_INPUT_XMLREADER: @@ -1304,7 +1418,8 @@ static int filter_extract(void *clientData, struct recExtractCtrl *p) static int ioread_ret(void *context, char *buffer, int len) { struct recRetrieveCtrl *p = context; - return p->stream->readf(p->stream, buffer, len); + int r = p->stream->readf(p->stream, buffer, len); + return r; } static int ioclose_ret(void *context) @@ -1312,7 +1427,7 @@ static int ioclose_ret(void *context) return 0; } -static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p) +static int filter_retrieve(void *clientData, struct recRetrieveCtrl *p) { /* const char *esn = zebra_dom_ns; */ const char *esn = 0; @@ -1342,34 +1457,35 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p) { p->diagnostic = YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_; + p->addinfo = odr_strdup(p->odr, esn); return 0; } params[0] = 0; - set_param_int(params, "id", p->localno, p->odr); + set_param_int(params, "id", p->localno, p->odr->mem); if (p->fname) - set_param_str(params, "filename", p->fname, p->odr); + set_param_str(params, "filename", p->fname, p->odr->mem); if (p->staticrank >= 0) - set_param_int(params, "rank", p->staticrank, p->odr); + set_param_int(params, "rank", p->staticrank, p->odr->mem); if (esn) - set_param_str(params, "schema", esn, p->odr); + set_param_str(params, "schema", esn, p->odr->mem); else if (retrieve->name) - set_param_str(params, "schema", retrieve->name, p->odr); + set_param_str(params, "schema", retrieve->name, p->odr->mem); else if (retrieve->identifier) - set_param_str(params, "schema", retrieve->identifier, p->odr); + set_param_str(params, "schema", retrieve->identifier, p->odr->mem); else - set_param_str(params, "schema", "", p->odr); + set_param_str(params, "schema", "", p->odr->mem); if (p->score >= 0) - set_param_int(params, "score", p->score, p->odr); - set_param_int(params, "size", p->recordSize, p->odr); + set_param_int(params, "score", p->score, p->odr->mem); + set_param_int(params, "size", p->recordSize, p->odr->mem); doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */, 0 /* URL */, 0 /* encoding */, - XML_PARSE_XINCLUDE|XML_PARSE_NOENT); + XML_PARSE_XINCLUDE | XML_PARSE_NOENT | XML_PARSE_NONET); if (!doc) { p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS; @@ -1377,12 +1493,13 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p) } /* retrieve conversion */ - perform_convert(tinfo, 0, retrieve->convert, params, &doc, &last_xsp); + perform_convert(tinfo, 0, p, retrieve->convert, params, &doc, &last_xsp); if (!doc) { p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS; } - else if (p->input_format == VAL_NONE || p->input_format == VAL_TEXT_XML) + else if (!p->input_format + || !oid_oidcmp(p->input_format, yaz_oid_recsyn_xml)) { xmlChar *buf_out; int len_out; @@ -1392,13 +1509,13 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p) else xmlDocDumpMemory(doc, &buf_out, &len_out); - p->output_format = VAL_TEXT_XML; + p->output_format = yaz_oid_recsyn_xml; p->rec_len = len_out; p->rec_buf = odr_malloc(p->odr, p->rec_len); memcpy(p->rec_buf, buf_out, p->rec_len); xmlFree(buf_out); } - else if (p->output_format == VAL_SUTRS) + else if (!oid_oidcmp(p->output_format, yaz_oid_recsyn_sutrs)) { xmlChar *buf_out; int len_out; @@ -1408,7 +1525,7 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p) else xmlDocDumpMemory(doc, &buf_out, &len_out); - p->output_format = VAL_SUTRS; + p->output_format = yaz_oid_recsyn_sutrs; p->rec_len = len_out; p->rec_buf = odr_malloc(p->odr, p->rec_len); memcpy(p->rec_buf, buf_out, p->rec_len); @@ -1447,6 +1564,7 @@ idzebra_filter /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab