X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Falvis.c;h=a752890d8acc320f689435f3f7a99e7c7f5145ae;hp=40405e1c33cfe9bc4304d7eb6ea8fda61d0e6a95;hb=78b13a3ac6a79768fb609c14db2a8e0c94a9c4da;hpb=852d5f1f9aa0a70f7e54a68143ee86752394a2f2 diff --git a/index/alvis.c b/index/alvis.c index 40405e1..a752890 100644 --- a/index/alvis.c +++ b/index/alvis.c @@ -1,8 +1,5 @@ -/* $Id: alvis.c,v 1.3 2006-08-22 13:39:26 adam Exp $ - Copyright (C) 1995-2006 - Index Data ApS - -This file is part of the Zebra server. +/* This file is part of the Zebra server. + Copyright (C) 1995-2008 Index Data Zebra is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -26,6 +23,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include +#include #include #include @@ -35,6 +33,10 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include +#if YAZ_HAVE_EXSLT +#include +#endif + #include #include @@ -45,7 +47,6 @@ struct filter_schema { struct filter_schema *next; const char *default_schema; /* char default_schema; */ - const char *include_snippet; xsltStylesheetPtr stylesheet_xsp; }; @@ -54,7 +55,7 @@ struct filter_info { char *fname; char *full_name; const char *profile_path; - const char *split_level; + int split_level; const char *split_path; ODR odr; struct filter_schema *schemas; @@ -68,16 +69,6 @@ struct filter_info { static const char *zebra_xslt_ns = ZEBRA_SCHEMA_XSLT_NS; -static void set_param_xml(const char **params, const char *name, - const char *value, ODR odr) -{ - while (*params) - params++; - params[0] = name; - params[1] = value; - params[2] = 0; -} - static void set_param_str(const char **params, const char *name, const char *value, ODR odr) { @@ -140,6 +131,10 @@ static void *filter_init(Res res, RecType recType) tinfo->doc = 0; tinfo->schemas = 0; +#if YAZ_HAVE_EXSLT + exsltRegisterAll(); +#endif + #if ENABLE_INPUT_CALLBACK xmlRegisterDefaultInputCallbacks(); xmlRegisterInputCallbacks(zebra_xmlInputMatchCallback, @@ -153,8 +148,8 @@ static void *filter_init(Res res, RecType recType) static int attr_content(struct _xmlAttr *attr, const char *name, const char **dst_content) { - if (!XML_STRCMP(attr->name, name) && attr->children && - attr->children->type == XML_TEXT_NODE) + if (!XML_STRCMP(attr->name, name) && attr->children + && attr->children->type == XML_TEXT_NODE) { *dst_content = (const char *)(attr->children->content); return 1; @@ -185,27 +180,29 @@ static ZEBRA_RES create_schemas(struct filter_info *tinfo, const char *fname) char tmp_full_name[1024]; xmlNodePtr ptr; tinfo->fname = xstrdup(fname); - - if (yaz_filepath_resolve(tinfo->fname, tinfo->profile_path, + + if (yaz_filepath_resolve(tinfo->fname, tinfo->profile_path, NULL, tmp_full_name)) - tinfo->full_name = xstrdup(tmp_full_name); + tinfo->full_name = xstrdup(tmp_full_name); else - tinfo->full_name = xstrdup(tinfo->fname); - + tinfo->full_name = xstrdup(tinfo->fname); + yaz_log(YLOG_LOG, "alvis filter: loading config file %s", tinfo->full_name); - + tinfo->doc = xmlParseFile(tinfo->full_name); - - if (!tinfo->doc){ + + if (!tinfo->doc) + { yaz_log(YLOG_WARN, "alvis filter: could not parse config file %s", tinfo->full_name); - + return ZEBRA_FAIL; } ptr = xmlDocGetRootElement(tinfo->doc); - if (!ptr || ptr->type != XML_ELEMENT_NODE || - XML_STRCMP(ptr->name, "schemaInfo")){ + if (!ptr || ptr->type != XML_ELEMENT_NODE + || XML_STRCMP(ptr->name, "schemaInfo")) + { yaz_log(YLOG_WARN, "alvis filter: config file %s :" " expected root element ", @@ -219,7 +216,6 @@ static ZEBRA_RES create_schemas(struct filter_info *tinfo, const char *fname) continue; if (!XML_STRCMP(ptr->name, "schema")) { - char tmp_xslt_full_name[1024]; struct _xmlAttr *attr; struct filter_schema *schema = xmalloc(sizeof(*schema)); schema->name = 0; @@ -228,7 +224,6 @@ static ZEBRA_RES create_schemas(struct filter_info *tinfo, const char *fname) schema->default_schema = 0; schema->next = tinfo->schemas; schema->stylesheet_xsp = 0; - schema->include_snippet = 0; tinfo->schemas = schema; for (attr = ptr->properties; attr; attr = attr->next) { @@ -236,33 +231,43 @@ static ZEBRA_RES create_schemas(struct filter_info *tinfo, const char *fname) attr_content(attr, "name", &schema->name); attr_content(attr, "stylesheet", &schema->stylesheet); attr_content(attr, "default", &schema->default_schema); - attr_content(attr, "snippet", &schema->include_snippet); } /*yaz_log(YLOG_LOG, "XSLT add %s %s %s", schema->name, schema->identifier, schema->stylesheet); */ /* find requested schema */ - if (schema->stylesheet){ - yaz_filepath_resolve(schema->stylesheet, tinfo->profile_path, - NULL, tmp_xslt_full_name); - schema->stylesheet_xsp - = xsltParseStylesheetFile((const xmlChar*) tmp_xslt_full_name); - if (!schema->stylesheet_xsp) - yaz_log(YLOG_WARN, - "alvis filter: could not parse xslt stylesheet %s", - tmp_xslt_full_name); + if (schema->stylesheet) + { + char tmp_xslt_full_name[1024]; + if (!yaz_filepath_resolve(schema->stylesheet, tinfo->profile_path, + NULL, tmp_xslt_full_name)) + { + yaz_log(YLOG_WARN, + "alvis filter: stylesheet %s not found in path %s", + schema->stylesheet, tinfo->profile_path); + return ZEBRA_FAIL; + } + schema->stylesheet_xsp + = xsltParseStylesheetFile((const xmlChar*) tmp_xslt_full_name); + if (!schema->stylesheet_xsp) + { + yaz_log(YLOG_WARN, + "alvis filter: could not parse xslt stylesheet %s", + tmp_xslt_full_name); + return ZEBRA_FAIL; + } } - - } else if (!XML_STRCMP(ptr->name, "split")) { struct _xmlAttr *attr; for (attr = ptr->properties; attr; attr = attr->next) { - attr_content(attr, "level", &tinfo->split_level); - attr_content(attr, "path", &tinfo->split_path); + const char *split_level_str = 0; + attr_content(attr, "level", &split_level_str); + tinfo->split_level = + split_level_str ? atoi(split_level_str) : 0; } } else @@ -305,28 +310,27 @@ static struct filter_schema *lookup_schema(struct filter_info *tinfo, static ZEBRA_RES filter_config(void *clientData, Res res, const char *args) { struct filter_info *tinfo = clientData; - if (!args || !*args){ - yaz_log(YLOG_WARN, "alvis filter: need config file"); - return ZEBRA_FAIL; + if (!args || !*args) + { + yaz_log(YLOG_WARN, "alvis filter: need config file"); + return ZEBRA_FAIL; } if (tinfo->fname && !strcmp(args, tinfo->fname)) return ZEBRA_OK; - tinfo->profile_path - /* = res_get_def(res, "profilePath", DEFAULT_PROFILE_PATH); */ - = res_get(res, "profilePath"); + tinfo->profile_path = res_get(res, "profilePath"); yaz_log(YLOG_LOG, "alvis filter: profilePath %s", tinfo->profile_path); destroy_schemas(tinfo); - create_schemas(tinfo, args); - return ZEBRA_OK; + return create_schemas(tinfo, args); } static void filter_destroy(void *clientData) { struct filter_info *tinfo = clientData; destroy_schemas(tinfo); + xfree(tinfo->full_name); if (tinfo->reader) xmlFreeTextReader(tinfo->reader); odr_destroy(tinfo->odr); @@ -381,10 +385,10 @@ static void index_node(struct filter_info *tinfo, struct recExtractCtrl *ctrl, } if (name_str) { - int prev_type = recWord->index_type; /* save default type */ + const char *prev_type = recWord->index_type; /* save default type */ if (type_str && *type_str) - recWord->index_type = *type_str; /* type was given */ + recWord->index_type = (const char *) type_str; /* type was given */ recWord->index_name = name_str; index_cdata(tinfo, ctrl, ptr->children, recWord); @@ -416,8 +420,7 @@ static void index_record(struct filter_info *tinfo,struct recExtractCtrl *ctrl, sscanf(id_str, "%255s", ctrl->match_criteria); if (rank_str) - ctrl->staticrank = atoi(rank_str); - + ctrl->staticrank = atozint(rank_str); ptr = ptr->children; } @@ -463,7 +466,7 @@ static int extract_doc(struct filter_info *tinfo, struct recExtractCtrl *p, else { yaz_log(YLOG_WARN, "No root for index XML record." - " split_level=%s stylesheet=%s", + " split_level=%d stylesheet=%s", tinfo->split_level, schema->stylesheet); } xmlFreeDoc(resDoc); @@ -471,7 +474,8 @@ static int extract_doc(struct filter_info *tinfo, struct recExtractCtrl *p, xmlDocDumpMemory(doc, &buf_out, &len_out); if (p->flagShowRecords) fwrite(buf_out, len_out, 1, stdout); - (*p->setStoreData)(p, buf_out, len_out); + if (p->setStoreData) + (*p->setStoreData)(p, buf_out, len_out); xmlFree(buf_out); xmlFreeDoc(doc); @@ -481,7 +485,7 @@ static int extract_doc(struct filter_info *tinfo, struct recExtractCtrl *p, static int extract_split(struct filter_info *tinfo, struct recExtractCtrl *p) { int ret; - int split_depth = 0; + if (p->first_record) { if (tinfo->reader) @@ -490,28 +494,36 @@ static int extract_split(struct filter_info *tinfo, struct recExtractCtrl *p) p /* I/O handler */, 0 /* URL */, 0 /* encoding */, - XML_PARSE_XINCLUDE); + XML_PARSE_XINCLUDE + | XML_PARSE_NOENT + | XML_PARSE_NONET); } if (!tinfo->reader) return RECCTRL_EXTRACT_ERROR_GENERIC; - if (tinfo->split_level) - split_depth = atoi(tinfo->split_level); ret = xmlTextReaderRead(tinfo->reader); - while (ret == 1) { + while (ret == 1) + { int type = xmlTextReaderNodeType(tinfo->reader); int depth = xmlTextReaderDepth(tinfo->reader); - if (split_depth == 0 || - (split_depth > 0 && - type == XML_READER_TYPE_ELEMENT && split_depth == depth)) + if (type == XML_READER_TYPE_ELEMENT && tinfo->split_level == depth) { xmlNodePtr ptr = xmlTextReaderExpand(tinfo->reader); - xmlNodePtr ptr2 = xmlCopyNode(ptr, 1); - xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0"); - - xmlDocSetRootElement(doc, ptr2); - - return extract_doc(tinfo, p, doc); + if (ptr) + { + xmlNodePtr ptr2 = xmlCopyNode(ptr, 1); + xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0"); + + xmlDocSetRootElement(doc, ptr2); + + return extract_doc(tinfo, p, doc); + } + else + { + xmlFreeTextReader(tinfo->reader); + tinfo->reader = 0; + return RECCTRL_EXTRACT_ERROR_GENERIC; + } } ret = xmlTextReaderRead(tinfo->reader); } @@ -524,18 +536,24 @@ static int extract_full(struct filter_info *tinfo, struct recExtractCtrl *p) { if (p->first_record) /* only one record per stream */ { - xmlDocPtr doc = xmlReadIO(ioread_ex, ioclose_ex, p /* I/O handler */, - 0 /* URL */, - 0 /* encoding */, - XML_PARSE_XINCLUDE); - if (!doc) - { - return RECCTRL_EXTRACT_ERROR_GENERIC; - } - return extract_doc(tinfo, p, doc); + xmlDocPtr doc = xmlReadIO(ioread_ex, ioclose_ex, p /* I/O handler */, + 0 /* URL */, + 0 /* encoding */, + XML_PARSE_XINCLUDE + | XML_PARSE_NOENT + | XML_PARSE_NONET); + if (!doc) + return RECCTRL_EXTRACT_ERROR_GENERIC; + /* else { + xmlNodePtr root = xmlDocGetRootElement(doc); + if (!root) + return RECCTRL_EXTRACT_ERROR_GENERIC; + } */ + + return extract_doc(tinfo, p, doc); } else - return RECCTRL_EXTRACT_EOF; + return RECCTRL_EXTRACT_EOF; } static int filter_extract(void *clientData, struct recExtractCtrl *p) @@ -543,13 +561,10 @@ static int filter_extract(void *clientData, struct recExtractCtrl *p) struct filter_info *tinfo = clientData; odr_reset(tinfo->odr); - - if (tinfo->split_level == 0 && tinfo->split_path == 0) - return extract_full(tinfo, p); + if (tinfo->split_level == 0 || p->setStoreData == 0) + return extract_full(tinfo, p); else - { - return extract_split(tinfo, p); - } + return extract_split(tinfo, p); } static int ioread_ret(void *context, char *buffer, int len) @@ -563,54 +578,6 @@ static int ioclose_ret(void *context) return 0; } - -static const char *snippet_doc(struct recRetrieveCtrl *p, int text_mode, - int window_size) -{ - const char *xml_doc_str; - int ord = 0; - WRBUF wrbuf = wrbuf_alloc(); - zebra_snippets *res = - zebra_snippets_window(p->doc_snippet, p->hit_snippet, window_size); - zebra_snippet_word *w = zebra_snippets_list(res); - - if (text_mode) - wrbuf_printf(wrbuf, "\'"); - else - wrbuf_printf(wrbuf, "\n", zebra_xslt_ns); - for (; w; w = w->next) - { - if (ord == 0) - ord = w->ord; - else if (ord != w->ord) - - break; - if (text_mode) - wrbuf_printf(wrbuf, "%s%s%s ", - w->match ? "*" : "", - w->term, - w->match ? "*" : ""); - else - { - wrbuf_printf(wrbuf, " ", - w->ord, w->seqno, - (w->match ? "match='1'" : "")); - wrbuf_xmlputs(wrbuf, w->term); - wrbuf_printf(wrbuf, "\n"); - } - } - if (text_mode) - wrbuf_printf(wrbuf, "\'"); - else - wrbuf_printf(wrbuf, "\n"); - - xml_doc_str = odr_strdup(p->odr, wrbuf_buf(wrbuf)); - - zebra_snippets_destroy(res); - wrbuf_free(wrbuf, 1); - return xml_doc_str; -} - static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p) { /* const char *esn = zebra_xslt_ns; */ @@ -620,7 +587,6 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p) xmlDocPtr resDoc; xmlDocPtr doc; struct filter_schema *schema; - int window_size = -1; if (p->comp) { @@ -645,9 +611,6 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p) return 0; } - if (schema->include_snippet) - window_size = atoi(schema->include_snippet); - params[0] = 0; set_param_int(params, "id", p->localno, p->odr); if (p->fname) @@ -669,26 +632,16 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p) set_param_int(params, "score", p->score, p->odr); set_param_int(params, "size", p->recordSize, p->odr); - if (window_size >= 0) - set_param_xml(params, "snippet", snippet_doc(p, 1, window_size), - p->odr); doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */, 0 /* URL */, 0 /* encoding */, - XML_PARSE_XINCLUDE); + XML_PARSE_XINCLUDE | XML_PARSE_NOENT | XML_PARSE_NONET); if (!doc) { p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS; return 0; } - if (window_size >= 0) - { - xmlNodePtr node = xmlDocGetRootElement(doc); - const char *snippet_str = snippet_doc(p, 0, window_size); - xmlDocPtr snippet_doc = xmlParseMemory(snippet_str, strlen(snippet_str)); - xmlAddChild(node, xmlDocGetRootElement(snippet_doc)); - } if (!schema->stylesheet_xsp) resDoc = doc; else @@ -701,29 +654,36 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p) { p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS; } - else if (p->input_format == VAL_NONE || p->input_format == VAL_TEXT_XML) + else if (!p->input_format + || !oid_oidcmp(p->input_format, yaz_oid_recsyn_xml)) { xmlChar *buf_out; int len_out; - xsltSaveResultToString(&buf_out, &len_out, resDoc, - schema->stylesheet_xsp); + if (schema->stylesheet_xsp) + xsltSaveResultToString(&buf_out, &len_out, resDoc, + schema->stylesheet_xsp); + else + xmlDocDumpMemory(resDoc, &buf_out, &len_out); - p->output_format = VAL_TEXT_XML; + p->output_format = yaz_oid_recsyn_xml; p->rec_len = len_out; p->rec_buf = odr_malloc(p->odr, p->rec_len); memcpy(p->rec_buf, buf_out, p->rec_len); xmlFree(buf_out); } - else if (p->output_format == VAL_SUTRS) + else if (!oid_oidcmp(p->output_format, yaz_oid_recsyn_sutrs)) { xmlChar *buf_out; int len_out; - xsltSaveResultToString(&buf_out, &len_out, resDoc, - schema->stylesheet_xsp); + if (schema->stylesheet_xsp) + xsltSaveResultToString(&buf_out, &len_out, resDoc, + schema->stylesheet_xsp); + else + xmlDocDumpMemory(resDoc, &buf_out, &len_out); - p->output_format = VAL_SUTRS; + p->output_format = yaz_oid_recsyn_sutrs; p->rec_len = len_out; p->rec_buf = odr_malloc(p->odr, p->rec_len); memcpy(p->rec_buf, buf_out, p->rec_len);