X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=recctrl%2Fxslt.c;h=6787510d478a003810ab6dce1f48205a5596730e;hb=d8b7f92dc036c81ed51a5f22b8c5bc9a5ae74487;hp=9c7de499f8e4994949cd4f67d776619692c0a9ba;hpb=824db602e8767538b3eb075a604f557ebc9c3085;p=idzebra-moved-to-github.git diff --git a/recctrl/xslt.c b/recctrl/xslt.c index 9c7de49..6787510 100644 --- a/recctrl/xslt.c +++ b/recctrl/xslt.c @@ -1,4 +1,4 @@ -/* $Id: xslt.c,v 1.6 2005-05-31 17:36:16 adam Exp $ +/* $Id: xslt.c,v 1.17 2005-08-24 08:30:37 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -28,6 +28,7 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include #include #include +#include #include #include @@ -40,21 +41,36 @@ struct filter_schema { const char *stylesheet; struct filter_schema *next; const char *default_schema; + const char *include_snippet; xsltStylesheetPtr stylesheet_xsp; }; struct filter_info { xmlDocPtr doc; char *fname; - int split_depth; + const char *split_level; + const char *split_path; ODR odr; struct filter_schema *schemas; xmlTextReaderPtr reader; }; -#define ZEBRA_INDEX_NS "http://indexdata.dk/zebra/indexing/1" -#define ZEBRA_SCHEMA_IDENTITY_NS "http://indexdata.dk/zebra/identity/1" -static const char *zebra_index_ns = ZEBRA_INDEX_NS; +#define ZEBRA_SCHEMA_XSLT_NS "http://indexdata.dk/zebra/xslt/1" + +#define XML_STRCMP(a,b) strcmp((char*)a, b) +#define XML_STRLEN(a) strlen((char*)a) + +static const char *zebra_xslt_ns = ZEBRA_SCHEMA_XSLT_NS; + +static void set_param_xml(const char **params, const char *name, + const char *value, ODR odr) +{ + while (*params) + params++; + params[0] = name; + params[1] = value; + params[2] = 0; +} static void set_param_str(const char **params, const char *name, const char *value, ODR odr) @@ -80,34 +96,59 @@ static void set_param_int(const char **params, const char *name, params[2] = 0; } +#define ENABLE_INPUT_CALLBACK 0 -static void *filter_init_xslt(Res res, RecType recType) +#if ENABLE_INPUT_CALLBACK +static int zebra_xmlInputMatchCallback (char const *filename) +{ + yaz_log(YLOG_LOG, "match %s", filename); + return 0; +} + +static void * zebra_xmlInputOpenCallback (char const *filename) +{ + return 0; +} + +static int zebra_xmlInputReadCallback (void * context, char * buffer, int len) +{ + return 0; +} + +static int zebra_xmlInputCloseCallback (void * context) +{ + return 0; +} +#endif + +static void *filter_init(Res res, RecType recType) { struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo)); tinfo->reader = 0; tinfo->fname = 0; - tinfo->split_depth = 0; + tinfo->split_level = 0; + tinfo->split_path = 0; tinfo->odr = odr_createmem(ODR_ENCODE); tinfo->doc = 0; tinfo->schemas = 0; - return tinfo; -} -static void *filter_init_xslt1(Res res, RecType recType) -{ - struct filter_info *tinfo = (struct filter_info *) - filter_init_xslt(res, recType); - tinfo->split_depth = 1; +#if ENABLE_INPUT_CALLBACK + xmlRegisterDefaultInputCallbacks(); + xmlRegisterInputCallbacks(zebra_xmlInputMatchCallback, + zebra_xmlInputOpenCallback, + zebra_xmlInputReadCallback, + zebra_xmlInputCloseCallback); +#endif return tinfo; } static int attr_content(struct _xmlAttr *attr, const char *name, const char **dst_content) { - if (!strcmp(attr->name, name) && attr->children && + if (!XML_STRCMP(attr->name, name) && attr->children && attr->children->type == XML_TEXT_NODE) { - *dst_content = attr->children->content; + *dst_content = (const char *)(attr->children->content); return 1; } return 0; @@ -140,12 +181,13 @@ static ZEBRA_RES create_schemas(struct filter_info *tinfo, const char *fname) return ZEBRA_FAIL; ptr = xmlDocGetRootElement(tinfo->doc); if (!ptr || ptr->type != XML_ELEMENT_NODE || - strcmp(ptr->name, "schemaInfo")) + XML_STRCMP(ptr->name, "schemaInfo")) return ZEBRA_FAIL; for (ptr = ptr->children; ptr; ptr = ptr->next) { - if (ptr->type == XML_ELEMENT_NODE && - !strcmp(ptr->name, "schema")) + if (ptr->type != XML_ELEMENT_NODE) + continue; + if (!XML_STRCMP(ptr->name, "schema")) { struct _xmlAttr *attr; struct filter_schema *schema = xmalloc(sizeof(*schema)); @@ -155,6 +197,7 @@ static ZEBRA_RES create_schemas(struct filter_info *tinfo, const char *fname) schema->default_schema = 0; schema->next = tinfo->schemas; schema->stylesheet_xsp = 0; + schema->include_snippet = 0; tinfo->schemas = schema; for (attr = ptr->properties; attr; attr = attr->next) { @@ -162,12 +205,27 @@ static ZEBRA_RES create_schemas(struct filter_info *tinfo, const char *fname) attr_content(attr, "name", &schema->name); attr_content(attr, "stylesheet", &schema->stylesheet); attr_content(attr, "default", &schema->default_schema); + attr_content(attr, "snippet", &schema->include_snippet); } if (schema->stylesheet) schema->stylesheet_xsp = xsltParseStylesheetFile( (const xmlChar*) schema->stylesheet); } + else if (!XML_STRCMP(ptr->name, "split")) + { + struct _xmlAttr *attr; + for (attr = ptr->properties; attr; attr = attr->next) + { + attr_content(attr, "level", &tinfo->split_level); + attr_content(attr, "path", &tinfo->split_path); + } + } + else + { + yaz_log(YLOG_WARN, "Bad element %s in %s", ptr->name, fname); + return ZEBRA_FAIL; + } } return ZEBRA_OK; } @@ -223,16 +281,16 @@ static int ioclose_ex(void *context) return 0; } -static void index_field(struct filter_info *tinfo, struct recExtractCtrl *ctrl, +static void index_cdata(struct filter_info *tinfo, struct recExtractCtrl *ctrl, xmlNodePtr ptr, RecWord *recWord) { for(; ptr; ptr = ptr->next) { - index_field(tinfo, ctrl, ptr->children, recWord); + index_cdata(tinfo, ctrl, ptr->children, recWord); if (ptr->type != XML_TEXT_NODE) continue; - recWord->term_buf = ptr->content; - recWord->term_len = strlen(ptr->content); + recWord->term_buf = (const char *)ptr->content; + recWord->term_len = XML_STRLEN(ptr->content); (*ctrl->tokenAdd)(recWord); } } @@ -244,31 +302,67 @@ static void index_node(struct filter_info *tinfo, struct recExtractCtrl *ctrl, { index_node(tinfo, ctrl, ptr->children, recWord); if (ptr->type != XML_ELEMENT_NODE || !ptr->ns || - strcmp(ptr->ns->href, zebra_index_ns)) + XML_STRCMP(ptr->ns->href, zebra_xslt_ns)) continue; - if (!strcmp(ptr->name, "index")) + if (!XML_STRCMP(ptr->name, "index")) { - char *field_str = 0; + const char *name_str = 0; + const char *type_str = 0; const char *xpath_str = 0; struct _xmlAttr *attr; for (attr = ptr->properties; attr; attr = attr->next) { - if (!strcmp(attr->name, "field") - && attr->children && attr->children->type == XML_TEXT_NODE) - field_str = attr->children->content; - if (!strcmp(attr->name, "xpath") - && attr->children && attr->children->type == XML_TEXT_NODE) - xpath_str = attr->children->content; + attr_content(attr, "name", &name_str); + attr_content(attr, "xpath", &xpath_str); + attr_content(attr, "type", &type_str); } - if (field_str) + if (name_str) { - recWord->attrStr = field_str; - index_field(tinfo, ctrl, ptr->children, recWord); + int prev_type = recWord->index_type; /* save default type */ + + if (type_str && *type_str) + recWord->index_type = *type_str; /* type was given */ + recWord->index_name = name_str; + index_cdata(tinfo, ctrl, ptr->children, recWord); + + recWord->index_type = prev_type; /* restore it again */ } } } } +static void index_record(struct filter_info *tinfo,struct recExtractCtrl *ctrl, + xmlNodePtr ptr, RecWord *recWord) +{ + if (ptr && ptr->type == XML_ELEMENT_NODE && ptr->ns && + !XML_STRCMP(ptr->ns->href, zebra_xslt_ns) + && !XML_STRCMP(ptr->name, "record")) + { + const char *type_str = "update"; + const char *id_str = 0; + const char *rank_str = 0; + struct _xmlAttr *attr; + for (attr = ptr->properties; attr; attr = attr->next) + { + attr_content(attr, "type", &type_str); + attr_content(attr, "id", &id_str); + attr_content(attr, "rank", &rank_str); + } + if (id_str) + sscanf(id_str, "%255s", ctrl->match_criteria); + if (rank_str) + { + ctrl->staticrank = atoi(rank_str); + yaz_log(YLOG_LOG, "rank=%d",ctrl->staticrank); + } + else + yaz_log(YLOG_LOG, "no rank"); + + ptr = ptr->children; + } + index_node(tinfo, ctrl, ptr, recWord); +} + static int extract_doc(struct filter_info *tinfo, struct recExtractCtrl *p, xmlDocPtr doc) { @@ -277,16 +371,16 @@ static int extract_doc(struct filter_info *tinfo, struct recExtractCtrl *p, xmlChar *buf_out; int len_out; - struct filter_schema *schema = lookup_schema(tinfo, ZEBRA_INDEX_NS); + struct filter_schema *schema = lookup_schema(tinfo, zebra_xslt_ns); params[0] = 0; - set_param_str(params, "schema", ZEBRA_INDEX_NS, tinfo->odr); + set_param_str(params, "schema", zebra_xslt_ns, tinfo->odr); (*p->init)(p, &recWord); - recWord.reg_type = 'w'; if (schema && schema->stylesheet_xsp) { + xmlNodePtr root_ptr; xmlDocPtr resDoc = xsltApplyStylesheet(schema->stylesheet_xsp, doc, params); @@ -296,7 +390,15 @@ static int extract_doc(struct filter_info *tinfo, struct recExtractCtrl *p, fwrite(buf_out, len_out, 1, stdout); xmlFree(buf_out); } - index_node(tinfo, p, xmlDocGetRootElement(resDoc), &recWord); + root_ptr = xmlDocGetRootElement(resDoc); + if (root_ptr) + index_record(tinfo, p, root_ptr, &recWord); + else + { + yaz_log(YLOG_WARN, "No root for index XML record." + " split_level=%s stylesheet=%s", + tinfo->split_level, schema->stylesheet); + } xmlFreeDoc(resDoc); } xmlDocDumpMemory(doc, &buf_out, &len_out); @@ -312,6 +414,7 @@ static int extract_doc(struct filter_info *tinfo, struct recExtractCtrl *p, static int extract_split(struct filter_info *tinfo, struct recExtractCtrl *p) { int ret; + int split_depth = 0; if (p->first_record) { if (tinfo->reader) @@ -325,21 +428,24 @@ static int extract_split(struct filter_info *tinfo, struct recExtractCtrl *p) if (!tinfo->reader) return RECCTRL_EXTRACT_ERROR_GENERIC; + if (tinfo->split_level) + split_depth = atoi(tinfo->split_level); ret = xmlTextReaderRead(tinfo->reader); while (ret == 1) { int type = xmlTextReaderNodeType(tinfo->reader); int depth = xmlTextReaderDepth(tinfo->reader); - if (tinfo->split_depth == 0 || - (type == XML_READER_TYPE_ELEMENT && tinfo->split_depth == depth)) + if (split_depth == 0 || + (split_depth > 0 && + type == XML_READER_TYPE_ELEMENT && split_depth == depth)) { xmlNodePtr ptr = xmlTextReaderExpand(tinfo->reader); xmlNodePtr ptr2 = xmlCopyNode(ptr, 1); - xmlDocPtr doc = xmlNewDoc("1.0"); + xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0"); xmlDocSetRootElement(doc, ptr2); - return extract_doc(tinfo, p, doc); - } + return extract_doc(tinfo, p, doc); + } ret = xmlTextReaderRead(tinfo->reader); } xmlFreeTextReader(tinfo->reader); @@ -371,7 +477,7 @@ static int filter_extract(void *clientData, struct recExtractCtrl *p) odr_reset(tinfo->odr); - if (tinfo->split_depth == 0) + if (tinfo->split_level == 0 && tinfo->split_path == 0) return extract_full(tinfo, p); else { @@ -391,24 +497,77 @@ static int ioclose_ret(void *context) } +static const char *snippet_doc(struct recRetrieveCtrl *p, int text_mode, + int window_size) +{ + const char *xml_doc_str; + int ord = 0; + WRBUF wrbuf = wrbuf_alloc(); + zebra_snippets *res = + zebra_snippets_window(p->doc_snippet, p->hit_snippet, window_size); + zebra_snippet_word *w = zebra_snippets_list(res); + + if (text_mode) + wrbuf_printf(wrbuf, "\'"); + else + wrbuf_printf(wrbuf, "\n", zebra_xslt_ns); + for (; w; w = w->next) + { + if (ord == 0) + ord = w->ord; + else if (ord != w->ord) + + break; + if (text_mode) + wrbuf_printf(wrbuf, "%s%s%s ", + w->match ? "*" : "", + w->term, + w->match ? "*" : ""); + else + { + wrbuf_printf(wrbuf, " ", + w->ord, w->seqno, + (w->match ? "match='1'" : "")); + wrbuf_xmlputs(wrbuf, w->term); + wrbuf_printf(wrbuf, "\n"); + } + } + if (text_mode) + wrbuf_printf(wrbuf, "\'"); + else + wrbuf_printf(wrbuf, "\n"); + + xml_doc_str = odr_strdup(p->odr, wrbuf_buf(wrbuf)); + + zebra_snippets_destroy(res); + wrbuf_free(wrbuf, 1); + return xml_doc_str; +} + static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p) { - const char *esn = ZEBRA_SCHEMA_IDENTITY_NS; - const char *params[10]; + const char *esn = zebra_xslt_ns; + const char *params[20]; struct filter_info *tinfo = clientData; xmlDocPtr resDoc; xmlDocPtr doc; struct filter_schema *schema; + int window_size = -1; if (p->comp) { - if (p->comp->which != Z_RecordComp_simple - || p->comp->u.simple->which != Z_ElementSetNames_generic) + if (p->comp->which == Z_RecordComp_simple + && p->comp->u.simple->which == Z_ElementSetNames_generic) { - p->diagnostic = YAZ_BIB1_PRESENT_COMP_SPEC_PARAMETER_UNSUPP; - return 0; + esn = p->comp->u.simple->u.generic; + } + else if (p->comp->which == Z_RecordComp_complex + && p->comp->u.complex->generic->elementSpec + && p->comp->u.complex->generic->elementSpec->which == + Z_ElementSpec_elementSetName) + { + esn = p->comp->u.complex->generic->elementSpec->u.elementSetName; } - esn = p->comp->u.simple->u.generic; } schema = lookup_schema(tinfo, esn); if (!schema) @@ -418,6 +577,9 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p) return 0; } + if (schema->include_snippet) + window_size = atoi(schema->include_snippet); + params[0] = 0; set_param_str(params, "schema", esn, p->odr); if (p->fname) @@ -425,7 +587,11 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p) if (p->score >= 0) set_param_int(params, "score", p->score, p->odr); set_param_int(params, "size", p->recordSize, p->odr); - + set_param_int(params, "id", p->localno, p->odr); + + if (window_size >= 0) + set_param_xml(params, "snippet", snippet_doc(p, 1, window_size), + p->odr); doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */, 0 /* URL */, 0 /* encoding */, @@ -436,6 +602,13 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p) return 0; } + if (window_size >= 0) + { + xmlNodePtr node = xmlDocGetRootElement(doc); + const char *snippet_str = snippet_doc(p, 0, window_size); + xmlDocPtr snippet_doc = xmlParseMemory(snippet_str, strlen(snippet_str)); + xmlAddChild(node, xmlDocGetRootElement(snippet_doc)); + } if (!schema->stylesheet_xsp) resDoc = doc; else @@ -482,20 +655,10 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p) return 0; } -static struct recType filter_type_xslt = { +static struct recType filter_type = { 0, "xslt", - filter_init_xslt, - filter_config, - filter_destroy, - filter_extract, - filter_retrieve -}; - -static struct recType filter_type_xslt1 = { - 0, - "xslt1", - filter_init_xslt1, + filter_init, filter_config, filter_destroy, filter_extract, @@ -510,9 +673,6 @@ idzebra_filter #endif [] = { - &filter_type_xslt, -#ifdef LIBXML_READER_ENABLED - &filter_type_xslt1, -#endif + &filter_type, 0, };