X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=recctrl%2Fxslt.c;h=a118277523758ae1215771644d4d860da9cb5e0c;hb=bb90b19c6082e427d5c014e6751a095a37a20b6d;hp=d2cf67a4d3c5469689789ae573fef752d40541fd;hpb=cb55ec9ab9fc8de32816e0125671e43ec0377bde;p=idzebra-moved-to-github.git diff --git a/recctrl/xslt.c b/recctrl/xslt.c index d2cf67a..a118277 100644 --- a/recctrl/xslt.c +++ b/recctrl/xslt.c @@ -1,4 +1,4 @@ -/* $Id: xslt.c,v 1.10 2005-06-15 15:30:05 adam Exp $ +/* $Id: xslt.c,v 1.14 2005-08-19 21:41:37 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -57,6 +57,9 @@ struct filter_info { #define ZEBRA_SCHEMA_XSLT_NS "http://indexdata.dk/zebra/xslt/1" +#define XML_STRCMP(a,b) strcmp((char*)a, b) +#define XML_STRLEN(a) strlen((char*)a) + static const char *zebra_xslt_ns = ZEBRA_SCHEMA_XSLT_NS; static void set_param_xml(const char **params, const char *name, @@ -93,33 +96,27 @@ static void set_param_int(const char **params, const char *name, params[2] = 0; } - -int zebra_xmlInputMatchCallback (char const *filename) +static int zebra_xmlInputMatchCallback (char const *filename) { yaz_log(YLOG_LOG, "match %s", filename); return 0; } - -void * zebra_xmlInputOpenCallback (char const *filename) +static void * zebra_xmlInputOpenCallback (char const *filename) { return 0; } -int zebra_xmlInputReadCallback (void * context, char * buffer, int len) +static int zebra_xmlInputReadCallback (void * context, char * buffer, int len) { return 0; } -int zebra_xmlInputCloseCallback (void * context) +static int zebra_xmlInputCloseCallback (void * context) { return 0; } - - - - static void *filter_init_xslt(Res res, RecType recType) { struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo)); @@ -141,21 +138,13 @@ static void *filter_init_xslt(Res res, RecType recType) return tinfo; } -static void *filter_init_xslt1(Res res, RecType recType) -{ - struct filter_info *tinfo = (struct filter_info *) - filter_init_xslt(res, recType); - tinfo->split_level = "1"; - return tinfo; -} - static int attr_content(struct _xmlAttr *attr, const char *name, const char **dst_content) { - if (!strcmp(attr->name, name) && attr->children && + if (!XML_STRCMP(attr->name, name) && attr->children && attr->children->type == XML_TEXT_NODE) { - *dst_content = attr->children->content; + *dst_content = (const char *)(attr->children->content); return 1; } return 0; @@ -188,13 +177,13 @@ static ZEBRA_RES create_schemas(struct filter_info *tinfo, const char *fname) return ZEBRA_FAIL; ptr = xmlDocGetRootElement(tinfo->doc); if (!ptr || ptr->type != XML_ELEMENT_NODE || - strcmp(ptr->name, "schemaInfo")) + XML_STRCMP(ptr->name, "schemaInfo")) return ZEBRA_FAIL; for (ptr = ptr->children; ptr; ptr = ptr->next) { if (ptr->type != XML_ELEMENT_NODE) continue; - if (!strcmp(ptr->name, "schema")) + if (!XML_STRCMP(ptr->name, "schema")) { struct _xmlAttr *attr; struct filter_schema *schema = xmalloc(sizeof(*schema)); @@ -219,7 +208,7 @@ static ZEBRA_RES create_schemas(struct filter_info *tinfo, const char *fname) xsltParseStylesheetFile( (const xmlChar*) schema->stylesheet); } - else if (!strcmp(ptr->name, "split")) + else if (!XML_STRCMP(ptr->name, "split")) { struct _xmlAttr *attr; for (attr = ptr->properties; attr; attr = attr->next) @@ -296,8 +285,8 @@ static void index_cdata(struct filter_info *tinfo, struct recExtractCtrl *ctrl, index_cdata(tinfo, ctrl, ptr->children, recWord); if (ptr->type != XML_TEXT_NODE) continue; - recWord->term_buf = ptr->content; - recWord->term_len = strlen(ptr->content); + recWord->term_buf = (const char *)ptr->content; + recWord->term_len = XML_STRLEN(ptr->content); (*ctrl->tokenAdd)(recWord); } } @@ -309,31 +298,67 @@ static void index_node(struct filter_info *tinfo, struct recExtractCtrl *ctrl, { index_node(tinfo, ctrl, ptr->children, recWord); if (ptr->type != XML_ELEMENT_NODE || !ptr->ns || - strcmp(ptr->ns->href, zebra_xslt_ns)) + XML_STRCMP(ptr->ns->href, zebra_xslt_ns)) continue; - if (!strcmp(ptr->name, "index")) + if (!XML_STRCMP(ptr->name, "index")) { - char *name_str = 0; + const char *name_str = 0; + const char *type_str = 0; const char *xpath_str = 0; struct _xmlAttr *attr; for (attr = ptr->properties; attr; attr = attr->next) { - if (!strcmp(attr->name, "name") - && attr->children && attr->children->type == XML_TEXT_NODE) - name_str = attr->children->content; - if (!strcmp(attr->name, "xpath") - && attr->children && attr->children->type == XML_TEXT_NODE) - xpath_str = attr->children->content; + attr_content(attr, "name", &name_str); + attr_content(attr, "xpath", &xpath_str); + attr_content(attr, "type", &type_str); } if (name_str) { - recWord->attrStr = name_str; + int prev_type = recWord->index_type; /* save default type */ + + if (type_str && *type_str) + recWord->index_type = *type_str; /* type was given */ + recWord->index_name = name_str; index_cdata(tinfo, ctrl, ptr->children, recWord); + + recWord->index_type = prev_type; /* restore it again */ } } } } +static void index_record(struct filter_info *tinfo,struct recExtractCtrl *ctrl, + xmlNodePtr ptr, RecWord *recWord) +{ + if (ptr && ptr->type == XML_ELEMENT_NODE && ptr->ns && + !XML_STRCMP(ptr->ns->href, zebra_xslt_ns) + && !XML_STRCMP(ptr->name, "record")) + { + const char *type_str = "update"; + const char *id_str = 0; + const char *rank_str = 0; + struct _xmlAttr *attr; + for (attr = ptr->properties; attr; attr = attr->next) + { + attr_content(attr, "type", &type_str); + attr_content(attr, "id", &id_str); + attr_content(attr, "rank", &rank_str); + } + if (id_str) + sscanf(id_str, "%255s", ctrl->match_criteria); + if (rank_str) + { + ctrl->staticrank = atoi(rank_str); + yaz_log(YLOG_LOG, "rank=%d",ctrl->staticrank); + } + else + yaz_log(YLOG_LOG, "no rank"); + + ptr = ptr->children; + } + index_node(tinfo, ctrl, ptr, recWord); +} + static int extract_doc(struct filter_info *tinfo, struct recExtractCtrl *p, xmlDocPtr doc) { @@ -348,10 +373,10 @@ static int extract_doc(struct filter_info *tinfo, struct recExtractCtrl *p, set_param_str(params, "schema", zebra_xslt_ns, tinfo->odr); (*p->init)(p, &recWord); - recWord.reg_type = 'w'; if (schema && schema->stylesheet_xsp) { + xmlNodePtr root_ptr; xmlDocPtr resDoc = xsltApplyStylesheet(schema->stylesheet_xsp, doc, params); @@ -361,7 +386,15 @@ static int extract_doc(struct filter_info *tinfo, struct recExtractCtrl *p, fwrite(buf_out, len_out, 1, stdout); xmlFree(buf_out); } - index_node(tinfo, p, xmlDocGetRootElement(resDoc), &recWord); + root_ptr = xmlDocGetRootElement(resDoc); + if (root_ptr) + index_record(tinfo, p, root_ptr, &recWord); + else + { + yaz_log(YLOG_WARN, "No root for index XML record." + " split_level=%s stylesheet=%s", + tinfo->split_level, schema->stylesheet); + } xmlFreeDoc(resDoc); } xmlDocDumpMemory(doc, &buf_out, &len_out); @@ -403,7 +436,7 @@ static int extract_split(struct filter_info *tinfo, struct recExtractCtrl *p) { xmlNodePtr ptr = xmlTextReaderExpand(tinfo->reader); xmlNodePtr ptr2 = xmlCopyNode(ptr, 1); - xmlDocPtr doc = xmlNewDoc("1.0"); + xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0"); xmlDocSetRootElement(doc, ptr2); @@ -545,7 +578,8 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p) if (p->score >= 0) set_param_int(params, "score", p->score, p->odr); set_param_int(params, "size", p->recordSize, p->odr); - + set_param_int(params, "id", p->localno, p->odr); + if (window_size >= 0) set_param_xml(params, "snippet", snippet_doc(p, 1, window_size), p->odr); @@ -622,16 +656,6 @@ static struct recType filter_type_xslt = { filter_retrieve }; -static struct recType filter_type_xslt1 = { - 0, - "xslt1", - filter_init_xslt1, - filter_config, - filter_destroy, - filter_extract, - filter_retrieve -}; - RecType #ifdef IDZEBRA_STATIC_XSLT idzebra_filter_xslt @@ -641,8 +665,5 @@ idzebra_filter [] = { &filter_type_xslt, -#ifdef LIBXML_READER_ENABLED - &filter_type_xslt1, -#endif 0, };