optimize for C source code
[idzebra-moved-to-github.git] / recctrl / xslt.c
index be771d3..6787510 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: xslt.c,v 1.8 2005-06-07 11:36:38 adam Exp $
+/* $Id: xslt.c,v 1.17 2005-08-24 08:30:37 adam Exp $
    Copyright (C) 1995-2005
    Index Data ApS
 
@@ -28,6 +28,7 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 #include <libxml/xmlversion.h>
 #include <libxml/parser.h>
 #include <libxml/tree.h>
+#include <libxml/xmlIO.h>
 #include <libxml/xmlreader.h>
 #include <libxslt/transform.h>
 
@@ -40,6 +41,7 @@ struct filter_schema {
     const char *stylesheet;
     struct filter_schema *next;
     const char *default_schema;
+    const char *include_snippet;
     xsltStylesheetPtr stylesheet_xsp;
 };
 
@@ -53,9 +55,12 @@ struct filter_info {
     xmlTextReaderPtr reader;
 };
 
-#define ZEBRA_INDEX_NS "http://indexdata.dk/zebra/indexing/1"
-#define ZEBRA_SCHEMA_IDENTITY_NS "http://indexdata.dk/zebra/identity/1"
-static const char *zebra_index_ns = ZEBRA_INDEX_NS;
+#define ZEBRA_SCHEMA_XSLT_NS "http://indexdata.dk/zebra/xslt/1"
+/* <string.h> wrappers for libxml2 strings; arguments are parenthesized. */
+#define XML_STRCMP(a,b)   strcmp((const char *)(a), (b))
+#define XML_STRLEN(a) strlen((const char *)(a))
+
+static const char *zebra_xslt_ns = ZEBRA_SCHEMA_XSLT_NS;
 
 static void set_param_xml(const char **params, const char *name,
                          const char *value, ODR odr)
@@ -91,8 +96,32 @@ static void set_param_int(const char **params, const char *name,
     params[2] = 0;
 }
 
+#define ENABLE_INPUT_CALLBACK 0
+
+#if ENABLE_INPUT_CALLBACK
+static int zebra_xmlInputMatchCallback (char const *filename)
+{   /* Stub (built only if ENABLE_INPUT_CALLBACK != 0): logs the filename. */
+    yaz_log(YLOG_LOG, "match %s", filename);
+    return 0; /* NOTE(review): presumably 0 means "not handled here" - confirm */
+}
+
+static void * zebra_xmlInputOpenCallback (char const *filename)
+{   /* Stub: ignores filename and returns a null context. */
+    return 0;
+}
+
+static int zebra_xmlInputReadCallback (void * context, char * buffer, int len)
+{   /* Stub: ignores all arguments and writes nothing into buffer. */
+    return 0; /* NOTE(review): presumably read as "0 bytes / end of input" */
+}
 
-static void *filter_init_xslt(Res res, RecType recType)
+static int zebra_xmlInputCloseCallback (void * context)
+{   /* Stub: nothing to release; context is ignored. */
+    return 0;
+}
+#endif
+
+static void *filter_init(Res res, RecType recType)
 {
     struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
     tinfo->reader = 0;
@@ -102,24 +131,24 @@ static void *filter_init_xslt(Res res, RecType recType)
     tinfo->odr = odr_createmem(ODR_ENCODE);
     tinfo->doc = 0;
     tinfo->schemas = 0;
-    return tinfo;
-}
 
-static void *filter_init_xslt1(Res res, RecType recType)
-{
-    struct filter_info *tinfo = (struct filter_info *)
-       filter_init_xslt(res, recType);
-    tinfo->split_level = "1";
+#if ENABLE_INPUT_CALLBACK
+    xmlRegisterDefaultInputCallbacks();
+    xmlRegisterInputCallbacks(zebra_xmlInputMatchCallback,
+                             zebra_xmlInputOpenCallback,
+                             zebra_xmlInputReadCallback,
+                             zebra_xmlInputCloseCallback);
+#endif
     return tinfo;
 }
 
 static int attr_content(struct _xmlAttr *attr, const char *name,
                        const char **dst_content)
 {
-    if (!strcmp(attr->name, name) && attr->children &&
+    if (!XML_STRCMP(attr->name, name) && attr->children &&
        attr->children->type == XML_TEXT_NODE)
     {
-       *dst_content = attr->children->content;
+       *dst_content = (const char *)(attr->children->content);
        return 1;
     }
     return 0;
@@ -152,13 +181,13 @@ static ZEBRA_RES create_schemas(struct filter_info *tinfo, const char *fname)
        return ZEBRA_FAIL;
     ptr = xmlDocGetRootElement(tinfo->doc);
     if (!ptr || ptr->type != XML_ELEMENT_NODE ||
-       strcmp(ptr->name, "schemaInfo"))
+       XML_STRCMP(ptr->name, "schemaInfo"))
        return ZEBRA_FAIL;
     for (ptr = ptr->children; ptr; ptr = ptr->next)
     {
        if (ptr->type != XML_ELEMENT_NODE)
            continue;
-       if (!strcmp(ptr->name, "schema"))
+       if (!XML_STRCMP(ptr->name, "schema"))
        {
            struct _xmlAttr *attr;
            struct filter_schema *schema = xmalloc(sizeof(*schema));
@@ -168,6 +197,7 @@ static ZEBRA_RES create_schemas(struct filter_info *tinfo, const char *fname)
            schema->default_schema = 0;
            schema->next = tinfo->schemas;
            schema->stylesheet_xsp = 0;
+           schema->include_snippet = 0;
            tinfo->schemas = schema;
            for (attr = ptr->properties; attr; attr = attr->next)
            {
@@ -175,13 +205,14 @@ static ZEBRA_RES create_schemas(struct filter_info *tinfo, const char *fname)
                attr_content(attr, "name", &schema->name);
                attr_content(attr, "stylesheet", &schema->stylesheet);
                attr_content(attr, "default", &schema->default_schema);
+               attr_content(attr, "snippet", &schema->include_snippet);
            }
            if (schema->stylesheet)
                schema->stylesheet_xsp =
                    xsltParseStylesheetFile(
                        (const xmlChar*) schema->stylesheet);
        }
-       else if (!strcmp(ptr->name, "split"))
+       else if (!XML_STRCMP(ptr->name, "split"))
        {
            struct _xmlAttr *attr;
            for (attr = ptr->properties; attr; attr = attr->next)
@@ -250,16 +281,16 @@ static int ioclose_ex(void *context)
     return 0;
 }
 
-static void index_field(struct filter_info *tinfo, struct recExtractCtrl *ctrl,
+static void index_cdata(struct filter_info *tinfo, struct recExtractCtrl *ctrl,
                        xmlNodePtr ptr, RecWord *recWord)
 {
     for(; ptr; ptr = ptr->next)
     {
-       index_field(tinfo, ctrl, ptr->children, recWord);
+       index_cdata(tinfo, ctrl, ptr->children, recWord); /* descend first */
        if (ptr->type != XML_TEXT_NODE)
            continue;
-       recWord->term_buf = ptr->content;
-       recWord->term_len = strlen(ptr->content);
+       recWord->term_buf = (const char *)ptr->content; /* text node is the term */
+       recWord->term_len = XML_STRLEN(ptr->content);
        (*ctrl->tokenAdd)(recWord);
     }
 }
@@ -271,31 +302,67 @@ static void index_node(struct filter_info *tinfo,  struct recExtractCtrl *ctrl,
     {
        index_node(tinfo, ctrl, ptr->children, recWord);
        if (ptr->type != XML_ELEMENT_NODE || !ptr->ns ||
-           strcmp(ptr->ns->href, zebra_index_ns))
+           XML_STRCMP(ptr->ns->href, zebra_xslt_ns))
            continue;
-       if (!strcmp(ptr->name, "index"))
+       if (!XML_STRCMP(ptr->name, "index"))
        {
-           char *field_str = 0;
+           const char *name_str = 0;
+           const char *type_str = 0;
            const char *xpath_str = 0;
            struct _xmlAttr *attr;
            for (attr = ptr->properties; attr; attr = attr->next)
            {
-               if (!strcmp(attr->name, "field") 
-                   && attr->children && attr->children->type == XML_TEXT_NODE)
-                   field_str = attr->children->content;
-               if (!strcmp(attr->name, "xpath") 
-                   && attr->children && attr->children->type == XML_TEXT_NODE)
-                   xpath_str = attr->children->content;
+               attr_content(attr, "name", &name_str);
+               attr_content(attr, "xpath", &xpath_str);
+               attr_content(attr, "type", &type_str);
            }
-           if (field_str)
+           if (name_str)
            {
-               recWord->attrStr = field_str;
-               index_field(tinfo, ctrl, ptr->children, recWord);
+               int prev_type = recWord->index_type; /* save default type */
+
+               if (type_str && *type_str)
+                   recWord->index_type = *type_str; /* type was given */
+               recWord->index_name = name_str;
+               index_cdata(tinfo, ctrl, ptr->children, recWord);
+
+               recWord->index_type = prev_type;     /* restore it again */
            }
        }
     }
 }
 
+static void index_record(struct filter_info *tinfo,struct recExtractCtrl *ctrl,
+                        xmlNodePtr ptr, RecWord *recWord)
+{   /* Index one result tree; a zebra-namespace <record> root is unwrapped. */
+    if (ptr && ptr->type == XML_ELEMENT_NODE && ptr->ns &&
+       !XML_STRCMP(ptr->ns->href, zebra_xslt_ns)
+       && !XML_STRCMP(ptr->name, "record"))
+    {
+       const char *type_str = "update"; /* read from "type"; not used below */
+       const char *id_str = 0;
+       const char *rank_str = 0;
+       struct _xmlAttr *attr;
+       for (attr = ptr->properties; attr; attr = attr->next)
+       {
+           attr_content(attr, "type", &type_str);
+           attr_content(attr, "id", &id_str);
+           attr_content(attr, "rank", &rank_str);
+       }
+       if (id_str) /* NOTE(review): %255s assumes match_criteria has 256 bytes */
+           sscanf(id_str, "%255s", ctrl->match_criteria);
+       if (rank_str)
+       {
+           ctrl->staticrank = atoi(rank_str); /* atoi: non-numeric text gives 0 */
+           yaz_log(YLOG_LOG, "rank=%d",ctrl->staticrank);
+       }
+       else
+           yaz_log(YLOG_LOG, "no rank");
+       /* continue with the children of the <record> wrapper */
+       ptr = ptr->children;
+    }
+    index_node(tinfo, ctrl, ptr, recWord);
+}
+    
 static int extract_doc(struct filter_info *tinfo, struct recExtractCtrl *p,
                       xmlDocPtr doc)
 {
@@ -304,16 +371,16 @@ static int extract_doc(struct filter_info *tinfo, struct recExtractCtrl *p,
     xmlChar *buf_out;
     int len_out;
 
-    struct filter_schema *schema = lookup_schema(tinfo, ZEBRA_INDEX_NS);
+    struct filter_schema *schema = lookup_schema(tinfo, zebra_xslt_ns);
 
     params[0] = 0;
-    set_param_str(params, "schema", ZEBRA_INDEX_NS, tinfo->odr);
+    set_param_str(params, "schema", zebra_xslt_ns, tinfo->odr);
 
     (*p->init)(p, &recWord);
-    recWord.reg_type = 'w';
 
     if (schema && schema->stylesheet_xsp)
     {
+       xmlNodePtr root_ptr;
        xmlDocPtr resDoc = 
            xsltApplyStylesheet(schema->stylesheet_xsp,
                                doc, params);
@@ -323,7 +390,15 @@ static int extract_doc(struct filter_info *tinfo, struct recExtractCtrl *p,
            fwrite(buf_out, len_out, 1, stdout);
            xmlFree(buf_out);
        }
-       index_node(tinfo, p, xmlDocGetRootElement(resDoc), &recWord);
+       root_ptr = xmlDocGetRootElement(resDoc);
+       if (root_ptr)
+           index_record(tinfo, p, root_ptr, &recWord);
+       else
+       {
+           yaz_log(YLOG_WARN, "No root for index XML record."
+                   " split_level=%s stylesheet=%s",
+                   tinfo->split_level, schema->stylesheet);
+       }
        xmlFreeDoc(resDoc);
     }
     xmlDocDumpMemory(doc, &buf_out, &len_out);
@@ -365,12 +440,12 @@ static int extract_split(struct filter_info *tinfo, struct recExtractCtrl *p)
        {
            xmlNodePtr ptr = xmlTextReaderExpand(tinfo->reader);
            xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
-           xmlDocPtr doc = xmlNewDoc("1.0");
+           xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0");
 
            xmlDocSetRootElement(doc, ptr2);
 
-           return extract_doc(tinfo, p, doc);      
-       }
+           return extract_doc(tinfo, p, doc);   
+       }
        ret = xmlTextReaderRead(tinfo->reader);
     }
     xmlFreeTextReader(tinfo->reader);
@@ -422,44 +497,46 @@ static int ioclose_ret(void *context)
 }
 
 
-static const char *snippet_doc(struct recRetrieveCtrl *p)
+static const char *snippet_doc(struct recRetrieveCtrl *p, int text_mode,
+                              int window_size)
 {
     const char *xml_doc_str;
     int ord = 0;
     WRBUF wrbuf = wrbuf_alloc();
     zebra_snippets *res = 
-       zebra_snippets_window(p->doc_snippet, p->hit_snippet, 10);
+       zebra_snippets_window(p->doc_snippet, p->hit_snippet, window_size);
     zebra_snippet_word *w = zebra_snippets_list(res);
 
-#if 1
-    wrbuf_printf(wrbuf, "\'");
-#else
-    wrbuf_printf(wrbuf, "<snippet>\n");
-#endif
+    if (text_mode)
+       wrbuf_printf(wrbuf, "\'");
+    else
+       wrbuf_printf(wrbuf, "<snippet xmlns='%s'>\n", zebra_xslt_ns);
     for (; w; w = w->next)
     {
        if (ord == 0)
            ord = w->ord;
        else if (ord != w->ord)
+
            break;
-#if 1
-       wrbuf_printf(wrbuf, "%s%s%s ", 
-                    w->match ? "*" : "",
-                    w->term,
-                    w->match ? "*" : "");
-#else
-       wrbuf_printf(wrbuf, " <term %s ord='%d' seqno='%d'>", 
-                    (w->match ? "match='1'" : ""),
-                    w->ord, w->seqno);
-       wrbuf_xmlputs(wrbuf, w->term);
-       wrbuf_printf(wrbuf, "</term>\n");
-#endif
+       if (text_mode)
+           wrbuf_printf(wrbuf, "%s%s%s ", 
+                        w->match ? "*" : "",
+                        w->term,
+                        w->match ? "*" : "");
+       else
+       {
+           wrbuf_printf(wrbuf, " <term ord='%d' seqno='" ZINT_FORMAT "' %s>", 
+                        w->ord, w->seqno,
+                        (w->match ? "match='1'" : ""));
+           wrbuf_xmlputs(wrbuf, w->term);
+           wrbuf_printf(wrbuf, "</term>\n");
+       }
     }
-#if 1
-    wrbuf_printf(wrbuf, "\'");
-#else
-    wrbuf_printf(wrbuf, "</snippet>\n");
-#endif
+    if (text_mode)
+       wrbuf_printf(wrbuf, "\'");
+    else
+       wrbuf_printf(wrbuf, "</snippet>\n");
+
     xml_doc_str = odr_strdup(p->odr, wrbuf_buf(wrbuf));
 
     zebra_snippets_destroy(res);
@@ -469,22 +546,28 @@ static const char *snippet_doc(struct recRetrieveCtrl *p)
 
 static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
 {
-    const char *esn = ZEBRA_SCHEMA_IDENTITY_NS;
-    const char *params[10];
+    const char *esn = zebra_xslt_ns;
+    const char *params[20];
     struct filter_info *tinfo = clientData;
     xmlDocPtr resDoc;
     xmlDocPtr doc;
     struct filter_schema *schema;
+    int window_size = -1;
 
     if (p->comp)
     {
-       if (p->comp->which != Z_RecordComp_simple
-           || p->comp->u.simple->which != Z_ElementSetNames_generic)
+       if (p->comp->which == Z_RecordComp_simple
+           && p->comp->u.simple->which == Z_ElementSetNames_generic)
        {
-           p->diagnostic = YAZ_BIB1_PRESENT_COMP_SPEC_PARAMETER_UNSUPP;
-           return 0;
+           esn = p->comp->u.simple->u.generic;
+       }
+       else if (p->comp->which == Z_RecordComp_complex 
+                && p->comp->u.complex->generic->elementSpec
+                && p->comp->u.complex->generic->elementSpec->which ==
+                Z_ElementSpec_elementSetName)
+       {
+           esn = p->comp->u.complex->generic->elementSpec->u.elementSetName;
        }
-       esn = p->comp->u.simple->u.generic;
     }
     schema = lookup_schema(tinfo, esn);
     if (!schema)
@@ -494,6 +577,9 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
        return 0;
     }
 
+    if (schema->include_snippet)
+       window_size = atoi(schema->include_snippet);
+
     params[0] = 0;
     set_param_str(params, "schema", esn, p->odr);
     if (p->fname)
@@ -501,8 +587,11 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
     if (p->score >= 0)
        set_param_int(params, "score", p->score, p->odr);
     set_param_int(params, "size", p->recordSize, p->odr);
-    
-    set_param_xml(params, "snippet", snippet_doc(p), p->odr);
+    set_param_int(params, "id", p->localno, p->odr);
+
+    if (window_size >= 0)
+       set_param_xml(params, "snippet", snippet_doc(p, 1, window_size),
+                     p->odr);
     doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */,
                    0 /* URL */,
                    0 /* encoding */,
@@ -513,6 +602,13 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
        return 0;
     }
 
+    if (window_size >= 0)
+    {
+       xmlNodePtr node = xmlDocGetRootElement(doc);
+       const char *snippet_str = snippet_doc(p, 0, window_size);
+       xmlDocPtr snippet_doc = xmlParseMemory(snippet_str, strlen(snippet_str));
+       xmlAddChild(node, xmlDocGetRootElement(snippet_doc));
+    }
     if (!schema->stylesheet_xsp)
        resDoc = doc;
     else
@@ -559,20 +655,10 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
     return 0;
 }
 
-static struct recType filter_type_xslt = {
+static struct recType filter_type = {
     0,
     "xslt",
-    filter_init_xslt,
-    filter_config,
-    filter_destroy,
-    filter_extract,
-    filter_retrieve
-};
-
-static struct recType filter_type_xslt1 = {
-    0,
-    "xslt1",
-    filter_init_xslt1,
+    filter_init,
     filter_config,
     filter_destroy,
     filter_extract,
@@ -587,9 +673,6 @@ idzebra_filter
 #endif
 
 [] = {
-    &filter_type_xslt,
-#ifdef LIBXML_READER_ENABLED
-    &filter_type_xslt1,
-#endif
+    &filter_type,
     0,
 };