updated Deb control description so that IDZebra is not mentioned
[idzebra-moved-to-github.git] / index / alvis.c
index 40405e1..a752890 100644 (file)
@@ -1,8 +1,5 @@
-/* $Id: alvis.c,v 1.3 2006-08-22 13:39:26 adam Exp $
-   Copyright (C) 1995-2006
-   Index Data ApS
-
-This file is part of the Zebra server.
+/* This file is part of the Zebra server.
+   Copyright (C) 1995-2008 Index Data
 
 Zebra is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free
@@ -26,6 +23,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
 #include <yaz/diagbib1.h>
 #include <yaz/tpath.h>
+#include <yaz/oid_db.h>
 
 #include <libxml/xmlversion.h>
 #include <libxml/parser.h>
@@ -35,6 +33,10 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include <libxslt/transform.h>
 #include <libxslt/xsltutils.h>
 
+#if YAZ_HAVE_EXSLT
+#include <libexslt/exslt.h>
+#endif
+
 #include <idzebra/util.h>
 #include <idzebra/recctrl.h>
 
@@ -45,7 +47,6 @@ struct filter_schema {
     struct filter_schema *next;
     const char *default_schema;
     /* char default_schema; */
-    const char *include_snippet;
     xsltStylesheetPtr stylesheet_xsp;
 };
 
@@ -54,7 +55,7 @@ struct filter_info {
     char *fname;
     char *full_name;
     const char *profile_path;
-    const char *split_level;
+    int split_level;
     const char *split_path;
     ODR odr;
     struct filter_schema *schemas;
@@ -68,16 +69,6 @@ struct filter_info {
 
 static const char *zebra_xslt_ns = ZEBRA_SCHEMA_XSLT_NS;
 
-static void set_param_xml(const char **params, const char *name,
-                         const char *value, ODR odr)
-{
-    while (*params)
-       params++;
-    params[0] = name;
-    params[1] = value;
-    params[2] = 0;
-}
-
 static void set_param_str(const char **params, const char *name,
                          const char *value, ODR odr)
 {
@@ -140,6 +131,10 @@ static void *filter_init(Res res, RecType recType)
     tinfo->doc = 0;
     tinfo->schemas = 0;
 
+#if YAZ_HAVE_EXSLT
+    exsltRegisterAll(); 
+#endif
+
 #if ENABLE_INPUT_CALLBACK
     xmlRegisterDefaultInputCallbacks();
     xmlRegisterInputCallbacks(zebra_xmlInputMatchCallback,
@@ -153,8 +148,8 @@ static void *filter_init(Res res, RecType recType)
 static int attr_content(struct _xmlAttr *attr, const char *name,
                        const char **dst_content)
 {
-    if (!XML_STRCMP(attr->name, name) && attr->children &&
-       attr->children->type == XML_TEXT_NODE)
+    if (!XML_STRCMP(attr->name, name) && attr->children 
+        && attr->children->type == XML_TEXT_NODE)
     {
        *dst_content = (const char *)(attr->children->content);
        return 1;
@@ -185,27 +180,29 @@ static ZEBRA_RES create_schemas(struct filter_info *tinfo, const char *fname)
     char tmp_full_name[1024];
     xmlNodePtr ptr;
     tinfo->fname = xstrdup(fname);
-
-   if (yaz_filepath_resolve(tinfo->fname, tinfo->profile_path, 
+    
+    if (yaz_filepath_resolve(tinfo->fname, tinfo->profile_path, 
                              NULL, tmp_full_name))
-      tinfo->full_name = xstrdup(tmp_full_name);
+        tinfo->full_name = xstrdup(tmp_full_name);
     else
-      tinfo->full_name = xstrdup(tinfo->fname);
-
+        tinfo->full_name = xstrdup(tinfo->fname);
+    
     yaz_log(YLOG_LOG, "alvis filter: loading config file %s", tinfo->full_name);
-
+    
     tinfo->doc = xmlParseFile(tinfo->full_name);
-
-    if (!tinfo->doc){
+    
+    if (!tinfo->doc)
+    {
         yaz_log(YLOG_WARN, "alvis filter: could not parse config file %s", 
                 tinfo->full_name);
-
+        
        return ZEBRA_FAIL;
     }
     
     ptr = xmlDocGetRootElement(tinfo->doc);
-    if (!ptr || ptr->type != XML_ELEMENT_NODE ||
-       XML_STRCMP(ptr->name, "schemaInfo")){
+    if (!ptr || ptr->type != XML_ELEMENT_NODE 
+        || XML_STRCMP(ptr->name, "schemaInfo"))
+    {
         yaz_log(YLOG_WARN, 
                 "alvis filter:  config file %s :" 
                 " expected root element <schemaInfo>", 
@@ -219,7 +216,6 @@ static ZEBRA_RES create_schemas(struct filter_info *tinfo, const char *fname)
            continue;
        if (!XML_STRCMP(ptr->name, "schema"))
        {  
-            char tmp_xslt_full_name[1024];
            struct _xmlAttr *attr;
            struct filter_schema *schema = xmalloc(sizeof(*schema));
            schema->name = 0;
@@ -228,7 +224,6 @@ static ZEBRA_RES create_schemas(struct filter_info *tinfo, const char *fname)
            schema->default_schema = 0;
            schema->next = tinfo->schemas;
            schema->stylesheet_xsp = 0;
-           schema->include_snippet = 0;
            tinfo->schemas = schema;
            for (attr = ptr->properties; attr; attr = attr->next)
            {
@@ -236,33 +231,43 @@ static ZEBRA_RES create_schemas(struct filter_info *tinfo, const char *fname)
                attr_content(attr, "name", &schema->name);
                attr_content(attr, "stylesheet", &schema->stylesheet);
                attr_content(attr, "default", &schema->default_schema);
-               attr_content(attr, "snippet", &schema->include_snippet);
            }
             /*yaz_log(YLOG_LOG, "XSLT add %s %s %s", 
               schema->name, schema->identifier, schema->stylesheet); */
 
             /* find requested schema */
 
-           if (schema->stylesheet){
-              yaz_filepath_resolve(schema->stylesheet, tinfo->profile_path, 
-                                   NULL, tmp_xslt_full_name);
-              schema->stylesheet_xsp 
-                = xsltParseStylesheetFile((const xmlChar*) tmp_xslt_full_name);
-              if (!schema->stylesheet_xsp)
-                yaz_log(YLOG_WARN, 
-                        "alvis filter: could not parse xslt stylesheet %s", 
-                        tmp_xslt_full_name);
+           if (schema->stylesheet)
+            {
+                char tmp_xslt_full_name[1024];
+                if (!yaz_filepath_resolve(schema->stylesheet, tinfo->profile_path, 
+                                          NULL, tmp_xslt_full_name)) 
+                {
+                    yaz_log(YLOG_WARN, 
+                            "alvis filter: stylesheet %s not found in path %s",
+                            schema->stylesheet, tinfo->profile_path);
+                    return ZEBRA_FAIL;
+                }
+                schema->stylesheet_xsp 
+                    = xsltParseStylesheetFile((const xmlChar*) tmp_xslt_full_name);
+                if (!schema->stylesheet_xsp)
+                {
+                    yaz_log(YLOG_WARN, 
+                            "alvis filter: could not parse xslt stylesheet %s", 
+                            tmp_xslt_full_name);
+                    return ZEBRA_FAIL;
+                }
             }
-            
-                
        }
        else if (!XML_STRCMP(ptr->name, "split"))
        {
            struct _xmlAttr *attr;
            for (attr = ptr->properties; attr; attr = attr->next)
            {
-               attr_content(attr, "level", &tinfo->split_level);
-               attr_content(attr, "path", &tinfo->split_path);
+                const char *split_level_str = 0;
+               attr_content(attr, "level", &split_level_str);
+                tinfo->split_level = 
+                    split_level_str ? atoi(split_level_str) : 0;
            }
        }
        else
@@ -305,28 +310,27 @@ static struct filter_schema *lookup_schema(struct filter_info *tinfo,
 static ZEBRA_RES filter_config(void *clientData, Res res, const char *args)
 {
     struct filter_info *tinfo = clientData;
-    if (!args || !*args){
-      yaz_log(YLOG_WARN, "alvis filter: need config file");
-      return ZEBRA_FAIL;
+    if (!args || !*args)
+    {
+        yaz_log(YLOG_WARN, "alvis filter: need config file");
+        return ZEBRA_FAIL;
     }
 
     if (tinfo->fname && !strcmp(args, tinfo->fname))
        return ZEBRA_OK;
     
-    tinfo->profile_path 
-      /* = res_get_def(res, "profilePath", DEFAULT_PROFILE_PATH); */
-      = res_get(res, "profilePath");
+    tinfo->profile_path = res_get(res, "profilePath");
     yaz_log(YLOG_LOG, "alvis filter: profilePath %s", tinfo->profile_path);
 
     destroy_schemas(tinfo);
-    create_schemas(tinfo, args);
-    return ZEBRA_OK;
+    return create_schemas(tinfo, args);
 }
 
 static void filter_destroy(void *clientData)
 {
     struct filter_info *tinfo = clientData;
     destroy_schemas(tinfo);
+    xfree(tinfo->full_name);
     if (tinfo->reader)
        xmlFreeTextReader(tinfo->reader);
     odr_destroy(tinfo->odr);
@@ -381,10 +385,10 @@ static void index_node(struct filter_info *tinfo,  struct recExtractCtrl *ctrl,
            }
            if (name_str)
            {
-               int prev_type = recWord->index_type; /* save default type */
+               const char *prev_type = recWord->index_type; /* save default type */
 
                if (type_str && *type_str)
-                   recWord->index_type = *type_str; /* type was given */
+                   recWord->index_type = (const char *) type_str; /* type was given */
                recWord->index_name = name_str;
                index_cdata(tinfo, ctrl, ptr->children, recWord);
 
@@ -416,8 +420,7 @@ static void index_record(struct filter_info *tinfo,struct recExtractCtrl *ctrl,
            sscanf(id_str, "%255s", ctrl->match_criteria);
 
        if (rank_str)
-           ctrl->staticrank = atoi(rank_str);
-       
+           ctrl->staticrank = atozint(rank_str);
        ptr = ptr->children;
     }
 
@@ -463,7 +466,7 @@ static int extract_doc(struct filter_info *tinfo, struct recExtractCtrl *p,
        else
        {
            yaz_log(YLOG_WARN, "No root for index XML record."
-                   " split_level=%s stylesheet=%s",
+                   " split_level=%d stylesheet=%s",
                    tinfo->split_level, schema->stylesheet);
        }
        xmlFreeDoc(resDoc);
@@ -471,7 +474,8 @@ static int extract_doc(struct filter_info *tinfo, struct recExtractCtrl *p,
     xmlDocDumpMemory(doc, &buf_out, &len_out);
     if (p->flagShowRecords)
        fwrite(buf_out, len_out, 1, stdout);
-    (*p->setStoreData)(p, buf_out, len_out);
+    if (p->setStoreData)
+        (*p->setStoreData)(p, buf_out, len_out);
     xmlFree(buf_out);
     
     xmlFreeDoc(doc);
@@ -481,7 +485,7 @@ static int extract_doc(struct filter_info *tinfo, struct recExtractCtrl *p,
 static int extract_split(struct filter_info *tinfo, struct recExtractCtrl *p)
 {
     int ret;
-    int split_depth = 0;
+
     if (p->first_record)
     {
        if (tinfo->reader)
@@ -490,28 +494,36 @@ static int extract_split(struct filter_info *tinfo, struct recExtractCtrl *p)
                                       p /* I/O handler */,
                                       0 /* URL */, 
                                       0 /* encoding */,
-                                      XML_PARSE_XINCLUDE);
+                                      XML_PARSE_XINCLUDE
+                                       | XML_PARSE_NOENT
+                                       | XML_PARSE_NONET);
     }
     if (!tinfo->reader)
        return RECCTRL_EXTRACT_ERROR_GENERIC;
 
-    if (tinfo->split_level)
-       split_depth = atoi(tinfo->split_level);
     ret = xmlTextReaderRead(tinfo->reader);
-    while (ret == 1) {
+    while (ret == 1)
+    {
        int type = xmlTextReaderNodeType(tinfo->reader);
        int depth = xmlTextReaderDepth(tinfo->reader);
-       if (split_depth == 0 ||
-           (split_depth > 0 &&
-            type == XML_READER_TYPE_ELEMENT && split_depth == depth))
+       if (type == XML_READER_TYPE_ELEMENT && tinfo->split_level == depth)
        {
            xmlNodePtr ptr = xmlTextReaderExpand(tinfo->reader);
-           xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
-           xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0");
-
-           xmlDocSetRootElement(doc, ptr2);
-
-           return extract_doc(tinfo, p, doc);   
+            if (ptr)
+            {
+                xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
+                xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0");
+                
+                xmlDocSetRootElement(doc, ptr2);
+                
+                return extract_doc(tinfo, p, doc);
+            }
+            else
+            {
+                xmlFreeTextReader(tinfo->reader);
+                tinfo->reader = 0;
+                return RECCTRL_EXTRACT_ERROR_GENERIC;
+            }
        }
        ret = xmlTextReaderRead(tinfo->reader);
     }
@@ -524,18 +536,24 @@ static int extract_full(struct filter_info *tinfo, struct recExtractCtrl *p)
 {
     if (p->first_record) /* only one record per stream */
     {
-       xmlDocPtr doc = xmlReadIO(ioread_ex, ioclose_ex, p /* I/O handler */,
-                                 0 /* URL */,
-                                 0 /* encoding */,
-                                 XML_PARSE_XINCLUDE);
-       if (!doc)
-       {
-           return RECCTRL_EXTRACT_ERROR_GENERIC;
-       }
-       return extract_doc(tinfo, p, doc);
+       xmlDocPtr doc = xmlReadIO(ioread_ex, ioclose_ex, p /* I/O handler */,
+                                 0 /* URL */,
+                                 0 /* encoding */,
+                                 XML_PARSE_XINCLUDE
+                                 | XML_PARSE_NOENT
+                                 | XML_PARSE_NONET);
+       if (!doc)
+           return RECCTRL_EXTRACT_ERROR_GENERIC;
+       /* else {
+           xmlNodePtr root = xmlDocGetRootElement(doc);
+            if (!root)
+                return RECCTRL_EXTRACT_ERROR_GENERIC;
+                } */
+       
+       return extract_doc(tinfo, p, doc);
     }
     else
-       return RECCTRL_EXTRACT_EOF;
+       return RECCTRL_EXTRACT_EOF;
 }
 
 static int filter_extract(void *clientData, struct recExtractCtrl *p)
@@ -543,13 +561,10 @@ static int filter_extract(void *clientData, struct recExtractCtrl *p)
     struct filter_info *tinfo = clientData;
 
     odr_reset(tinfo->odr);
-
-    if (tinfo->split_level == 0 && tinfo->split_path == 0)
-       return extract_full(tinfo, p);
+    if (tinfo->split_level == 0 || p->setStoreData == 0)
+        return extract_full(tinfo, p);
     else
-    {
-       return extract_split(tinfo, p);
-    }
+        return extract_split(tinfo, p);
 }
 
 static int ioread_ret(void *context, char *buffer, int len)
@@ -563,54 +578,6 @@ static int ioclose_ret(void *context)
     return 0;
 }
 
-
-static const char *snippet_doc(struct recRetrieveCtrl *p, int text_mode,
-                              int window_size)
-{
-    const char *xml_doc_str;
-    int ord = 0;
-    WRBUF wrbuf = wrbuf_alloc();
-    zebra_snippets *res = 
-       zebra_snippets_window(p->doc_snippet, p->hit_snippet, window_size);
-    zebra_snippet_word *w = zebra_snippets_list(res);
-
-    if (text_mode)
-       wrbuf_printf(wrbuf, "\'");
-    else
-       wrbuf_printf(wrbuf, "<snippet xmlns='%s'>\n", zebra_xslt_ns);
-    for (; w; w = w->next)
-    {
-       if (ord == 0)
-           ord = w->ord;
-       else if (ord != w->ord)
-
-           break;
-       if (text_mode)
-           wrbuf_printf(wrbuf, "%s%s%s ", 
-                        w->match ? "*" : "",
-                        w->term,
-                        w->match ? "*" : "");
-       else
-       {
-           wrbuf_printf(wrbuf, " <term ord='%d' seqno='" ZINT_FORMAT "' %s>", 
-                        w->ord, w->seqno,
-                        (w->match ? "match='1'" : ""));
-           wrbuf_xmlputs(wrbuf, w->term);
-           wrbuf_printf(wrbuf, "</term>\n");
-       }
-    }
-    if (text_mode)
-       wrbuf_printf(wrbuf, "\'");
-    else
-       wrbuf_printf(wrbuf, "</snippet>\n");
-
-    xml_doc_str = odr_strdup(p->odr, wrbuf_buf(wrbuf));
-
-    zebra_snippets_destroy(res);
-    wrbuf_free(wrbuf, 1);
-    return xml_doc_str;
-}
-
 static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
 {
     /* const char *esn = zebra_xslt_ns; */
@@ -620,7 +587,6 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
     xmlDocPtr resDoc;
     xmlDocPtr doc;
     struct filter_schema *schema;
-    int window_size = -1;
 
     if (p->comp)
     {
@@ -645,9 +611,6 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
        return 0;
     }
 
-    if (schema->include_snippet)
-       window_size = atoi(schema->include_snippet);
-
     params[0] = 0;
     set_param_int(params, "id", p->localno, p->odr);
     if (p->fname)
@@ -669,26 +632,16 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
        set_param_int(params, "score", p->score, p->odr);
     set_param_int(params, "size", p->recordSize, p->odr);
 
-    if (window_size >= 0)
-       set_param_xml(params, "snippet", snippet_doc(p, 1, window_size),
-                     p->odr);
     doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */,
                    0 /* URL */,
                    0 /* encoding */,
-                   XML_PARSE_XINCLUDE);
+                   XML_PARSE_XINCLUDE | XML_PARSE_NOENT | XML_PARSE_NONET);
     if (!doc)
     {
        p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
        return 0;
     }
 
-    if (window_size >= 0)
-    {
-       xmlNodePtr node = xmlDocGetRootElement(doc);
-       const char *snippet_str = snippet_doc(p, 0, window_size);
-       xmlDocPtr snippet_doc = xmlParseMemory(snippet_str, strlen(snippet_str));
-       xmlAddChild(node, xmlDocGetRootElement(snippet_doc));
-    }
     if (!schema->stylesheet_xsp)
        resDoc = doc;
     else
@@ -701,29 +654,36 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
     {
        p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
     }
-    else if (p->input_format == VAL_NONE || p->input_format == VAL_TEXT_XML)
+    else if (!p->input_format 
+             || !oid_oidcmp(p->input_format, yaz_oid_recsyn_xml))
     {
        xmlChar *buf_out;
        int len_out;
 
-        xsltSaveResultToString(&buf_out, &len_out, resDoc,
-                               schema->stylesheet_xsp);        
+        if (schema->stylesheet_xsp)
+            xsltSaveResultToString(&buf_out, &len_out, resDoc,
+                                   schema->stylesheet_xsp);    
+        else
+           xmlDocDumpMemory(resDoc, &buf_out, &len_out);            
 
-       p->output_format = VAL_TEXT_XML;
+       p->output_format = yaz_oid_recsyn_xml;
        p->rec_len = len_out;
        p->rec_buf = odr_malloc(p->odr, p->rec_len);
        memcpy(p->rec_buf, buf_out, p->rec_len);
        xmlFree(buf_out);
     }
-    else if (p->output_format == VAL_SUTRS)
+    else if (!oid_oidcmp(p->output_format, yaz_oid_recsyn_sutrs))
     {
        xmlChar *buf_out;
        int len_out;
 
-        xsltSaveResultToString(&buf_out, &len_out, resDoc,
-                               schema->stylesheet_xsp);        
+        if (schema->stylesheet_xsp)
+            xsltSaveResultToString(&buf_out, &len_out, resDoc,
+                                   schema->stylesheet_xsp);
+        else
+           xmlDocDumpMemory(resDoc, &buf_out, &len_out);            
 
-       p->output_format = VAL_SUTRS;
+       p->output_format = yaz_oid_recsyn_sutrs;
        p->rec_len = len_out;
        p->rec_buf = odr_malloc(p->odr, p->rec_len);
        memcpy(p->rec_buf, buf_out, p->rec_len);