2007.
[idzebra-moved-to-github.git] / index / alvis.c
index 93f4241..60beaca 100644 (file)
@@ -1,5 +1,5 @@
-/* $Id: alvis.c,v 1.2 2006-08-14 10:40:15 adam Exp $
-   Copyright (C) 1995-2006
+/* $Id: alvis.c,v 1.11 2007-01-15 15:10:16 adam Exp $
+   Copyright (C) 1995-2007
    Index Data ApS
 
 This file is part of the Zebra server.
@@ -35,6 +35,10 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include <libxslt/transform.h>
 #include <libxslt/xsltutils.h>
 
+#if YAZ_HAVE_EXSLT
+#include <libexslt/exslt.h>
+#endif
+
 #include <idzebra/util.h>
 #include <idzebra/recctrl.h>
 
@@ -54,7 +58,7 @@ struct filter_info {
     char *fname;
     char *full_name;
     const char *profile_path;
-    const char *split_level;
+    int split_level;
     const char *split_path;
     ODR odr;
     struct filter_schema *schemas;
@@ -140,6 +144,10 @@ static void *filter_init(Res res, RecType recType)
     tinfo->doc = 0;
     tinfo->schemas = 0;
 
+#if YAZ_HAVE_EXSLT
+    exsltRegisterAll(); 
+#endif
+
 #if ENABLE_INPUT_CALLBACK
     xmlRegisterDefaultInputCallbacks();
     xmlRegisterInputCallbacks(zebra_xmlInputMatchCallback,
@@ -153,8 +161,8 @@ static void *filter_init(Res res, RecType recType)
 static int attr_content(struct _xmlAttr *attr, const char *name,
                        const char **dst_content)
 {
-    if (!XML_STRCMP(attr->name, name) && attr->children &&
-       attr->children->type == XML_TEXT_NODE)
+    if (!XML_STRCMP(attr->name, name) && attr->children 
+        && attr->children->type == XML_TEXT_NODE)
     {
        *dst_content = (const char *)(attr->children->content);
        return 1;
@@ -185,27 +193,29 @@ static ZEBRA_RES create_schemas(struct filter_info *tinfo, const char *fname)
     char tmp_full_name[1024];
     xmlNodePtr ptr;
     tinfo->fname = xstrdup(fname);
-
-   if (yaz_filepath_resolve(tinfo->fname, tinfo->profile_path, 
+    
+    if (yaz_filepath_resolve(tinfo->fname, tinfo->profile_path, 
                              NULL, tmp_full_name))
-      tinfo->full_name = xstrdup(tmp_full_name);
+        tinfo->full_name = xstrdup(tmp_full_name);
     else
-      tinfo->full_name = xstrdup(tinfo->fname);
-
+        tinfo->full_name = xstrdup(tinfo->fname);
+    
     yaz_log(YLOG_LOG, "alvis filter: loading config file %s", tinfo->full_name);
-
+    
     tinfo->doc = xmlParseFile(tinfo->full_name);
-
-    if (!tinfo->doc){
+    
+    if (!tinfo->doc)
+    {
         yaz_log(YLOG_WARN, "alvis filter: could not parse config file %s", 
                 tinfo->full_name);
-
+        
        return ZEBRA_FAIL;
     }
     
     ptr = xmlDocGetRootElement(tinfo->doc);
-    if (!ptr || ptr->type != XML_ELEMENT_NODE ||
-       XML_STRCMP(ptr->name, "schemaInfo")){
+    if (!ptr || ptr->type != XML_ELEMENT_NODE 
+        || XML_STRCMP(ptr->name, "schemaInfo"))
+    {
         yaz_log(YLOG_WARN, 
                 "alvis filter:  config file %s :" 
                 " expected root element <schemaInfo>", 
@@ -219,7 +229,6 @@ static ZEBRA_RES create_schemas(struct filter_info *tinfo, const char *fname)
            continue;
        if (!XML_STRCMP(ptr->name, "schema"))
        {  
-            char tmp_xslt_full_name[1024];
            struct _xmlAttr *attr;
            struct filter_schema *schema = xmalloc(sizeof(*schema));
            schema->name = 0;
@@ -243,26 +252,37 @@ static ZEBRA_RES create_schemas(struct filter_info *tinfo, const char *fname)
 
             /* find requested schema */
 
-           if (schema->stylesheet){
-              yaz_filepath_resolve(schema->stylesheet, tinfo->profile_path, 
-                                   NULL, tmp_xslt_full_name);
-              schema->stylesheet_xsp 
-                = xsltParseStylesheetFile((const xmlChar*) tmp_xslt_full_name);
-              if (!schema->stylesheet_xsp)
-                yaz_log(YLOG_WARN, 
-                        "alvis filter: could not parse xslt stylesheet %s", 
-                        tmp_xslt_full_name);
+           if (schema->stylesheet)
+            {
+                char tmp_xslt_full_name[1024];
+                if (!yaz_filepath_resolve(schema->stylesheet, tinfo->profile_path, 
+                                          NULL, tmp_xslt_full_name)) 
+                {
+                    yaz_log(YLOG_WARN, 
+                            "alvis filter: stylesheet %s not found in path %s",
+                            schema->stylesheet, tinfo->profile_path);
+                    return ZEBRA_FAIL;
+                }
+                schema->stylesheet_xsp 
+                    = xsltParseStylesheetFile((const xmlChar*) tmp_xslt_full_name);
+                if (!schema->stylesheet_xsp)
+                {
+                    yaz_log(YLOG_WARN, 
+                            "alvis filter: could not parse xslt stylesheet %s", 
+                            tmp_xslt_full_name);
+                    return ZEBRA_FAIL;
+                }
             }
-            
-                
        }
        else if (!XML_STRCMP(ptr->name, "split"))
        {
            struct _xmlAttr *attr;
            for (attr = ptr->properties; attr; attr = attr->next)
            {
-               attr_content(attr, "level", &tinfo->split_level);
-               attr_content(attr, "path", &tinfo->split_path);
+                const char *split_level_str = 0;
+               attr_content(attr, "level", &split_level_str);
+                tinfo->split_level = 
+                    split_level_str ? atoi(split_level_str) : 0;
            }
        }
        else
@@ -305,22 +325,20 @@ static struct filter_schema *lookup_schema(struct filter_info *tinfo,
 static ZEBRA_RES filter_config(void *clientData, Res res, const char *args)
 {
     struct filter_info *tinfo = clientData;
-    if (!args || !*args){
-      yaz_log(YLOG_WARN, "alvis filter: need config file");
-      return ZEBRA_FAIL;
+    if (!args || !*args)
+    {
+        yaz_log(YLOG_WARN, "alvis filter: need config file");
+        return ZEBRA_FAIL;
     }
 
     if (tinfo->fname && !strcmp(args, tinfo->fname))
        return ZEBRA_OK;
     
-    tinfo->profile_path 
-      /* = res_get_def(res, "profilePath", DEFAULT_PROFILE_PATH); */
-      = res_get(res, "profilePath");
+    tinfo->profile_path = res_get(res, "profilePath");
     yaz_log(YLOG_LOG, "alvis filter: profilePath %s", tinfo->profile_path);
 
     destroy_schemas(tinfo);
-    create_schemas(tinfo, args);
-    return ZEBRA_OK;
+    return create_schemas(tinfo, args);
 }
 
 static void filter_destroy(void *clientData)
@@ -336,7 +354,7 @@ static void filter_destroy(void *clientData)
 static int ioread_ex(void *context, char *buffer, int len)
 {
     struct recExtractCtrl *p = context;
-    return (*p->readf)(p->fh, buffer, len);
+    return p->stream->readf(p->stream, buffer, len);
 }
 
 static int ioclose_ex(void *context)
@@ -416,8 +434,7 @@ static void index_record(struct filter_info *tinfo,struct recExtractCtrl *ctrl,
            sscanf(id_str, "%255s", ctrl->match_criteria);
 
        if (rank_str)
-           ctrl->staticrank = atoi(rank_str);
-       
+           ctrl->staticrank = atozint(rank_str);
        ptr = ptr->children;
     }
 
@@ -463,7 +480,7 @@ static int extract_doc(struct filter_info *tinfo, struct recExtractCtrl *p,
        else
        {
            yaz_log(YLOG_WARN, "No root for index XML record."
-                   " split_level=%s stylesheet=%s",
+                   " split_level=%d stylesheet=%s",
                    tinfo->split_level, schema->stylesheet);
        }
        xmlFreeDoc(resDoc);
@@ -481,7 +498,7 @@ static int extract_doc(struct filter_info *tinfo, struct recExtractCtrl *p,
 static int extract_split(struct filter_info *tinfo, struct recExtractCtrl *p)
 {
     int ret;
-    int split_depth = 0;
+
     if (p->first_record)
     {
        if (tinfo->reader)
@@ -495,23 +512,29 @@ static int extract_split(struct filter_info *tinfo, struct recExtractCtrl *p)
     if (!tinfo->reader)
        return RECCTRL_EXTRACT_ERROR_GENERIC;
 
-    if (tinfo->split_level)
-       split_depth = atoi(tinfo->split_level);
     ret = xmlTextReaderRead(tinfo->reader);
-    while (ret == 1) {
+    while (ret == 1)
+    {
        int type = xmlTextReaderNodeType(tinfo->reader);
        int depth = xmlTextReaderDepth(tinfo->reader);
-       if (split_depth == 0 ||
-           (split_depth > 0 &&
-            type == XML_READER_TYPE_ELEMENT && split_depth == depth))
+       if (type == XML_READER_TYPE_ELEMENT && tinfo->split_level == depth)
        {
            xmlNodePtr ptr = xmlTextReaderExpand(tinfo->reader);
-           xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
-           xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0");
-
-           xmlDocSetRootElement(doc, ptr2);
-
-           return extract_doc(tinfo, p, doc);   
+            if (ptr)
+            {
+                xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
+                xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0");
+                
+                xmlDocSetRootElement(doc, ptr2);
+                
+                return extract_doc(tinfo, p, doc);
+            }
+            else
+            {
+                xmlFreeTextReader(tinfo->reader);
+                tinfo->reader = 0;
+                return RECCTRL_EXTRACT_ERROR_GENERIC;
+            }
        }
        ret = xmlTextReaderRead(tinfo->reader);
     }
@@ -524,18 +547,18 @@ static int extract_full(struct filter_info *tinfo, struct recExtractCtrl *p)
 {
     if (p->first_record) /* only one record per stream */
     {
-       xmlDocPtr doc = xmlReadIO(ioread_ex, ioclose_ex, p /* I/O handler */,
-                                 0 /* URL */,
-                                 0 /* encoding */,
-                                 XML_PARSE_XINCLUDE);
-       if (!doc)
-       {
-           return RECCTRL_EXTRACT_ERROR_GENERIC;
-       }
-       return extract_doc(tinfo, p, doc);
+       xmlDocPtr doc = xmlReadIO(ioread_ex, ioclose_ex, p /* I/O handler */,
+                                 0 /* URL */,
+                                 0 /* encoding */,
+                                 XML_PARSE_XINCLUDE);
+       if (!doc)
+       {
+           return RECCTRL_EXTRACT_ERROR_GENERIC;
+       }
+       return extract_doc(tinfo, p, doc);
     }
     else
-       return RECCTRL_EXTRACT_EOF;
+       return RECCTRL_EXTRACT_EOF;
 }
 
 static int filter_extract(void *clientData, struct recExtractCtrl *p)
@@ -543,19 +566,16 @@ static int filter_extract(void *clientData, struct recExtractCtrl *p)
     struct filter_info *tinfo = clientData;
 
     odr_reset(tinfo->odr);
-
-    if (tinfo->split_level == 0 && tinfo->split_path == 0)
-       return extract_full(tinfo, p);
+    if (tinfo->split_level == 0)
+        return extract_full(tinfo, p);
     else
-    {
-       return extract_split(tinfo, p);
-    }
+        return extract_split(tinfo, p);
 }
 
 static int ioread_ret(void *context, char *buffer, int len)
 {
     struct recRetrieveCtrl *p = context;
-    return (*p->readf)(p->fh, buffer, len);
+    return p->stream->readf(p->stream, buffer, len);
 }
 
 static int ioclose_ret(void *context)
@@ -563,7 +583,6 @@ static int ioclose_ret(void *context)
     return 0;
 }
 
-
 static const char *snippet_doc(struct recRetrieveCtrl *p, int text_mode,
                               int window_size)
 {
@@ -706,8 +725,11 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
        xmlChar *buf_out;
        int len_out;
 
-        xsltSaveResultToString(&buf_out, &len_out, resDoc,
-                               schema->stylesheet_xsp);        
+        if (schema->stylesheet_xsp)
+            xsltSaveResultToString(&buf_out, &len_out, resDoc,
+                                   schema->stylesheet_xsp);    
+        else
+           xmlDocDumpMemory(resDoc, &buf_out, &len_out);            
 
        p->output_format = VAL_TEXT_XML;
        p->rec_len = len_out;
@@ -720,8 +742,11 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
        xmlChar *buf_out;
        int len_out;
 
-        xsltSaveResultToString(&buf_out, &len_out, resDoc,
-                               schema->stylesheet_xsp);        
+        if (schema->stylesheet_xsp)
+            xsltSaveResultToString(&buf_out, &len_out, resDoc,
+                                   schema->stylesheet_xsp);
+        else
+           xmlDocDumpMemory(resDoc, &buf_out, &len_out);            
 
        p->output_format = VAL_SUTRS;
        p->rec_len = len_out;