optimized code such that the RecWord structure recword is only initialized once and reused for all terms
[idzebra-moved-to-github.git] / index / mod_dom.c
index 4ae23d5..5137efd 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: mod_dom.c,v 1.10 2007-02-14 16:38:41 marc Exp $
+/* $Id: mod_dom.c,v 1.15 2007-02-15 15:08:41 marc Exp $
    Copyright (C) 1995-2007
    Index Data ApS
 
 #include <idzebra/util.h>
 #include <idzebra/recctrl.h>
 
+/* DOM filter style indexing: namespace URI of zebra <record>/<index> elements */
+#define ZEBRA_DOM_NS "http://indexdata.com/zebra-2.0"
+static const char *zebra_dom_ns = ZEBRA_DOM_NS;
+
+/* DOM filter style indexing: name of the zebra processing instruction */
+#define ZEBRA_PI_NAME "zebra-2.0"
+static const char *zebra_pi_name = ZEBRA_PI_NAME;
+
+
+
 struct convert_s {
     const char *stylesheet;
     xsltStylesheetPtr stylesheet_xsp;
@@ -242,9 +252,10 @@ static ZEBRA_RES parse_convert(struct filter_info *tinfo, xmlNodePtr ptr,
                             ;
                         else
                             yaz_log(YLOG_WARN, "%s: dom filter: "
-                                    "bad attribute %s"
-                                    " for <xslt>",
-                                    tinfo->fname, attr->name);
+                                    "%s bad attribute @%s, "
+                                    "expected @stylesheet",
+                                    tinfo->fname, 
+                                    xmlGetNodePath(ptr), attr->name);
                     if (p->stylesheet)
                         {
                             char tmp_xslt_full_name[1024];
@@ -253,8 +264,7 @@ static ZEBRA_RES parse_convert(struct filter_info *tinfo, xmlNodePtr ptr,
                                                       NULL, 
                                                       tmp_xslt_full_name))
                                 {
-                                    yaz_log(YLOG_WARN,
-                                            "%s: dom filter: "
+                                    yaz_log(YLOG_WARN, "%s: dom filter: "
                                             "stylesheet %s not found in "
                                             "path %s",
                                             tinfo->fname,
@@ -268,8 +278,7 @@ static ZEBRA_RES parse_convert(struct filter_info *tinfo, xmlNodePtr ptr,
                                                           tmp_xslt_full_name);
                             if (!p->stylesheet_xsp)
                                 {
-                                    yaz_log(YLOG_WARN,
-                                            "%s: dom filter: "
+                                    yaz_log(YLOG_WARN, "%s: dom filter: "
                                             "could not parse xslt "
                                             "stylesheet %s",
                                             tinfo->fname, tmp_xslt_full_name);
@@ -278,10 +287,9 @@ static ZEBRA_RES parse_convert(struct filter_info *tinfo, xmlNodePtr ptr,
                         }
                     else
                         {
-                            yaz_log(YLOG_WARN,
-                                    "%s: dom filter: "
-                                    "missing attribute 'stylesheet' "
-                                    "for element 'xslt'", tinfo->fname);
+                            yaz_log(YLOG_WARN, "%s: dom filter: "
+                                    "%s missing attribute 'stylesheet' ", 
+                                    tinfo->fname, xmlGetNodePath(ptr));
                             return ZEBRA_FAIL;
                         }
                     *l = p;
@@ -290,8 +298,9 @@ static ZEBRA_RES parse_convert(struct filter_info *tinfo, xmlNodePtr ptr,
             else
                 {
                     yaz_log(YLOG_LOG, 
-                            "%s: dom filter: bad node '%s' for <conv>",
-                            tinfo->fname, ptr->name);
+                            "%s: dom filter: "
+                            "%s bad node '%s'",
+                            tinfo->fname, xmlGetNodePath(ptr), ptr->name);
                     return ZEBRA_FAIL;
                 }
         
@@ -351,18 +360,19 @@ static ZEBRA_RES parse_input(struct filter_info *tinfo, xmlNodePtr ptr,
                             if (attr_content(attr, "charset", &input_charset))
                                 ;
                             else
-                                yaz_log(YLOG_WARN, 
-                                        "%s: dom filter: bad attribute %s"
-                                        " for <marc>",
-                                        tinfo->fname, attr->name);
+                                yaz_log(YLOG_WARN, "%s: dom filter: "
+                                        "%s bad attribute @%s,"
+                                        " expected @charset",
+                                        tinfo->fname, 
+                                        xmlGetNodePath(ptr), attr->name);
                         }
                     iconv = yaz_iconv_open("utf-8", input_charset);
                     if (!iconv)
                         {
-                            yaz_log(YLOG_WARN, 
-                                    "%s: dom filter: unsupported charset "
-                                    "'%s' for <marc>", 
-                                    tinfo->fname,  input_charset);
+                            yaz_log(YLOG_WARN, "%s: dom filter: "
+                                    "%s unsupported @charset '%s'", 
+                                    tinfo->fname, xmlGetNodePath(ptr),
+                                    input_charset);
                             return ZEBRA_FAIL;
                         }
                     else
@@ -396,10 +406,11 @@ static ZEBRA_RES parse_input(struct filter_info *tinfo, xmlNodePtr ptr,
                             if (attr_content(attr, "level", &level_str))
                                 ;
                             else
-                                yaz_log(YLOG_WARN, 
-                                        "%s: dom filter: bad attribute %s"
-                                        " for <xmlreader>",
-                                        tinfo->fname, attr->name);
+                                yaz_log(YLOG_WARN, "%s: dom filter: "
+                                        "%s bad attribute @%s,"
+                                        " expected @level",
+                                        tinfo->fname, xmlGetNodePath(ptr),
+                                        attr->name);
                         }
                     if (level_str)
                         p->u.xmlreader.split_level = atoi(level_str);
@@ -411,8 +422,10 @@ static ZEBRA_RES parse_input(struct filter_info *tinfo, xmlNodePtr ptr,
                 }
             else
                 {
-                    yaz_log(YLOG_WARN, "%s: dom filter: bad input type %s",
-                            tinfo->fname, ptr->name);
+                    yaz_log(YLOG_WARN, "%s: dom filter: "
+                            "%s bad element <%s>,"
+                            " expected <marc>|<xmlreader>",
+                            tinfo->fname, xmlGetNodePath(ptr), ptr->name);
                     return ZEBRA_FAIL;
                 }
         }
@@ -433,13 +446,14 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname)
     else
         tinfo->full_name = odr_strdup(tinfo->odr_config, tinfo->fname);
     
-    yaz_log(YLOG_LOG, "dom filter: loading config file %s", tinfo->full_name);
+    yaz_log(YLOG_LOG, "%s dom filter: "
+            "loading config file %s", tinfo->fname, tinfo->full_name);
     
     doc = xmlParseFile(tinfo->full_name);
     if (!doc)
         {
-            yaz_log(YLOG_WARN, 
-                    "%s: dom filter: failed to parse config file %s",
+            yaz_log(YLOG_WARN, "%s: dom filter: "
+                    "failed to parse config file %s",
                     tinfo->fname, tinfo->full_name);
             return ZEBRA_FAIL;
         }
@@ -450,9 +464,10 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname)
     if (!ptr || ptr->type != XML_ELEMENT_NODE 
         || XML_STRCMP(ptr->name, "dom"))
         {
-            yaz_log(YLOG_WARN, 
-                    "%s: dom filter: expected root element <dom>", 
-                    tinfo->fname);  
+            yaz_log(YLOG_WARN, "%s: dom filter: "
+                    "%s bad root element <%s>,"
+                    " expected root element <dom>", 
+                    tinfo->fname, xmlGetNodePath(ptr), ptr->name);  
             return ZEBRA_FAIL;
         }
 
@@ -480,10 +495,11 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname)
                             if (attr_content(attr, "name", &f->name))
                                 ;
                             else
-                                yaz_log(YLOG_WARN, 
-                                        "%s: dom filter: bad attribute %s"
-                                        " for <extract>",
-                                        tinfo->fname, attr->name);
+                                yaz_log(YLOG_WARN, "%s: dom filter: "
+                                        "%s bad attribute @%s"
+                                        " expected @name",
+                                        tinfo->fname, 
+                                        xmlGetNodePath(ptr),attr->name);
 
                         }
                     parse_convert(tinfo, ptr->children, &f->convert);
@@ -518,17 +534,18 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname)
                             else if (attr_content(attr, "name", &f->name))
                                 ;
                             else
-                                yaz_log(YLOG_WARN, 
-                                        "%s: dom filter: bad attribute %s"
-                                        " for <retrieve>",
-                                        tinfo->fname, attr->name);
+                                yaz_log(YLOG_WARN, "%s: dom filter: "
+                                        "%s bad attribute @%s"
+                                        " expected @identifier|@name",
+                                        tinfo->fname, 
+                                        xmlGetNodePath(ptr),attr->name);
                         }
                     parse_convert(tinfo, ptr->children, &f->convert);
                 }
             else if (!XML_STRCMP(ptr->name, "store"))
                 {
                     /*
-                      <retrieve name="F">
+                      <store name="F">
                       <xslt stylesheet="some.xsl"/>
                       <xslt stylesheet="some.xsl"/>
                       </retrieve>
@@ -560,17 +577,20 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname)
                             else if (attr_content(attr, "name", &name))
                                 ;
                             else
-                                yaz_log(YLOG_WARN, 
-                                        "%s: dom filter: bad attribute %s"
-                                        " for <input>",
-                                        tinfo->fname, attr->name);
+                                yaz_log(YLOG_WARN, "%s: dom filter: "
+                                        "%s bad attribute @%s"
+                                        " expected @syntax|@name",
+                                        tinfo->fname, 
+                                        xmlGetNodePath(ptr),attr->name);
                         }
                     parse_input(tinfo, ptr->children, syntax, name);
                 }
             else
                 {
-                    yaz_log(YLOG_WARN, "%s: dom filter: bad element %s",
-                            tinfo->fname, ptr->name);
+                    yaz_log(YLOG_WARN, "%s: dom filter: "
+                            "%s bad element <%s>,"
+                            " expected <extract>|<input>|<retrieve>|<store>",
+                            tinfo->fname, xmlGetNodePath(ptr), ptr->name);
                     return ZEBRA_FAIL;
                 }
         }
@@ -638,145 +658,6 @@ static int ioclose_ex(void *context)
 }
 
 
-/* Alvis style indexing */
-#define ZEBRA_SCHEMA_XSLT_NS "http://indexdata.dk/zebra/xslt/1"
-static const char *zebra_xslt_ns = ZEBRA_SCHEMA_XSLT_NS;
-
-/* Alvis style indexing */
-static void index_cdata(struct filter_info *tinfo, struct recExtractCtrl *ctrl,
-                       xmlNodePtr ptr, RecWord *recWord)
-{
-    for(; ptr; ptr = ptr->next)
-        {
-            index_cdata(tinfo, ctrl, ptr->children, recWord);
-            if (ptr->type != XML_TEXT_NODE)
-                continue;
-            recWord->term_buf = (const char *)ptr->content;
-            recWord->term_len = XML_STRLEN(ptr->content);
-            (*ctrl->tokenAdd)(recWord);
-        }
-}
-
-/* Alvis style indexing */
-static void index_node(struct filter_info *tinfo,  struct recExtractCtrl *ctrl,
-                      xmlNodePtr ptr, RecWord *recWord)
-{
-    for(; ptr; ptr = ptr->next)
-        {
-            index_node(tinfo, ctrl, ptr->children, recWord);
-            if (ptr->type != XML_ELEMENT_NODE || !ptr->ns ||
-                XML_STRCMP(ptr->ns->href, zebra_xslt_ns))
-                continue;
-            if (!XML_STRCMP(ptr->name, "index"))
-                {
-                    const char *name_str = 0;
-                    const char *type_str = 0;
-                    const char *xpath_str = 0;
-                    struct _xmlAttr *attr;
-                    for (attr = ptr->properties; attr; attr = attr->next)
-                        {
-                            if (attr_content(attr, "name", &name_str))
-                                ;
-                            else if (attr_content(attr, "xpath", &xpath_str))
-                                ;
-                            else if (attr_content(attr, "type", &type_str))
-                                ;
-                            else
-                                yaz_log(YLOG_WARN, 
-                                        "%s: dom filter: bad attribute %s"
-                                        " for <index>",
-                                        tinfo->fname, attr->name);
-                        }
-                    if (name_str)
-                        {
-                            /* save default type */
-                            int prev_type = recWord->index_type; 
-
-                            /* type was given */
-                            if (type_str && *type_str)
-                                recWord->index_type = *type_str; 
-
-                            recWord->index_name = name_str;
-                            index_cdata(tinfo, ctrl, ptr->children, recWord);
-
-                            /* restore it again */
-                            recWord->index_type = prev_type;     
-                        }
-                }
-        }
-}
-
-/* Alvis style indexing */
-static void index_record(struct filter_info *tinfo,struct recExtractCtrl *ctrl,
-                        xmlNodePtr ptr, RecWord *recWord)
-{
-    const char *type_str = "update";
-
-    if (ptr && ptr->type == XML_ELEMENT_NODE && ptr->ns &&
-       !XML_STRCMP(ptr->ns->href, zebra_xslt_ns)
-       && !XML_STRCMP(ptr->name, "record"))
-        {
-            const char *id_str = 0;
-            const char *rank_str = 0;
-            struct _xmlAttr *attr;
-            for (attr = ptr->properties; attr; attr = attr->next)
-                {
-                    if (attr_content(attr, "type", &type_str))
-                        ;
-                    else if (attr_content(attr, "id", &id_str))
-                        ;
-                    else if (attr_content(attr, "rank", &rank_str))
-                        ;
-                    else
-                        yaz_log(YLOG_WARN, "%s: dom filter: bad attribute %s"
-                                " for <record>",
-                                tinfo->fname, attr->name);
-                }
-            if (id_str)
-                sscanf(id_str, "%255s", ctrl->match_criteria);
-
-            if (rank_str)
-                ctrl->staticrank = atozint(rank_str);
-            ptr = ptr->children;
-        }
-
-    if (!strcmp("update", type_str))
-        index_node(tinfo, ctrl, ptr, recWord);
-    else if (!strcmp("delete", type_str))
-        yaz_log(YLOG_WARN, "dom filter delete: to be implemented");
-    else
-        yaz_log(YLOG_WARN, "dom filter: unknown record type '%s'", 
-                type_str);
-}
-
-
-/* Alvis style indexing */
-static void extract_doc_alvis(struct filter_info *tinfo, 
-                              struct recExtractCtrl *recctr, 
-                              xmlDocPtr doc)
-{
-    if (doc){
-        RecWord recWord;
-        xmlChar *buf_out;
-        int len_out;
-        xmlNodePtr root_ptr;
-
-        (*recctr->init)(recctr, &recWord);
-        
-       if (recctr->flagShowRecords){
-            xmlDocDumpMemory(doc, &buf_out, &len_out);
-           fwrite(buf_out, len_out, 1, stdout);
-           xmlFree(buf_out);
-       }
-       root_ptr = xmlDocGetRootElement(doc);
-       if (root_ptr)
-           index_record(tinfo, recctr, root_ptr, &recWord);
-        else
-            yaz_log(YLOG_WARN, "No root for index XML record");
-    }
-}
-
-
 /* DOM filter style indexing */
 static int attr_content_xml(struct _xmlAttr *attr, const char *name,
                             xmlChar **dst_content)
@@ -790,96 +671,120 @@ static int attr_content_xml(struct _xmlAttr *attr, const char *name,
     return 0;
 }
 
-/* DOM filter style indexing */
-/* #define ZEBRA_XSLT_NS "http://indexdata.com/zebra-2.0" */
-/* static const char *zebra_xslt_ns = ZEBRA_XSLT_NS; */
 
 /* DOM filter style indexing */
-#define ZEBRA_PI_NAME "zebra-2.0"
-static const char *zebra_pi_name = ZEBRA_PI_NAME;
-
-
-/* DOM filter style indexing */
-void index_value_of(struct filter_info *tinfo, 
-                    struct recExtractCtrl *recctr, 
-                    xmlNodePtr node, 
-                    xmlChar * index_p)
+static void index_value_of(struct filter_info *tinfo, 
+                           struct recExtractCtrl *extctr,
+                           RecWord* recword, 
+                           xmlNodePtr node, 
+                           xmlChar * index_p)
 {
     xmlChar *text = xmlNodeGetContent(node);
+    size_t text_len = strlen((const char *)text);
 
-    xmlChar *look = index_p;
-    xmlChar *bval;
-    xmlChar *eval;
 
-    xmlChar index[256];
-    xmlChar type[256];
+    /* if there is no text, we do not need to proceed */
+    if (text_len)
+        {            
+            xmlChar *look = index_p;
+            xmlChar *bval;
+            xmlChar *eval;
+
+            xmlChar index[256];
+            xmlChar type[256];
 
-    /* parsing all index name/type pairs - may not start with ' ' or ':' */
-    while (*look && ' ' != *look && ':' != *look){
+            /* assigning text to be indexed */
+            recword->term_buf = (const char *)text;
+            recword->term_len = text_len;
+
+            /* parsing all index name/type pairs */
+            /* may not start with ' ' or ':' */
+            while (*look && ' ' != *look && ':' != *look){
     
-        /* setting name and type to zero */
-        *index = '\0';
-        *type = '\0';
+                /* setting name and type to zero */
+                *index = '\0';
+                *type = '\0';
     
-        /* parsing one index name */
-        bval = look;
-        while (*look && ':' != *look && ' ' != *look){
-            look++;
-        }
-        eval = look;
-        strncpy((char *)index, (const char *)bval, eval - bval);
-        index[eval - bval] = '\0';
+                /* parsing one index name */
+                bval = look;
+                while (*look && ':' != *look && ' ' != *look){
+                    look++;
+                }
+                eval = look;
+                strncpy((char *)index, (const char *)bval, eval - bval);
+                index[eval - bval] = '\0';
     
     
-        /* parsing one index type, if existing */
-        if (':' == *look){
-            look++;
+                /* parsing one index type, if existing */
+                if (':' == *look){
+                    look++;
       
-            bval = look;
-            while (*look && ' ' != *look){
-                look++;
-            }
-            eval = look;
-            strncpy((char *)type, (const char *)bval, eval - bval);
-            type[eval - bval] = '\0';
-        }
+                    bval = look;
+                    while (*look && ' ' != *look){
+                        look++;
+                    }
+                    eval = look;
+                    strncpy((char *)type, (const char *)bval, eval - bval);
+                    type[eval - bval] = '\0';
+                }
 
-        printf("INDEX  '%s:%s' '%s'\n", index, type, text);
-    
-        if (*look && ' ' == *look && *(look+1)){
-            look++;
-        } 
-    }
+                /* actually indexing the text given */
+                yaz_log(YLOG_DEBUG, "%s dom filter: "
+                        "INDEX  '%s:%s' '%s'", 
+                        tinfo->fname, index, type, text);
 
-    xmlFree(text);
+                recword->index_name = (const char *)index;
+                if (type && *type)
+                    recword->index_type = *type;
+                (extctr->tokenAdd)(recword);
 
-    /*   //recWord->term_buf = (const char *)ptr->content; */
-    /*   //recWord->term_len = XML_STRLEN(ptr->content); */
-    /*   //  if (type_str && *type_str) */
-    /*   //  recWord->index_type = *type_str; /\* type was given *\/ */
-    /*   //  recWord->index_name = name_str; */
-    /*   // recWord->index_type = prev_type;     /\* restore it again *\/ */
+                /* skip the single space separating index name/type pairs */
+                if (*look && ' ' == *look && *(look+1)){
+                    look++;
+                } 
+            }
+        }
+    
+    xmlFree(text); 
 }
 
 
 /* DOM filter style indexing */
-void set_record_info(struct filter_info *tinfo, 
-                     struct recExtractCtrl *recctr, 
-                     xmlChar * id_p, 
-                     xmlChar * rank_p, 
-                     xmlChar * type_p)
+static void set_record_info(struct filter_info *tinfo, 
+                            struct recExtractCtrl *extctr, 
+                            xmlChar * id_p, 
+                            xmlChar * rank_p, 
+                            xmlChar * type_p)
 {
-    printf("RECORD id=%s rank=%s type=%s\n", id_p, rank_p, type_p);
+    yaz_log(YLOG_DEBUG, "%s dom filter: "
+            "RECORD id=%s rank=%s type=%s", 
+            tinfo->fname,  id_p, rank_p, type_p);
+    
+    if (id_p)
+        sscanf((const char *)id_p, "%255s", extctr->match_criteria);
+
+    if (rank_p)
+        extctr->staticrank = atozint((const char *)rank_p);
+
+    /*     if (!strcmp("update", type_str)) */
+    /*         index_node(tinfo, ctrl, ptr, recword); */
+    /*     else if (!strcmp("delete", type_str)) */
+    /*         yaz_log(YLOG_WARN, "dom filter delete: to be implemented"); */
+    /*     else */
+    /*         yaz_log(YLOG_WARN, "dom filter: unknown record type '%s'",  */
+    /*                 type_str); */
+
 }
 
 
 /* DOM filter style indexing */
-void process_xml_element_zebra_node(struct filter_info *tinfo, 
-                                    struct recExtractCtrl *recctr, 
-                                    xmlNodePtr node)
+static void process_xml_element_zebra_node(struct filter_info *tinfo, 
+                                           struct recExtractCtrl *extctr, 
+                                           RecWord* recword, 
+                                           xmlNodePtr node)
 {
     if (node->type == XML_ELEMENT_NODE 
-        && node->ns && 0 == XML_STRCMP(node->ns->href, zebra_xslt_ns)){
+        && node->ns && 0 == XML_STRCMP(node->ns->href, zebra_dom_ns)){
     
         if (0 == XML_STRCMP(node->name, "index")){
             xmlChar *index_p = 0;
@@ -887,14 +792,12 @@ void process_xml_element_zebra_node(struct filter_info *tinfo,
             struct _xmlAttr *attr;      
             for (attr = node->properties; attr; attr = attr->next){
                 if (attr_content_xml(attr, "name", &index_p)){
-                    index_value_of(tinfo, recctr, node, index_p);        
+                    index_value_of(tinfo, extctr, recword,node, index_p);
                 }  
                 else
-                    // printf("%s: dom filter: s% bad attribute %s",
-                    // tinfo->fname, xmlGetNodePath(node)), nodeattr->name);
-                    printf("dom filter: %s bad attribute @%s, "
-                           "expected @name\n",
-                           xmlGetNodePath(node), attr->name);
+                    yaz_log(YLOG_WARN,"%s dom filter: "
+                            "%s bad attribute @%s, expected @name",
+                            tinfo->fname, xmlGetNodePath(node), attr->name);
             }
         }
         else if (0 == XML_STRCMP(node->name, "record")){
@@ -909,28 +812,28 @@ void process_xml_element_zebra_node(struct filter_info *tinfo,
                 else if (attr_content_xml(attr, "rank", &rank_p))
                     ;
                 else if (attr_content_xml(attr, "type", &type_p))
-                    ;
+                   ;
                 else
-                    // printf("%s: dom filter: s% bad attribute %s",
-                    // tinfo->fname, xmlGetNodePath(node)), nodeattr->name);
-                    printf("dom filter: %s bad attribute @%s,"
-                           " expected @id|@rank|@type\n",
-                           xmlGetNodePath(node), attr->name);
+                    yaz_log(YLOG_WARN,"%s dom filter: "
+                            "%s bad attribute @%s,"
+                           " expected @id|@rank|@type",
+                           tinfo->fname, xmlGetNodePath(node), attr->name);
 
                 if (type_p && 0 != strcmp("update", (const char *)type_p))
-                    printf("dom filter: %s attribute @%s,"
-                           " only implemented '@type=\"update\"\n",
-                           xmlGetNodePath(node), attr->name);
+                    yaz_log(YLOG_WARN,"%s dom filter: "
+                            "%s attribute @%s,"
+                            " only implemented '@type='update'",
+                            tinfo->fname, xmlGetNodePath(node), attr->name);
           
 
             }
-            set_record_info(tinfo, recctr, id_p, rank_p, type_p);
+            set_record_info(tinfo, extctr, id_p, rank_p, type_p);
         } else {
-            //  printf("%s: dom filter: s% bad attribute %s",
-            //  tinfo->fname, xmlGetNodePath(node)), nodeattr->name);
-            printf("dom filter: %s bad element <%s>,"
-                   " expected <record>|<index> in namespace '%s'\n",
-                   xmlGetNodePath(node), node->name, zebra_xslt_ns);
+            yaz_log(YLOG_WARN,"%s dom filter: "
+                    "%s bad element <%s>,"
+                    " expected <record>|<index> in namespace '%s'",
+                    tinfo->fname, xmlGetNodePath(node), 
+                    node->name, zebra_dom_ns);
       
         }
     }
@@ -938,13 +841,13 @@ void process_xml_element_zebra_node(struct filter_info *tinfo,
 
 
 /* DOM filter style indexing */
-void process_xml_pi_node(struct filter_info *tinfo, 
-                         struct recExtractCtrl *recctr, 
-                         xmlNodePtr node,
-                         xmlChar **index_pp)
+static void process_xml_pi_node(struct filter_info *tinfo, 
+                                struct recExtractCtrl *extctr, 
+                                xmlNodePtr node,
+                                xmlChar **index_pp)
 {
 
-    /* printf("PI     %s\n", xmlGetNodePath(node)); */
+    /* yaz_log(YLOG_DEBUG,"PI     %s\n", xmlGetNodePath(node)); */
 
     /* if right PI name, continue parsing PI */
     if (0 == strcmp(zebra_pi_name, (const char *)node->name)){
@@ -1000,13 +903,12 @@ void process_xml_pi_node(struct filter_info *tinfo,
             while (*look && ' ' == *look && *(look+1))
                 look++;
 
-            if (look && '\0' != *look){
-                printf ("ERROR %s: content '%s'; can not parse '%s'\n", 
-                        xmlGetNodePath(node), pi_p, look);
-            } else {
-                /* set_record_info(id, rank, type); */
-                set_record_info(tinfo, recctr, id, rank, 0);
-            }
+            if (look && '\0' != *look)
+                yaz_log(YLOG_WARN,"%s dom filter: "
+                        "%s content '%s', can not parse '%s'",
+                        tinfo->fname, xmlGetNodePath(node), pi_p, look);
+            else 
+                set_record_info(tinfo, extctr, id, rank, 0);
 
         } 
    
@@ -1020,44 +922,41 @@ void process_xml_pi_node(struct filter_info *tinfo,
 
             /* export index instructions to outside */
             *index_pp = look;
-
-            /* nor record, neither index */ 
-        } else {
-    
-            printf ("ERROR %s: content '%s'; can not parse '%s'\n", 
-                    xmlGetNodePath(node), pi_p, look);
-        }  
+        } 
+        else 
+            yaz_log(YLOG_WARN,"%s dom filter: "
+                    "%s content '%s', can not parse '%s'",
+                    tinfo->fname, xmlGetNodePath(node), pi_p, look);
     }
 }
 
 /* DOM filter style indexing */
-void process_xml_element_node(struct filter_info *tinfo, 
-                              struct recExtractCtrl *recctr, 
-                              xmlNodePtr node)
+static void process_xml_element_node(struct filter_info *tinfo, 
+                                     struct recExtractCtrl *extctr, 
+                                     RecWord* recword, 
+                                     xmlNodePtr node)
 {
     /* remember indexing instruction from PI to next element node */
     xmlChar *index_p = 0;
 
-    /* printf("ELEM   %s\n", xmlGetNodePath(node)); */
-
     /* check if we are an element node in the special zebra namespace 
        and either set record data or index value-of node content*/
-    process_xml_element_zebra_node(tinfo, recctr, node);
+    process_xml_element_zebra_node(tinfo, extctr, recword, node);
   
     /* loop through kid nodes */
     for (node = node->children; node; node = node->next)
         {
             /* check and set PI record and index index instructions */
             if (node->type == XML_PI_NODE){
-                process_xml_pi_node(tinfo, recctr, node, &index_p);
+                process_xml_pi_node(tinfo, extctr, node, &index_p);
             }
             else if (node->type == XML_ELEMENT_NODE){
                 /* if there was a PI index instruction before this element */
                 if (index_p){
-                    index_value_of(tinfo, recctr, node, index_p);            
+                    index_value_of(tinfo, extctr, recword, node, index_p);
                     index_p = 0;
                 }
-                process_xml_element_node(tinfo, recctr, node);
+                process_xml_element_node(tinfo, extctr, recword,node);
             }
             else
                 continue;
@@ -1066,13 +965,24 @@ void process_xml_element_node(struct filter_info *tinfo,
 
 
 /* DOM filter style indexing */
-void extract_dom_doc_node(struct filter_info *tinfo, 
-                          struct recExtractCtrl *recctr, 
-                          xmlDocPtr doc)
+static void extract_dom_doc_node(struct filter_info *tinfo, 
+                                 struct recExtractCtrl *extctr, 
+                                 xmlDocPtr doc)
 {
-    /* printf("DOC    %s\n", xmlGetNodePath((xmlNodePtr)doc)); */
+    xmlChar *buf_out;
+    int len_out;
+
+    /* only need to do the initialization once, reuse recword for all terms */
+    RecWord recword;
+    (*extctr->init)(extctr, &recword);
+
+    if (extctr->flagShowRecords){
+        xmlDocDumpMemory(doc, &buf_out, &len_out);
+        fwrite(buf_out, len_out, 1, stdout);
+        xmlFree(buf_out);
+    }
 
-    process_xml_element_node(tinfo, recctr, (xmlNodePtr)doc);
+    process_xml_element_node(tinfo, extctr, &recword, (xmlNodePtr)doc);
 }
 
 
@@ -1084,7 +994,6 @@ static int convert_extract_doc(struct filter_info *tinfo,
                                xmlDocPtr doc)
 
 {
-    /* RecWord recWord; */
     xmlChar *buf_out;
     int len_out;
     const char *params[10];
@@ -1092,7 +1001,7 @@ static int convert_extract_doc(struct filter_info *tinfo,
     xmlDocPtr store_doc = 0;
 
     params[0] = 0;
-    set_param_str(params, "schema", zebra_xslt_ns, tinfo->odr_record);
+    set_param_str(params, "schema", zebra_dom_ns, tinfo->odr_record);
 
     /* input conversion */
     perform_convert(tinfo, input->convert, params, &doc, 0);
@@ -1124,7 +1033,7 @@ static int convert_extract_doc(struct filter_info *tinfo,
     /* finally, do the indexing */
     if (doc){
         extract_dom_doc_node(tinfo, p, doc);
-        extract_doc_alvis(tinfo, p, doc);
+        /* extract_doc_alvis(tinfo, p, doc); */
        xmlFreeDoc(doc);
     }
 
@@ -1219,8 +1128,9 @@ static int extract_iso2709(struct filter_info *tinfo,
         {
             int i;
 
-            yaz_log(YLOG_WARN, "MARC: Skipping bad byte %d (0x%02X)",
-                    *buf & 0xff, *buf & 0xff);
+            yaz_log(YLOG_WARN, "%s dom filter: "
+                    "MARC: Skipping bad byte %d (0x%02X)",
+                    tinfo->fname, *buf & 0xff, *buf & 0xff);
             for (i = 0; i<4; i++)
                 buf[i] = buf[i+1];
 
@@ -1230,21 +1140,25 @@ static int extract_iso2709(struct filter_info *tinfo,
     record_length = atoi_n (buf, 5);
     if (record_length < 25)
         {
-            yaz_log (YLOG_WARN, "MARC record length < 25, is %d", 
-                     record_length);
+            yaz_log (YLOG_WARN, "%s dom filter: "
+                     "MARC record length < 25, is %d", 
+                     tinfo->fname, record_length);
             return RECCTRL_EXTRACT_ERROR_GENERIC;
         }
     read_bytes = p->stream->readf(p->stream, buf+5, record_length-5);
     if (read_bytes < record_length-5)
         {
-            yaz_log (YLOG_WARN, "Couldn't read whole MARC record");
+            yaz_log (YLOG_WARN, "%s dom filter: "
+                     "Couldn't read whole MARC record",
+                     tinfo->fname);
             return RECCTRL_EXTRACT_ERROR_GENERIC;
         }
     r = yaz_marc_read_iso2709(input->u.marc.handle,  buf, record_length);
     if (r < record_length)
         {
-            yaz_log (YLOG_WARN, "Parsing of MARC record failed r=%d length=%d",
-                     r, record_length);
+            yaz_log (YLOG_WARN, "%s dom filter: "
+                     "Parsing of MARC record failed r=%d length=%d",
+                     tinfo->fname, r, record_length);
             return RECCTRL_EXTRACT_ERROR_GENERIC;
         }
     else
@@ -1295,7 +1209,7 @@ static int ioclose_ret(void *context)
 
 static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
 {
-    /* const char *esn = zebra_xslt_ns; */
+    /* const char *esn = zebra_dom_ns; */
     const char *esn = 0;
     const char *params[32];
     struct filter_info *tinfo = clientData;