optimized code such that the RecWord structure recword is only initialized once per document and reused for all indexed terms
[idzebra-moved-to-github.git] / index / mod_dom.c
index f4f5643..5137efd 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: mod_dom.c,v 1.13 2007-02-15 14:33:41 marc Exp $
+/* $Id: mod_dom.c,v 1.15 2007-02-15 15:08:41 marc Exp $
    Copyright (C) 1995-2007
    Index Data ApS
 
 #include <idzebra/util.h>
 #include <idzebra/recctrl.h>
 
-
-
-/* Alvis style indexing */
-#define ZEBRA_SCHEMA_XSLT_NS "http://indexdata.dk/zebra/xslt/1"
-static const char *zebra_xslt_ns = ZEBRA_SCHEMA_XSLT_NS;
-
 /* DOM filter style indexing */
 #define ZEBRA_DOM_NS "http://indexdata.com/zebra-2.0"
 static const char *zebra_dom_ns = ZEBRA_DOM_NS;
@@ -664,144 +658,6 @@ static int ioclose_ex(void *context)
 }
 
 
-
-/* Alvis style indexing */
-static void index_cdata(struct filter_info *tinfo, struct recExtractCtrl *ctrl,
-                       xmlNodePtr ptr, RecWord *recWord)
-{
-    for(; ptr; ptr = ptr->next)
-        {
-            index_cdata(tinfo, ctrl, ptr->children, recWord);
-            if (ptr->type != XML_TEXT_NODE)
-                continue;
-            recWord->term_buf = (const char *)ptr->content;
-            recWord->term_len = XML_STRLEN(ptr->content);
-            (*ctrl->tokenAdd)(recWord);
-        }
-}
-
-/* Alvis style indexing */
-static void index_node(struct filter_info *tinfo,  struct recExtractCtrl *ctrl,
-                      xmlNodePtr ptr, RecWord *recWord)
-{
-    for(; ptr; ptr = ptr->next)
-        {
-            index_node(tinfo, ctrl, ptr->children, recWord);
-            if (ptr->type != XML_ELEMENT_NODE || !ptr->ns ||
-                XML_STRCMP(ptr->ns->href, zebra_xslt_ns))
-                continue;
-            if (!XML_STRCMP(ptr->name, "index"))
-                {
-                    const char *name_str = 0;
-                    const char *type_str = 0;
-                    const char *xpath_str = 0;
-                    struct _xmlAttr *attr;
-                    for (attr = ptr->properties; attr; attr = attr->next)
-                        {
-                            if (attr_content(attr, "name", &name_str))
-                                ;
-                            else if (attr_content(attr, "xpath", &xpath_str))
-                                ;
-                            else if (attr_content(attr, "type", &type_str))
-                                ;
-                            else
-                                yaz_log(YLOG_WARN, "%s: dom filter: "
-                                        "bad attribute %s for <index>",
-                                        tinfo->fname, attr->name);
-                        }
-                    if (name_str)
-                        {
-                            /* save default type */
-                            int prev_type = recWord->index_type; 
-
-                            /* type was given */
-                            if (type_str && *type_str)
-                                recWord->index_type = *type_str; 
-
-                            recWord->index_name = name_str;
-                            index_cdata(tinfo, ctrl, ptr->children, recWord);
-
-                            /* restore it again */
-                            recWord->index_type = prev_type;     
-                        }
-                }
-        }
-}
-
-/* Alvis style indexing */
-static void index_record(struct filter_info *tinfo,struct recExtractCtrl *ctrl,
-                        xmlNodePtr ptr, RecWord *recWord)
-{
-    const char *type_str = "update";
-
-    if (ptr && ptr->type == XML_ELEMENT_NODE && ptr->ns &&
-       !XML_STRCMP(ptr->ns->href, zebra_xslt_ns)
-       && !XML_STRCMP(ptr->name, "record"))
-        {
-            const char *id_str = 0;
-            const char *rank_str = 0;
-            struct _xmlAttr *attr;
-            for (attr = ptr->properties; attr; attr = attr->next)
-                {
-                    if (attr_content(attr, "type", &type_str))
-                        ;
-                    else if (attr_content(attr, "id", &id_str))
-                        ;
-                    else if (attr_content(attr, "rank", &rank_str))
-                        ;
-                    else
-                        yaz_log(YLOG_WARN, "%s: dom filter: "
-                                "bad attribute %s for <record>",
-                                tinfo->fname, attr->name);
-                }
-            if (id_str)
-                sscanf(id_str, "%255s", ctrl->match_criteria);
-
-            if (rank_str)
-                ctrl->staticrank = atozint(rank_str);
-            ptr = ptr->children;
-        }
-
-    if (!strcmp("update", type_str))
-        index_node(tinfo, ctrl, ptr, recWord);
-    else if (!strcmp("delete", type_str))
-        yaz_log(YLOG_WARN, "%s dom filter: "
-                "delete: to be implemented");
-    else
-        yaz_log(YLOG_WARN, "dom filter: "
-                "unknown record type '%s'", 
-                type_str);
-}
-
-
-/* Alvis style indexing */
-static void extract_doc_alvis(struct filter_info *tinfo, 
-                              struct recExtractCtrl *extctr, 
-                              xmlDocPtr doc)
-{
-    if (doc){
-        RecWord recWord;
-        xmlChar *buf_out;
-        int len_out;
-        xmlNodePtr root_ptr;
-
-        (*extctr->init)(extctr, &recWord);
-        
-       if (extctr->flagShowRecords){
-            xmlDocDumpMemory(doc, &buf_out, &len_out);
-           fwrite(buf_out, len_out, 1, stdout);
-           xmlFree(buf_out);
-       }
-       root_ptr = xmlDocGetRootElement(doc);
-       if (root_ptr)
-           index_record(tinfo, extctr, root_ptr, &recWord);
-        else
-            yaz_log(YLOG_WARN, "%s dom filter: "
-                    "No root for index XML record");
-    }
-}
-
-
 /* DOM filter style indexing */
 static int attr_content_xml(struct _xmlAttr *attr, const char *name,
                             xmlChar **dst_content)
@@ -818,7 +674,8 @@ static int attr_content_xml(struct _xmlAttr *attr, const char *name,
 
 /* DOM filter style indexing */
 static void index_value_of(struct filter_info *tinfo, 
-                           struct recExtractCtrl *extctr, 
+                           struct recExtractCtrl *extctr,
+                           RecWord* recword, 
                            xmlNodePtr node, 
                            xmlChar * index_p)
 {
@@ -837,10 +694,8 @@ static void index_value_of(struct filter_info *tinfo,
             xmlChar type[256];
 
             /* assingning text to be indexed */
-            RecWord recWord;
-            (*extctr->init)(extctr, &recWord);
-            recWord.term_buf = (const char *)text;
-            recWord.term_len = text_len;
+            recword->term_buf = (const char *)text;
+            recword->term_len = text_len;
 
             /* parsing all index name/type pairs */
             /* may not start with ' ' or ':' */
@@ -878,10 +733,10 @@ static void index_value_of(struct filter_info *tinfo,
                         "INDEX  '%s:%s' '%s'", 
                         tinfo->fname, index, type, text);
 
-                recWord.index_name = (const char *)index;
+                recword->index_name = (const char *)index;
                 if (type && *type)
-                    recWord.index_type = *type;
-                (extctr->tokenAdd)(&recWord);
+                    recword->index_type = *type;
+                (extctr->tokenAdd)(recword);
 
                 /* eat whitespaces */
                 if (*look && ' ' == *look && *(look+1)){
@@ -912,7 +767,7 @@ static void set_record_info(struct filter_info *tinfo,
         extctr->staticrank = atozint((const char *)rank_p);
 
     /*     if (!strcmp("update", type_str)) */
-    /*         index_node(tinfo, ctrl, ptr, recWord); */
+    /*         index_node(tinfo, ctrl, ptr, recword); */
     /*     else if (!strcmp("delete", type_str)) */
     /*         yaz_log(YLOG_WARN, "dom filter delete: to be implemented"); */
     /*     else */
@@ -925,6 +780,7 @@ static void set_record_info(struct filter_info *tinfo,
 /* DOM filter style indexing */
 static void process_xml_element_zebra_node(struct filter_info *tinfo, 
                                            struct recExtractCtrl *extctr, 
+                                           RecWord* recword, 
                                            xmlNodePtr node)
 {
     if (node->type == XML_ELEMENT_NODE 
@@ -936,7 +792,7 @@ static void process_xml_element_zebra_node(struct filter_info *tinfo,
             struct _xmlAttr *attr;      
             for (attr = node->properties; attr; attr = attr->next){
                 if (attr_content_xml(attr, "name", &index_p)){
-                    index_value_of(tinfo, extctr, node, index_p);        
+                    index_value_of(tinfo, extctr, recword,node, index_p);
                 }  
                 else
                     yaz_log(YLOG_WARN,"%s dom filter: "
@@ -1077,16 +933,15 @@ static void process_xml_pi_node(struct filter_info *tinfo,
 /* DOM filter style indexing */
 static void process_xml_element_node(struct filter_info *tinfo, 
                                      struct recExtractCtrl *extctr, 
+                                     RecWord* recword, 
                                      xmlNodePtr node)
 {
     /* remember indexing instruction from PI to next element node */
     xmlChar *index_p = 0;
 
-    /* yaz_log(YLOG_DEBUG,"ELEM   %s\n", xmlGetNodePath(node)); */
-
     /* check if we are an element node in the special zebra namespace 
        and either set record data or index value-of node content*/
-    process_xml_element_zebra_node(tinfo, extctr, node);
+    process_xml_element_zebra_node(tinfo, extctr, recword, node);
   
     /* loop through kid nodes */
     for (node = node->children; node; node = node->next)
@@ -1098,10 +953,10 @@ static void process_xml_element_node(struct filter_info *tinfo,
             else if (node->type == XML_ELEMENT_NODE){
                 /* if there was a PI index instruction before this element */
                 if (index_p){
-                    index_value_of(tinfo, extctr, node, index_p);            
+                    index_value_of(tinfo, extctr, recword, node, index_p);
                     index_p = 0;
                 }
-                process_xml_element_node(tinfo, extctr, node);
+                process_xml_element_node(tinfo, extctr, recword,node);
             }
             else
                 continue;
@@ -1114,17 +969,20 @@ static void extract_dom_doc_node(struct filter_info *tinfo,
                                  struct recExtractCtrl *extctr, 
                                  xmlDocPtr doc)
 {
-    /* yaz_log(YLOG_DEBUG,"DOC    %s\n", xmlGetNodePath((xmlNodePtr)doc)); */
-
     xmlChar *buf_out;
     int len_out;
+
+    /* only need to do the initialization once, reuse recword for all terms */
+    RecWord recword;
+    (*extctr->init)(extctr, &recword);
+
     if (extctr->flagShowRecords){
         xmlDocDumpMemory(doc, &buf_out, &len_out);
         fwrite(buf_out, len_out, 1, stdout);
         xmlFree(buf_out);
     }
 
-    process_xml_element_node(tinfo, extctr, (xmlNodePtr)doc);
+    process_xml_element_node(tinfo, extctr, &recword, (xmlNodePtr)doc);
 }