Changed some types in mod_dom.c ; mostly 'xmlChar *' to 'const char *'.
authorAdam Dickmeiss <adam@indexdata.dk>
Wed, 14 Mar 2007 14:16:14 +0000 (14:16 +0000)
committerAdam Dickmeiss <adam@indexdata.dk>
Wed, 14 Mar 2007 14:16:14 +0000 (14:16 +0000)
The use of const is more appropriate than non-const becuase these
string references point to xmlNode content - and we are not allowed
to change that. Added buffer safe PI attribute reading for mod_dom.c by
implementing function attr_content_pi. Function index_value_of still has
potential buffer flows. The record extraction system now has a new member,
action, which may be modified by a record filter to signal
delete/replace/insert. This is only honoured if update is used (in which
case the outer system already has said "we don't care whether it's insert
or replace anyway). Added mod_dom test for the use for @type=delete .

include/idzebra/recctrl.h
index/extract.c
index/mod_dom.c
test/xslt/Makefile.am
test/xslt/del-col.xml [new file with mode: 0644]
test/xslt/dom-config-del.xml [new file with mode: 0644]
test/xslt/dom1.c

index ac5f448..cd8e7a0 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: recctrl.h,v 1.32 2007-03-14 11:48:31 adam Exp $
+/* $Id: recctrl.h,v 1.33 2007-03-14 14:16:14 adam Exp $
    Copyright (C) 1995-2007
    Index Data ApS
 
@@ -107,6 +107,7 @@ struct recExtractCtrl {
     void      (*schemaAdd)(struct recExtractCtrl *p, Odr_oid *oid);
     data1_handle dh;
     void      *handle;
+    enum zebra_recctrl_action_t action;
 };
 
 /* Retrieve record control */
index f6fab5d..830edfb 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: extract.c,v 1.252 2007-03-14 11:48:32 adam Exp $
+/* $Id: extract.c,v 1.253 2007-03-14 14:16:14 adam Exp $
    Copyright (C) 1995-2007
    Index Data ApS
 
@@ -600,13 +600,21 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh,
         extractCtrl.handle = zh;
         extractCtrl.match_criteria[0] = '\0';
         extractCtrl.staticrank = 0;
+        extractCtrl.action = action;
 
         init_extractCtrl(zh, &extractCtrl);
-        
+
         extract_set_store_data_prepare(&extractCtrl);
         
         r = (*recType->extract)(recTypeClientData, &extractCtrl);
 
+        yaz_log(YLOG_LOG, "Old action=%d new action=%d", action,
+                extractCtrl.action);
+        if (action == action_update)
+        {
+            action = extractCtrl.action;
+        }
+        
         switch (r)
         {
         case RECCTRL_EXTRACT_EOF:
@@ -916,6 +924,8 @@ ZEBRA_RES zebra_extract_explain(void *handle, Record rec, data1_node *n)
     extractCtrl.flagShowRecords = 0;
     extractCtrl.match_criteria[0] = '\0';
     extractCtrl.staticrank = 0;
+    extractCtrl.action = action_update;
+
     extractCtrl.handle = handle;
     extractCtrl.first_record = 1;
     
index 66f11aa..eb4bc63 100644 (file)
@@ -1,5 +1,5 @@
 
-/* $Id: mod_dom.c,v 1.31 2007-03-08 17:19:12 marc Exp $
+/* $Id: mod_dom.c,v 1.32 2007-03-14 14:16:14 adam Exp $
    Copyright (C) 1995-2007
    Index Data ApS
 
@@ -77,7 +77,6 @@ struct filter_retrieve {
     struct filter_retrieve *next;
 };
 
-#define DOM_INPUT_DOM 0
 #define DOM_INPUT_XMLREADER 1
 #define DOM_INPUT_MARC 2
 struct filter_input {
@@ -87,9 +86,6 @@ struct filter_input {
     int type;
     union {
         struct {
-            int dummy;
-        } dom;
-        struct {
             xmlTextReaderPtr reader;
             int split_level;
         } xmlreader;
@@ -239,8 +235,6 @@ static void destroy_dom(struct filter_info *tinfo)
         {
             switch(i_ptr->type)
             {
-            case DOM_INPUT_DOM:
-                break;
             case DOM_INPUT_XMLREADER:
                 if (i_ptr->u.xmlreader.reader)
                     xmlFreeTextReader(i_ptr->u.xmlreader.reader);
@@ -468,16 +462,10 @@ static ZEBRA_RES parse_input(struct filter_info *tinfo, xmlNodePtr ptr,
             parse_convert(tinfo, ptr, &p->convert);
             break;
         }
-        else if (!XML_STRCMP(ptr->name, "xslt")){
-            struct filter_input *p 
-                = new_input(tinfo, DOM_INPUT_DOM);
-            parse_convert(tinfo, ptr, &p->convert);
-            break;
-        }
         else
         {
             dom_log(YLOG_WARN, tinfo, ptr,
-                    "bad element <%s>, expected <marc>|<xmlreader>|<xslt>",
+                    "bad element <%s>, expected <marc>|<xmlreader>",
                     ptr->name);
             return ZEBRA_FAIL;
         }
@@ -643,13 +631,13 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname)
             return ZEBRA_FAIL;
         }
     }
-
-    /* adding an empty DOM dummy type if no <input> list has been defined */
-    if (! tinfo->input_list){
-          struct filter_input *p 
-                = new_input(tinfo, DOM_INPUT_DOM);
+    if (!tinfo->input_list)
+    {
+        struct filter_input *p 
+            = new_input(tinfo, DOM_INPUT_XMLREADER);
+        p->u.xmlreader.split_level = 0;
+        p->u.xmlreader.reader = 0;
     }
-    
     return ZEBRA_OK;
 }
 
@@ -716,12 +704,12 @@ static int ioclose_ex(void *context)
 
 /* DOM filter style indexing */
 static int attr_content_xml(struct _xmlAttr *attr, const char *name,
-                            xmlChar **dst_content)
+                            const char **dst_content)
 {
     if (0 == XML_STRCMP(attr->name, name) && attr->children 
         && attr->children->type == XML_TEXT_NODE)
     {
-        *dst_content = (attr->children->content);
+        *dst_content = (const char *) (attr->children->content);
         return 1;
     }
     return 0;
@@ -733,7 +721,7 @@ static void index_value_of(struct filter_info *tinfo,
                            struct recExtractCtrl *extctr,
                            RecWord* recword, 
                            xmlNodePtr node, 
-                           xmlChar * index_p)
+                           const char *index_p)
 {
     if (tinfo->record_info_invoked == 1)
     {
@@ -743,9 +731,9 @@ static void index_value_of(struct filter_info *tinfo,
         /* if there is no text, we do not need to proceed */
         if (text_len)
         {            
-            xmlChar *look = index_p;
-            xmlChar *bval;
-            xmlChar *eval;
+            const char *look = index_p;
+            const char *bval;
+            const char *eval;
 
             xmlChar index[256];
             xmlChar type[256];
@@ -814,7 +802,7 @@ static void index_value_of(struct filter_info *tinfo,
                 (extctr->tokenAdd)(recword);
 
                 /* eat whitespaces */
-                if (*look && ' ' == *look && *(look+1))
+                if (*look && ' ' == *look)
                 {
                     look++;
                 } 
@@ -829,33 +817,42 @@ static void index_value_of(struct filter_info *tinfo,
 static void set_record_info(struct filter_info *tinfo, 
                             struct recExtractCtrl *extctr, 
                             xmlNodePtr node, 
-                            xmlChar * id_p, 
-                            xmlChar * rank_p, 
-                            xmlChar * type_p)
+                            const char * id_p, 
+                            const char * rank_p, 
+                            const char * type_p)
 {
-
     /* writing debug info out */
-    if (extctr->flagShowRecords)
-        dom_log(YLOG_LOG, tinfo, 0,
+    if (1 || extctr->flagShowRecords)
+        dom_log(YLOG_LOG, tinfo, node,
                 "RECORD id=%s rank=%s type=%s", 
                 id_p ? (const char *) id_p : "(null)",
                 rank_p ? (const char *) rank_p : "(null)",
                 type_p ? (const char *) type_p : "(null)");
     
 
-    if (id_p)
+    if (id_p && *id_p)
         sscanf((const char *)id_p, "%255s", extctr->match_criteria);
 
-    if (rank_p)
+    if (rank_p && *rank_p)
         extctr->staticrank = atozint((const char *)rank_p);
 
-    /*     if (!strcmp("update", type_str)) */
-    /*         index_node(tinfo, ctrl, ptr, recword); */
-    /*     else if (!strcmp("delete", type_str)) */
-    /*         dom_log(YLOG_WARN, tinfo, ptr, "dom filter delete: to be implemented"); */
-    /*     else */
-    /*         dom_log(YLOG_WARN, tinfo, ptr, "dom filter: unknown record type '%s'",  */
-    /*                 type_str); */
+    if (type_p && *type_p)
+    {
+        enum zebra_recctrl_action_t action = action_update;
+        if (!strcmp(type_p, "insert"))
+            action = action_insert;
+        else if (!strcmp(type_p, "delete"))
+            action = action_delete;
+        else if (!strcmp(type_p, "replace"))
+            action = action_replace;
+        else if (!strcmp(type_p, "update"))
+            action = action_update;
+        else
+            dom_log(YLOG_WARN, tinfo, node, "bad @type value: %s", type_p);
+        extctr->action = action;
+        yaz_log(YLOG_LOG, "In mod_dom.c: setting action to %d", action);
+    }
+
     if (tinfo->record_info_invoked == 1)
     {
         /* warn about multiple only once */
@@ -877,14 +874,14 @@ static void process_xml_element_zebra_node(struct filter_info *tinfo,
     {
          if (0 == XML_STRCMP(node->name, "index"))
          {
-            xmlChar *index_p = 0;
+            const char *index_p = 0;
 
             struct _xmlAttr *attr;      
             for (attr = node->properties; attr; attr = attr->next)
             {
                 if (attr_content_xml(attr, "name", &index_p))
                 {
-                    index_value_of(tinfo, extctr, recword,node, index_p);
+                    index_value_of(tinfo, extctr, recword, node, index_p);
                 }  
                 else
                 {
@@ -896,9 +893,9 @@ static void process_xml_element_zebra_node(struct filter_info *tinfo,
         }
         else if (0 == XML_STRCMP(node->name, "record"))
         {
-            xmlChar *id_p = 0;
-            xmlChar *rank_p = 0;
-            xmlChar *type_p = 0;
+            const char *id_p = 0;
+            const char *rank_p = 0;
+            const char *type_p = 0;
 
             struct _xmlAttr *attr;
             for (attr = node->properties; attr; attr = attr->next)
@@ -915,13 +912,6 @@ static void process_xml_element_zebra_node(struct filter_info *tinfo,
                             "bad attribute @%s, expected @id|@rank|@type",
                             attr->name);
                 }
-
-                if (type_p && 0 != strcmp("update", (const char *)type_p))
-                {
-                    dom_log(YLOG_WARN, tinfo, node,
-                            "attribute @%s, only implemented '@type='update'",
-                            attr->name);
-                }
             }
             set_record_info(tinfo, extctr, node, id_p, rank_p, type_p);
         } 
@@ -935,80 +925,74 @@ static void process_xml_element_zebra_node(struct filter_info *tinfo,
     }
 }
 
+static int attr_content_pi(const char **c_ptr, const char *name,
+                           char *value, size_t value_max)
+{
+    size_t name_len = strlen(name);
+    const char *look = *c_ptr;
+    int ret = 0;
+
+    *value = '\0';
+    while (*look && ' ' == *look)
+        look++;
+    if (strlen(look) > name_len)
+    {
+        if (look[name_len] == '=' && !memcmp(look, name, name_len))
+        {
+            size_t i = 0;
+            look += name_len+1;
+            while (*look && ' ' != *look)
+            {
+                if (i < value_max-1)
+                    value[i++] = *look;
+                look++;
+            }
+            value[i] = '\0';
+            ret = 1;
+        }
+    }
+    while (*look && ' ' == *look)
+        look++;
+    *c_ptr = look;
+    return ret;
+}
 
 /* DOM filter style indexing */
 static void process_xml_pi_node(struct filter_info *tinfo, 
                                 struct recExtractCtrl *extctr, 
                                 xmlNodePtr node,
-                                xmlChar **index_pp)
+                                const char **index_pp)
 {
     /* if right PI name, continue parsing PI */
     if (0 == strcmp(zebra_pi_name, (const char *)node->name))
     {
         xmlChar *pi_p =  node->content;
-        xmlChar *look = pi_p;
+        const char *look = (const char *) node->content;
     
-        xmlChar *bval;
-        xmlChar *eval;
-
         /* parsing PI record instructions */
         if (0 == strncmp((const char *)look, "record", 6))
         {
-            xmlChar id[256];
-            xmlChar rank[256];
-            xmlChar type[256];
-
+            char id[256];
+            char rank[256];
+            char type[256];
+            
             *id = '\0';
             *rank = '\0';
             *type = '\0';
-      
             look += 6;
-      
-            /* eat whitespace */
-            while (*look && ' ' == *look && *(look+1))
-                look++;
-
-            /* parse possible id */
-            if (*look && 0 == strncmp((const char *)look, "id=", 3))
-            {
-                look += 3;
-                bval = look;
-                while (*look && ' ' != *look)
-                    look++;
-                eval = look;
-                strncpy((char *)id, (const char *)bval, eval - bval);
-                id[eval - bval] = '\0';
-            }
-      
-            /* eat whitespace */
-            while (*look && ' ' == *look && *(look+1))
-                look++;
-      
-            /* parse possible rank */
-            if (*look && 0 == strncmp((const char *)look, "rank=", 5))
-            {
-                look += 6;
-                bval = look;
-                while (*look && ' ' != *look)
-                    look++;
-                eval = look;
-                strncpy((char *)rank, (const char *)bval, eval - bval);
-                rank[eval - bval] = '\0';
-            }
-
-            /* eat whitespace */
-            while (*look && ' ' == *look && *(look+1))
-                look++;
-
-            if (look && '\0' != *look)
-            {
-                dom_log(YLOG_WARN, tinfo, node,
-                        "content '%s', can not parse '%s'",
-                        pi_p, look);
-            }
-            else 
-                set_record_info(tinfo, extctr, node, id, rank, 0);
-
+            while (*look)
+                if (attr_content_pi(&look, "id", id, sizeof(id)))
+                    ;
+                else if (attr_content_pi(&look, "rank", rank, sizeof(rank)))
+                    ;
+                else if (attr_content_pi(&look, "type", type, sizeof(type)))
+                {
+                    dom_log(YLOG_WARN, tinfo, node,
+                            "content '%s', can not parse '%s'",
+                            pi_p, look);
+                    break;
+                }
+            set_record_info(tinfo, extctr, node, id, rank, type);
         } 
         /* parsing index instruction */
         else if (0 == strncmp((const char *)look, "index", 5))
@@ -1016,7 +1000,7 @@ static void process_xml_pi_node(struct filter_info *tinfo,
             look += 5;
       
             /* eat whitespace */
-            while (*look && ' ' == *look && *(look+1))
+            while (*look && ' ' == *look)
                 look++;
 
             /* export index instructions to outside */
@@ -1038,7 +1022,7 @@ static void process_xml_element_node(struct filter_info *tinfo,
                                      xmlNodePtr node)
 {
     /* remember indexing instruction from PI to next element node */
-    xmlChar *index_p = 0;
+    const char *index_p = 0;
 
     /* check if we are an element node in the special zebra namespace 
        and either set record data or index value-of node content*/
@@ -1305,16 +1289,12 @@ static int filter_extract(void *clientData, struct recExtractCtrl *p)
     struct filter_info *tinfo = clientData;
     struct filter_input *input = tinfo->input_list;
 
-
     if (!input)
-        return RECCTRL_EXTRACT_ERROR_GENERIC; 
+        return RECCTRL_EXTRACT_ERROR_GENERIC;
     
     odr_reset(tinfo->odr_record);
     switch(input->type)
     {
-    case DOM_INPUT_DOM:
-        return extract_xml_full(tinfo, input, p);
-        break;
     case DOM_INPUT_XMLREADER:
         if (input->u.xmlreader.split_level == 0)
             return extract_xml_full(tinfo, input, p);
index afc0557..343dfa8 100644 (file)
@@ -1,4 +1,4 @@
-# $Id: Makefile.am,v 1.16 2007-03-08 11:24:50 marc Exp $
+# $Id: Makefile.am,v 1.17 2007-03-14 14:16:14 adam Exp $
 
 check_PROGRAMS = xslt1 xslt2 xslt3 xslt4 xslt5 dom1
 TESTS = $(check_PROGRAMS)
@@ -12,6 +12,8 @@ EXTRA_DIST= \
      dom-index-element-chop.xsl \
      dom-index-pi.xsl \
      dom-index-skipped.xsl \
+     dom-config-del.xml \
+     del-col.xml \
      id.xsl \
      index.xsl \
      marc-col.mrc \
diff --git a/test/xslt/del-col.xml b/test/xslt/del-col.xml
new file mode 100644 (file)
index 0000000..b532cd7
--- /dev/null
@@ -0,0 +1,25 @@
+<sample xmlns:zs="http://indexdata.com/zebra-2.0">
+<rec>
+  <zs:record id="id1"/>
+  <zs:index name="title">a 1</zs:index>
+</rec>
+
+<rec>
+  <zs:record id="id2"/>
+  <zs:index name="title">a 2</zs:index>
+</rec>
+
+<rec>
+  <zs:record id="id3"/>
+  <zs:index name="title">a 3</zs:index>
+</rec>
+
+<rec>
+  <zs:record id="id1" type="delete"/>
+</rec>
+
+<rec>
+  <zs:record id="id2" type="replace"/>
+  <zs:index name="title">b</zs:index>
+</rec>
+</sample>
diff --git a/test/xslt/dom-config-del.xml b/test/xslt/dom-config-del.xml
new file mode 100644 (file)
index 0000000..9585845
--- /dev/null
@@ -0,0 +1,12 @@
+<dom>
+  <!-- $Id: dom-config-del.xml,v 1.1 2007-03-14 14:16:14 adam Exp $ -->
+  <extract name="index">
+   <xslt stylesheet="id.xsl"/>
+  </extract>
+  <retrieve name="F">
+    <xslt stylesheet="id.xsl"/>
+  </retrieve>
+  <input syntax="xml">
+    <xmlreader level="1"/>
+  </input>
+</dom>
index 236d1c5..b01015c 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: dom1.c,v 1.2 2007-03-05 13:02:11 marc Exp $
+/* $Id: dom1.c,v 1.3 2007-03-14 14:16:14 adam Exp $
    Copyright (C) 1995-2007
    Index Data ApS
 
@@ -23,20 +23,12 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include <yaz/test.h>
 #include "testlib.h"
 
-ZebraHandle index_some(ZebraService zs,
-                       const char *filter, const char *file)
+
+void index_more(ZebraHandle zh, const char *filter, const char *file)
 {
     char path[256];
     char profile_path[256];
 
-    ZebraHandle zh = zebra_open(zs, 0);
-
-    tl_check_filter(zs, "dom");
-
-    YAZ_CHECK(zebra_select_database(zh, "Default") == ZEBRA_OK);
-
-    zebra_init(zh);
-
     sprintf(profile_path, "%.80s:%.80s/../../tab", 
             tl_get_srcdir(), tl_get_srcdir());
     zebra_set_resource(zh, "profilePath", profile_path);
@@ -49,6 +41,20 @@ ZebraHandle index_some(ZebraService zs,
     YAZ_CHECK(zebra_repository_update(zh, path) == ZEBRA_OK);
     YAZ_CHECK(zebra_end_trans(zh) == ZEBRA_OK);
     zebra_commit(zh);
+}
+
+ZebraHandle index_some(ZebraService zs,
+                       const char *filter, const char *file)
+{
+    ZebraHandle zh = zebra_open(zs, 0);
+
+    tl_check_filter(zs, "dom");
+
+    YAZ_CHECK(zebra_select_database(zh, "Default") == ZEBRA_OK);
+
+    zebra_init(zh);
+
+    index_more(zh, filter, file);
     return zh;
 }
 
@@ -60,7 +66,8 @@ void tst(int argc, char **argv)
 
     zh = index_some(zs, "dom.bad.xml", "marc-col.xml");
     zebra_close(zh);
-    
+
+   
     /* testing XMLREADER input with PI stylesheet */ 
     zh = index_some(zs, "dom.dom-config-col.xml", "marc-col.xml");
     YAZ_CHECK(tl_query(zh, "@attr 1=title computer", 3));
@@ -92,6 +99,14 @@ void tst(int argc, char **argv)
     YAZ_CHECK(tl_query(zh, "@attr 1=control 11224467", 1));
     YAZ_CHECK(tl_query(zh, "@attr 1=control 73090924", 0));
 
+    /* testing XMLREADER input with type attributes (insert,delete,..) */ 
+    zh = index_some(zs, "dom.dom-config-del.xml", "del-col.xml");
+    YAZ_CHECK(tl_query(zh, "@attr 1=title a", 1));
+    YAZ_CHECK(tl_query(zh, "@attr 1=title 1", 0));
+    YAZ_CHECK(tl_query(zh, "@attr 1=title 2", 0));
+    YAZ_CHECK(tl_query(zh, "@attr 1=title 3", 1));
+    YAZ_CHECK(tl_query(zh, "@attr 1=title b", 1));
+
     zebra_close(zh);