Remove timeout member from COMSTACK
[yaz-moved-to-github.git] / src / marc_read_xml.c
index f448b5b..7356ca2 100644 (file)
@@ -1,5 +1,5 @@
 /* This file is part of the YAZ toolkit.
- * Copyright (C) 1995-2010 Index Data
+ * Copyright (C) 1995-2013 Index Data
  * See the file LICENSE for details.
  */
 
@@ -18,7 +18,6 @@
 
 #include <stdio.h>
 #include <string.h>
-#include <ctype.h>
 #include <yaz/marcdisp.h>
 #include <yaz/wrbuf.h>
 #include <yaz/yaz-util.h>
@@ -60,7 +59,7 @@ int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
                 }
                 if (ptr_code->type == XML_TEXT_NODE)
                 {
-                    ctrl_data_len = 
+                    ctrl_data_len =
                         strlen((const char *)ptr_code->content);
                 }
                 else
@@ -102,17 +101,16 @@ const char *tag_value_extract(const char *name, char tag_buffer[5])
 }
 
 // Given a xmlNode ptr,  extract a value from either a element name or from a given attribute
-const char *element_attribute_value_extract(const xmlNode *ptr,
-                                            const char *attribute_name,
-                                            NMEM nmem)
+char *element_attribute_value_extract(const xmlNode *ptr,
+                                      const char *attribute_name,
+                                      NMEM nmem)
 {
-
-    const char *name = ptr->name;
+    const char *name = (const char *) ptr->name;
     size_t length = strlen(name);
+    xmlAttr *attr;
     if (length > 1 )
         return nmem_strdup(nmem, name+1);
     // TODO Extract from attribute where matches attribute_name
-    xmlAttr *attr;
     for (attr = ptr->properties; attr; attr = attr->next)
         if (!strcmp((const char *)attr->name, attribute_name))
             return nmem_text_node_cdata(attr->children, nmem);
@@ -122,16 +120,16 @@ const char *element_attribute_value_extract(const xmlNode *ptr,
 
 int yaz_marc_read_turbo_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
 {
-    NMEM nmem = yaz_marc_get_nmem(mt);
     for (; ptr; ptr = ptr->next)
     {
         if (ptr->type == XML_ELEMENT_NODE)
         {
-            xmlNode *p;
             if (!strncmp((const char *) ptr->name, "s", 1))
             {
                 NMEM nmem = yaz_marc_get_nmem(mt);
-                char *buffer = (char *) nmem_malloc(nmem, 5);
+                xmlNode *p;
+               size_t ctrl_data_len = 0;
+                char *ctrl_data_buf = 0;
                 const char *tag_value = element_attribute_value_extract(ptr, "code", nmem);
                 if (!tag_value)
                 {
@@ -140,11 +138,8 @@ int yaz_marc_read_turbo_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
                     return -1;
                 }
 
-               size_t ctrl_data_len = 0;
-                char *ctrl_data_buf = 0;
                 ctrl_data_len = strlen((const char *) tag_value);
                 // Extract (length) from CDATA
-                xmlNode *p;
                 for (p = ptr->children; p ; p = p->next)
                     if (p->type == XML_TEXT_NODE)
                         ctrl_data_len += strlen((const char *)p->content);
@@ -169,9 +164,9 @@ int yaz_marc_read_turbo_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
 }
 
 
-static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p)
+static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p,
+                                    int *indicator_length)
 {
-    int indicator_length;
     int identifier_length;
     int base_address;
     int length_data_entry;
@@ -190,18 +185,14 @@ static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p)
                 for(; p; p = p->next)
                     if (p->type == XML_TEXT_NODE)
                         leader = (const char *) p->content;
-                break;
-            }
-            else
-            {
-                yaz_marc_cprintf(
-                    mt, "Expected element 'leader', got '%.80s'", ptr->name);
+                ptr = ptr->next;
             }
+            break;
         }
     if (!leader)
     {
-        yaz_marc_cprintf(mt, "Missing element 'leader'");
-        return -1;
+        yaz_marc_cprintf(mt, "Missing leader. Inserting fake leader");
+        leader = "00000nam a22000000a 4500";
     }
     if (strlen(leader) != 24)
     {
@@ -210,7 +201,7 @@ static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p)
         return -1;
     }
     yaz_marc_set_leader(mt, leader,
-                        &indicator_length,
+                        indicator_length,
                         &identifier_length,
                         &base_address,
                         &length_data_entry,
@@ -220,7 +211,8 @@ static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p)
     return 0;
 }
 
-static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
+static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr,
+                                    int indicator_length)
 {
     for(; ptr; ptr = ptr->next)
         if (ptr->type == XML_ELEMENT_NODE)
@@ -253,18 +245,29 @@ static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
                 const xmlNode *ptr_tag = 0;
                 struct _xmlAttr *attr;
                 int i;
-                for (i = 0; i<11; i++)
-                    indstr[i] = '\0';
+                for (i = 0; i < indicator_length; i++)
+                    indstr[i] = ' ';
+                indstr[i] = '\0';
                 for (attr = ptr->properties; attr; attr = attr->next)
                     if (!strcmp((const char *)attr->name, "tag"))
                         ptr_tag = attr->children;
                     else if (strlen((const char *)attr->name) == 4 &&
                              !memcmp(attr->name, "ind", 3))
                     {
-                        int no = atoi((const char *)attr->name+3);
-                        if (attr->children
-                            && attr->children->type == XML_TEXT_NODE)
-                            indstr[no] = attr->children->content[0];
+                        int no = atoi((const char *)attr->name + 3);
+                        if (attr->children &&
+                            attr->children->type == XML_TEXT_NODE &&
+                            no <= indicator_length && no > 0 &&
+                            attr->children->content[0])
+                        {
+                            indstr[no - 1] = attr->children->content[0];
+                        }
+                        else
+                        {
+                            yaz_marc_cprintf(
+                                mt, "Bad attribute '%.80s' for 'datafield'",
+                                attr->name);
+                        }
                     }
                     else
                     {
@@ -278,10 +281,8 @@ static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
                         mt, "Missing attribute 'tag' for 'datafield'" );
                     return -1;
                 }
-                /* note that indstr[0] is unused so we use indstr[1..] */
                 yaz_marc_add_datafield_xml(mt, ptr_tag,
-                                           indstr+1, strlen(indstr+1));
-                
+                                           indstr, indicator_length);
                 if (yaz_marc_read_xml_subfields(mt, ptr->children))
                     return -1;
             }
@@ -296,9 +297,9 @@ static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
     return 0;
 }
 
-void yaz_marc_add_datafield_turbo_xml(yaz_marc_t mt, char *tag_value, char *indicators);
 
-static int yaz_marc_read_turbo_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
+static int yaz_marc_read_turbo_xml_fields(yaz_marc_t mt, const xmlNode *ptr,
+                                          int indicator_length)
 {
     for(; ptr; ptr = ptr->next)
         if (ptr->type == XML_ELEMENT_NODE)
@@ -306,24 +307,26 @@ static int yaz_marc_read_turbo_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
             if (!strncmp( (const char *) ptr->name, "c", 1))
             {
                 NMEM nmem = yaz_marc_get_nmem(mt);
-                const char *tag_value = element_attribute_value_extract(ptr, "tag", nmem);
+                char *tag_value = element_attribute_value_extract(ptr, "tag", nmem);
                 if (!tag_value)
                 {
                     yaz_marc_cprintf(
                         mt, "Missing attribute 'tag' for 'controlfield'" );
                     return -1;
                 }
-                yaz_marc_add_controlfield_turbo_xml(mt, tag_value, ptr->children);
+                yaz_marc_add_controlfield_xml2(mt, tag_value, ptr->children);
             }
             else if (!strncmp((const char *) ptr->name, "d",1))
             {
                 struct _xmlAttr *attr;
                 NMEM nmem = yaz_marc_get_nmem(mt);
-                char *indstr = nmem_malloc(nmem, 11);  /* 0(unused), 1,....9, + zero term */
-                int index = 0;
-                for (index = 0; index < 11; index++)
-                    indstr[index] = '\0';
-                const char *tag_value = element_attribute_value_extract(ptr, "tag", nmem);
+                char *tag_value;
+                char *indstr = nmem_malloc(nmem, indicator_length + 1);
+                int i = 0;
+                for (i = 0; i < indicator_length; i++)
+                    indstr[i] = ' ';
+                indstr[i] = '\0';
+                tag_value = element_attribute_value_extract(ptr, "tag", nmem);
                 if (!tag_value)
                 {
                     yaz_marc_cprintf(
@@ -335,21 +338,27 @@ static int yaz_marc_read_turbo_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
                         attr->name[0] == 'i')
                     {
                        //extract indicator attribute from i#="Y" pattern
-                        int no = atoi((const char *)attr->name+1);
-                        if (attr->children
-                            && attr->children->type == XML_TEXT_NODE)
-                            indstr[no] = attr->children->content[0];
+                        int no = atoi((const char *)attr->name + 1);
+                        if (attr->children &&
+                            attr->children->type == XML_TEXT_NODE &&
+                            no <= indicator_length && no > 0 &&
+                            attr->children->content[0])
+                        {
+                            indstr[no - 1] = attr->children->content[0];
+                        }
+                        else
+                        {
+                            yaz_marc_cprintf(
+                                mt, "Bad attribute '%.80s' for 'd'",attr->name);
+                        }
                     }
                     else
                     {
                         yaz_marc_cprintf(
-                            mt, "Bad attribute '%.80s' for 'datafield'",
-                            attr->name);
+                            mt, "Bad attribute '%.80s' for 'd'", attr->name);
                     }
-                /* note that indstr[0] is unused so we use indstr[1..] */
-                yaz_marc_add_datafield_turbo_xml(mt, tag_value, indstr+1);
-                int rc = yaz_marc_read_turbo_xml_subfields(mt, ptr->children /*, indstr */);
-                if (rc)
+                yaz_marc_add_datafield_xml2(mt, tag_value, indstr);
+                if (yaz_marc_read_turbo_xml_subfields(mt, ptr->children /*, indstr */))
                     return -1;
             }
             else
@@ -369,22 +378,24 @@ static int yaz_marc_read_turbo_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
 #if YAZ_HAVE_XML2
 int yaz_marc_read_xml(yaz_marc_t mt, const xmlNode *ptr)
 {
+    int indicator_length = 0;
+    int format = 0;
     yaz_marc_reset(mt);
 
     for(; ptr; ptr = ptr->next)
         if (ptr->type == XML_ELEMENT_NODE)
         {
-            //TODO Should actually look at the namespace but...
             if (!strcmp((const char *) ptr->name, "record"))
             {
-               yaz_marc_set_read_format(mt, YAZ_MARC_MARCXML);
+                format = YAZ_MARC_MARCXML;
                 break;
             }
             else if (!strcmp((const char *) ptr->name, "r"))
             {
-               yaz_marc_set_read_format(mt, YAZ_MARC_TMARCXML);
+                format = YAZ_MARC_TURBOMARC;
                 break;
             }
+            else
             {
                 yaz_marc_cprintf(
                     mt, "Unknown element '%.80s' in MARC XML reader",
@@ -399,15 +410,15 @@ int yaz_marc_read_xml(yaz_marc_t mt, const xmlNode *ptr)
     }
     /* ptr points to record node now */
     ptr = ptr->children;
-    if (yaz_marc_read_xml_leader(mt, &ptr))
+    if (yaz_marc_read_xml_leader(mt, &ptr, &indicator_length))
         return -1;
 
-    switch (yaz_marc_get_read_format(mt))
+    switch (format)
     {
     case YAZ_MARC_MARCXML:
-        return yaz_marc_read_xml_fields(mt, ptr->next);
-    case YAZ_MARC_TMARCXML:
-        return yaz_marc_read_turbo_xml_fields(mt, ptr->next);
+        return yaz_marc_read_xml_fields(mt, ptr, indicator_length);
+    case YAZ_MARC_TURBOMARC:
+        return yaz_marc_read_turbo_xml_fields(mt, ptr, indicator_length);
     }
     return -1;
 }