Re-implemented the element name encoding per Adam's suggestion: <e tag="value"> when...
[yaz-moved-to-github.git] / src / marc_read_xml.c
index f68906a..3f3e05b 100644 (file)
@@ -99,35 +99,25 @@ const char *tag_value_extract(const char *name, char tag_buffer[5]) {
        return 0;
 }
 
-// pattern <one character or -AB[CD]
-const char *code_value_extract(const char *name, char tag_buffer[5]) {
+// Given an xmlNode ptr, extract a value from either the element name (the text after its first character) or the named attribute
+const char *element_attribute_value_extract(const xmlNode *ptr, const char *attribute_name, NMEM nmem) {
+
+       const char *name = ptr->name;
        size_t length = strlen(name);
-       if (length == 1 ) {
-               return name;
-       }
-       if (length > 2 && length < 6) {
-               if (name[0] != '-') {
-                       return 0;
-               }
-               length--;
-               const char *ptr = name+1;
-               int index = 0;
-               for (index = 0; index < length/2; index++) {
-                       unsigned int value;
-                       char temp[3];
-                       strncpy(temp, ptr + 2*index, 2);
-                       sscanf(temp, "%02X", &value);
-                       tag_buffer[index] = (unsigned char) value;
-               }
-               tag_buffer[index] = '\0';
-               if (index > 0)
-                       return tag_buffer;
+       if (length > 1 ) {
+               return nmem_strdup(nmem, name+1);
        }
+       // Otherwise, extract the value from the attribute whose name matches attribute_name
+       xmlAttr *attr;
+    for (attr = ptr->properties; attr; attr = attr->next)
+        if (!strcmp((const char *)attr->name, attribute_name)) {
+               return nmem_text_node_cdata(attr->children, nmem);
+        }
        return 0;
 }
 
 
-int yaz_marc_read_turbo_xml_subfields(yaz_marc_t mt, const xmlNode *ptr, char indicators[11])
+int yaz_marc_read_turbo_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
 {
     NMEM nmem = yaz_marc_get_nmem(mt);
     for (; ptr; ptr = ptr->next)
@@ -135,22 +125,11 @@ int yaz_marc_read_turbo_xml_subfields(yaz_marc_t mt, const xmlNode *ptr, char in
         if (ptr->type == XML_ELEMENT_NODE)
         {
                xmlNode *p;
-               if (!strncmp((const char *) ptr->name, "i", 1)) {
-               int length = strlen(ptr->name+1);
-               if (length > 0) {
-                       int index = (int)strtol(ptr->name+1, (char **)NULL, 10);
-                               for (p = ptr->children; p ; p = p->next)
-                        if (p->type == XML_TEXT_NODE) {
-                            indicators[index] = ((const char *)p->content)[0];
-                            break;
-                        }
-               }
-            }
-            else if (!strncmp((const char *) ptr->name, "s", 1))
+            if (!strncmp((const char *) ptr->name, "s", 1))
             {
                        NMEM nmem = yaz_marc_get_nmem(mt);
                        char *buffer = (char *) nmem_malloc(nmem, 5);
-                               const char *tag_value = code_value_extract((ptr->name+1), buffer);
+                               const char *tag_value = element_attribute_value_extract(ptr, "code", nmem);
                 if (!tag_value)
                 {
                     yaz_marc_cprintf(
@@ -314,7 +293,7 @@ static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
     return 0;
 }
 
-struct yaz_marc_node* yaz_marc_add_datafield_turbo_xml(yaz_marc_t mt, char *tag_value);
+void yaz_marc_add_datafield_turbo_xml(yaz_marc_t mt, char *tag_value, char *indicators);
 
 static int yaz_marc_read_turbo_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
 {
@@ -324,8 +303,7 @@ static int yaz_marc_read_turbo_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
                if (!strncmp( (const char *) ptr->name, "c", 1))
             {
                        NMEM nmem = yaz_marc_get_nmem(mt);
-                       char *buffer = (char *) nmem_malloc(nmem, 5);
-                       const char *tag_value = tag_value_extract((const char *)(ptr->name+1), buffer);
+                       const char *tag_value = element_attribute_value_extract(ptr, "tag", nmem);
                 if (!tag_value)
                 {
                     yaz_marc_cprintf(
@@ -336,21 +314,38 @@ static int yaz_marc_read_turbo_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
             }
             else if (!strncmp((const char *) ptr->name, "d",1))
             {
+                struct _xmlAttr *attr;
                        NMEM nmem = yaz_marc_get_nmem(mt);
                 char *indstr = nmem_malloc(nmem, 11);  /* 0(unused), 1,....9, + zero term */
-                       char *buffer = (char *) nmem_malloc(nmem, 5);
-                               const char *tag_value = tag_value_extract(ptr->name+1, buffer);
+                int index = 0;
+                for (index = 0; index < 11; index++)
+                                       indstr[index] = '\0';
+                       const char *tag_value = element_attribute_value_extract(ptr, "tag", nmem);
                 if (!tag_value)
                                {
                     yaz_marc_cprintf(
                         mt, "Missing attribute 'tag' for 'datafield'" );
                     return -1;
                 }
+                for (attr = ptr->properties; attr; attr = attr->next)
+                    if (strlen((const char *)attr->name) == 2 &&
+                             attr->name[0] == 'i')
+                    {
+                       //extract indicator attribute from i#="Y" pattern
+                        int no = atoi((const char *)attr->name+1);
+                        if (attr->children
+                            && attr->children->type == XML_TEXT_NODE)
+                            indstr[no] = attr->children->content[0];
+                    }
+                    else
+                    {
+                        yaz_marc_cprintf(
+                            mt, "Bad attribute '%.80s' for 'datafield'",
+                            attr->name);
+                    }
                 /* note that indstr[0] is unused so we use indstr[1..] */
-                struct yaz_marc_node *n = yaz_marc_add_datafield_turbo_xml(mt, tag_value);
-
-                int rc = yaz_marc_read_turbo_xml_subfields(mt, ptr->children, indstr);
-                yaz_marc_datafield_set_indicators(n, indstr+1, strlen(indstr+1));
+                yaz_marc_add_datafield_turbo_xml(mt, tag_value, indstr+1);
+                int rc = yaz_marc_read_turbo_xml_subfields(mt, ptr->children /*, indstr */);
                 if (rc)
                     return -1;
             }
@@ -376,9 +371,15 @@ int yaz_marc_read_xml(yaz_marc_t mt, const xmlNode *ptr)
     for(; ptr; ptr = ptr->next)
         if (ptr->type == XML_ELEMENT_NODE)
         {
-            if (!strcmp((const char *) ptr->name, "record"))
+                       //TODO Should actually look at the namespace but...
+            if (!strcmp((const char *) ptr->name, "record")) {
+               yaz_marc_set_read_format(mt, YAZ_MARC_MARCXML);
                 break;
-            else
+            }
+            else if (!strcmp((const char *) ptr->name, "r")) {
+               yaz_marc_set_read_format(mt, YAZ_MARC_TMARCXML);
+                break;
+            }
             {
                 yaz_marc_cprintf(
                     mt, "Unknown element '%.80s' in MARC XML reader",