Fix dup'ed identifiers for MARC-8 encoding YAZ-650
[yaz-moved-to-github.git] / src / marcdisp.c
index 2542806..ff4c97e 100644 (file)
@@ -1,5 +1,5 @@
 /* This file is part of the YAZ toolkit.
- * Copyright (C) 1995-2010 Index Data
+ * Copyright (C) 1995-2013 Index Data
  * See the file LICENSE for details.
  */
 
@@ -20,7 +20,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <ctype.h>
 #include <yaz/marcdisp.h>
 #include <yaz/wrbuf.h>
 #include <yaz/yaz-util.h>
@@ -37,10 +36,10 @@ enum yaz_collection_state {
     collection_first,
     collection_second
 };
-   
+
 /** \brief node types for yaz_marc_node */
 enum YAZ_MARC_NODE_TYPE
-{ 
+{
     YAZ_MARC_DATAFIELD,
     YAZ_MARC_CONTROLFIELD,
     YAZ_MARC_COMMENT,
@@ -140,10 +139,12 @@ static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
 
 static int marc_exec_leader(const char *leader_spec, char *leader,
                             size_t size);
+#if YAZ_HAVE_XML2
 static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
-                                        const char *ns, 
+                                        const char *ns,
                                         const char *format,
                                         const char *type);
+#endif
 
 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
 {
@@ -243,13 +244,17 @@ void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
     mt->subfield_pp = &n->u.datafield.subfields;
 }
 
-// Magic function: adds a attribute value to the element name if it is plain characters.
-// if not, and if the attribute name is not null, it will append a attribute element with the value
-// if attribute name is null it will return a non-zero value meaning it couldnt handle the value.
+/** \brief adds an attribute value to the element name if it is plain chars
 
-int element_name_append_attribute_value(yaz_marc_t mt, WRBUF buffer, const char *attribute_name, char *code_data, size_t code_len)
+    If not, and if the attribute name is not null, it will append an
+    attribute element with the value. If the attribute name is null, it will
+    return a non-zero value, meaning it couldn't handle the value.
+*/
+static int element_name_append_attribute_value(
+    yaz_marc_t mt, WRBUF buffer,
+    const char *attribute_name, char *code_data, size_t code_len)
 {
-    // TODO Map special codes to something possible for XML ELEMENT names
+    /* TODO Map special codes to something possible for XML ELEMENT names */
 
     int encode = 0;
     int index = 0;
@@ -261,7 +266,7 @@ int element_name_append_attribute_value(yaz_marc_t mt, WRBUF buffer, const char
               (code_data[index] >= 'A' && code_data[index] <= 'Z')))
             encode = 1;
     }
-    // Add as attribute
+    /* Add as attribute */
     if (encode && attribute_name)
         wrbuf_printf(buffer, " %s=\"", attribute_name);
 
@@ -271,7 +276,7 @@ int element_name_append_attribute_value(yaz_marc_t mt, WRBUF buffer, const char
         success = -1;
 
     if (encode && attribute_name)
-        wrbuf_printf(buffer, "\"");    // return error if we couldn't handle it.
+        wrbuf_printf(buffer, "\""); /* return error if we couldn't handle it.*/
     return success;
 }
 
@@ -298,7 +303,7 @@ void yaz_marc_add_datafield_xml2(yaz_marc_t mt, char *tag_value, char *indicator
     n->u.datafield.indicator = indicators;
     n->u.datafield.subfields = 0;
 
-    // make subfield_pp the current (last one)
+    /* make subfield_pp the current (last one) */
     mt->subfield_pp = &n->u.datafield.subfields;
 }
 
@@ -337,6 +342,18 @@ void yaz_marc_add_subfield(yaz_marc_t mt,
     }
 }
 
+static void check_ascii(yaz_marc_t mt, char *leader, int offset,
+                        int ch_default)
+{ /* replaces leader[offset] with ch_default when it fails the range test */
+    if (leader[offset] < ' ' || leader[offset] > 127) /* below space or > 127 */
+    {
+        yaz_marc_cprintf(mt, /* pass the message below to yaz_marc_cprintf */
+                         "Leader character at offset %d is non-ASCII. "
+                         "Setting value to '%c'", offset, ch_default);
+        leader[offset] = ch_default; /* modifies the caller's buffer in place */
+    }
+}
+
 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
                          int *indicator_length,
                          int *identifier_length,
@@ -349,9 +366,14 @@ void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
 
     memcpy(leader, leader_c, 24);
 
+    check_ascii(mt, leader, 5, 'a');
+    check_ascii(mt, leader, 6, 'a');
+    check_ascii(mt, leader, 7, 'a');
+    check_ascii(mt, leader, 8, '#');
+    check_ascii(mt, leader, 9, '#');
     if (!atoi_n_check(leader+10, 1, indicator_length))
     {
-        yaz_marc_cprintf(mt, 
+        yaz_marc_cprintf(mt,
                          "Indicator length at offset 10 should hold a digit."
                          " Assuming 2");
         leader[10] = '2';
@@ -359,7 +381,7 @@ void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
     }
     if (!atoi_n_check(leader+11, 1, identifier_length))
     {
-        yaz_marc_cprintf(mt, 
+        yaz_marc_cprintf(mt,
                          "Identifier length at offset 11 should hold a digit."
                          " Assuming 2");
         leader[11] = '2';
@@ -367,14 +389,17 @@ void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
     }
     if (!atoi_n_check(leader+12, 5, base_address))
     {
-        yaz_marc_cprintf(mt, 
+        yaz_marc_cprintf(mt,
                          "Base address at offsets 12..16 should hold a number."
                          " Assuming 0");
         *base_address = 0;
     }
+    check_ascii(mt, leader, 17, '#');
+    check_ascii(mt, leader, 18, '#');
+    check_ascii(mt, leader, 19, '#');
     if (!atoi_n_check(leader+20, 1, length_data_entry))
     {
-        yaz_marc_cprintf(mt, 
+        yaz_marc_cprintf(mt,
                          "Length data entry at offset 20 should hold a digit."
                          " Assuming 4");
         *length_data_entry = 4;
@@ -390,12 +415,13 @@ void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
     }
     if (!atoi_n_check(leader+22, 1, length_implementation))
     {
-        yaz_marc_cprintf(mt, 
+        yaz_marc_cprintf(mt,
                          "Length implementation at offset 22 should hold a digit."
                          " Assuming 0");
         *length_implementation = 0;
         leader[22] = '0';
     }
+    check_ascii(mt, leader, 23, '0');
 
     if (mt->debug)
     {
@@ -437,6 +463,7 @@ static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
             size_t inbytesleft = i;
             size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
                                  &outp, &outbytesleft);
+            yaz_iconv(mt->iconv_cd, 0, 0, &outp, &outbytesleft);
             if (r != (size_t) (-1))
                 return i;  /* got a complete sequence */
         }
@@ -444,7 +471,7 @@ static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
     }
     return 1; /* we don't know */
 }
-                              
+
 void yaz_marc_reset(yaz_marc_t mt)
 {
     nmem_reset(mt->nmem);
@@ -465,7 +492,7 @@ int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
             leader = n->u.leader;
             break;
         }
-    
+
     if (!leader)
         return -1;
     if (!atoi_n_check(leader+11, 1, &identifier_length))
@@ -476,7 +503,7 @@ int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
         switch(n->which)
         {
         case YAZ_MARC_COMMENT:
-            wrbuf_iconv_write(wr, mt->iconv_cd, 
+            wrbuf_iconv_write(wr, mt->iconv_cd,
                               n->u.comment, strlen(n->u.comment));
             wrbuf_puts(wr, "\n");
             break;
@@ -511,7 +538,7 @@ int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
             leader = n->u.leader;
             break;
         }
-    
+
     if (!leader)
         return -1;
     if (!atoi_n_check(leader+11, 1, &identifier_length))
@@ -529,12 +556,12 @@ int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
             {
                 size_t using_code_len = get_subfield_len(mt, s->code_data,
                                                          identifier_length);
-                
-                wrbuf_puts (wr, mt->subfield_str); 
-                wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data, 
+
+                wrbuf_puts (wr, mt->subfield_str);
+                wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
                                   using_code_len);
                 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
-                wrbuf_iconv_puts(wr, mt->iconv_cd, 
+                wrbuf_iconv_puts(wr, mt->iconv_cd,
                                  s->code_data + using_code_len);
                 marc_iconv_reset(mt, wr);
             }
@@ -549,7 +576,7 @@ int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
             break;
         case YAZ_MARC_COMMENT:
             wrbuf_puts(wr, "(");
-            wrbuf_iconv_write(wr, mt->iconv_cd, 
+            wrbuf_iconv_write(wr, mt->iconv_cd,
                               n->u.comment, strlen(n->u.comment));
             marc_iconv_reset(mt, wr);
             wrbuf_puts(wr, ")\n");
@@ -623,7 +650,7 @@ static const char *subfield_name[2]         = { "subfield", "s"};
     \retval -1 failure
 */
 static int yaz_marc_write_marcxml_wrbuf(yaz_marc_t mt, WRBUF wr,
-                                        const char *ns, 
+                                        const char *ns,
                                         const char *format,
                                         const char *type,
                                         int turbo)
@@ -638,12 +665,12 @@ static int yaz_marc_write_marcxml_wrbuf(yaz_marc_t mt, WRBUF wr,
             leader = n->u.leader;
             break;
         }
-    
+
     if (!leader)
         return -1;
     if (!atoi_n_check(leader+11, 1, &identifier_length))
         return -1;
-    
+
     if (mt->enable_collection != no_collection)
     {
         if (mt->enable_collection == collection_first)
@@ -700,7 +727,7 @@ static int yaz_marc_write_marcxml_wrbuf(yaz_marc_t mt, WRBUF wr,
                     wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
                                             s->code_data, using_code_len);
                     wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
-                } 
+                }
                 else
                 {
                     element_name_append_attribute_value(mt, wr, "code", s->code_data, using_code_len);
@@ -716,7 +743,7 @@ static int yaz_marc_write_marcxml_wrbuf(yaz_marc_t mt, WRBUF wr,
                 wrbuf_puts(wr, ">\n");
             }
             wrbuf_printf(wr, "  </%s", datafield_name[turbo]);
-            //TODO Not CDATA
+            /* TODO Not CDATA */
             if (turbo)
                wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
                                         strlen(n->u.datafield.tag));
@@ -733,7 +760,7 @@ static int yaz_marc_write_marcxml_wrbuf(yaz_marc_t mt, WRBUF wr,
             }
             else
             {
-                //TODO convert special
+                /* TODO convert special */
                 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
                                        strlen(n->u.controlfield.tag));
                 wrbuf_iconv_puts(wr, mt->iconv_cd, ">");
@@ -743,7 +770,7 @@ static int yaz_marc_write_marcxml_wrbuf(yaz_marc_t mt, WRBUF wr,
                                     strlen(n->u.controlfield.data));
             marc_iconv_reset(mt, wr);
             wrbuf_printf(wr, "</%s", controlfield_name[turbo]);
-            //TODO convert special
+            /* TODO convert special */
             if (turbo)
                 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
                                        strlen(n->u.controlfield.tag));
@@ -767,7 +794,7 @@ static int yaz_marc_write_marcxml_wrbuf(yaz_marc_t mt, WRBUF wr,
 }
 
 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
-                                     const char *ns, 
+                                     const char *ns,
                                      const char *format,
                                      const char *type,
                                      int turbo)
@@ -846,7 +873,7 @@ void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n,
     struct yaz_marc_subfield *s;
     WRBUF subfield_name = wrbuf_alloc();
 
-    //TODO consider if safe
+    /* TODO consider if safe */
     char field[10];
     field[0] = 'd';
     strncpy(field + 1, n->u.datafield.tag, 3);
@@ -860,7 +887,7 @@ void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n,
         {
             char ind_str[6];
             char ind_val[2];
-            
+
             ind_val[0] = n->u.datafield.indicator[i];
             ind_val[1] = '\0';
             sprintf(ind_str, "%s%d", indicator_name[1], i+1);
@@ -876,7 +903,7 @@ void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n,
         wrbuf_rewind(wr_cdata);
         wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, s->code_data + using_code_len);
         marc_iconv_reset(mt, wr_cdata);
-        
+
         wrbuf_rewind(subfield_name);
         wrbuf_puts(subfield_name, "s");
         not_written = element_name_append_attribute_value(mt, subfield_name, 0, s->code_data, using_code_len) != 0;
@@ -885,7 +912,7 @@ void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n,
                                        BAD_CAST wrbuf_cstr(wr_cdata));
         if (not_written)
         {
-            // Generate code attribute value and add
+            /* Generate code attribute value and add */
             wrbuf_rewind(wr_cdata);
             wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len);
             xmlNewProp(ptr_subfield, BAD_CAST "code",  BAD_CAST wrbuf_cstr(wr_cdata));
@@ -895,7 +922,7 @@ void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n,
 }
 
 static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
-                                        const char *ns, 
+                                        const char *ns,
                                         const char *format,
                                         const char *type)
 {
@@ -912,7 +939,7 @@ static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
             leader = n->u.leader;
             break;
         }
-    
+
     if (!leader)
         return -1;
     if (!atoi_n_check(leader+11, 1, &identifier_length))
@@ -937,7 +964,7 @@ static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
         char field[10];
         field[0] = 'c';
         field[4] = '\0';
-            
+
         switch(n->which)
         {
         case YAZ_MARC_DATAFIELD:
@@ -947,7 +974,7 @@ static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
             wrbuf_rewind(wr_cdata);
             wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
             marc_iconv_reset(mt, wr_cdata);
-            
+
             strncpy(field + 1, n->u.controlfield.tag, 3);
             ptr = xmlNewTextChild(record_ptr, ns_record,
                                   BAD_CAST field,
@@ -958,13 +985,9 @@ static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
             xmlAddChild(record_ptr, ptr);
             break;
         case YAZ_MARC_LEADER:
-        {
-            char *field = "leader";
-            field = "l";
-            xmlNewTextChild(record_ptr, ns_record, BAD_CAST field,
+            xmlNewTextChild(record_ptr, ns_record, BAD_CAST "l",
                             BAD_CAST n->u.leader);
-        }
-        break;
+            break;
         }
     }
     wrbuf_destroy(wr_cdata);
@@ -973,7 +996,7 @@ static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
 
 
 int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
-                       const char *ns, 
+                       const char *ns,
                        const char *format,
                        const char *type)
 {
@@ -990,7 +1013,7 @@ int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
             leader = n->u.leader;
             break;
         }
-    
+
     if (!leader)
         return -1;
     if (!atoi_n_check(leader+11, 1, &identifier_length))
@@ -1056,11 +1079,11 @@ int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
             wrbuf_rewind(wr_cdata);
             wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
             marc_iconv_reset(mt, wr_cdata);
-            
+
             ptr = xmlNewTextChild(record_ptr, ns_record,
                                   BAD_CAST "controlfield",
                                   BAD_CAST wrbuf_cstr(wr_cdata));
-            
+
             xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
             break;
         case YAZ_MARC_COMMENT:
@@ -1091,11 +1114,11 @@ int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
     const char *leader = 0;
     WRBUF wr_dir, wr_head, wr_data_tmp;
     int base_address;
-    
+
     for (n = mt->nodes; n; n = n->next)
         if (n->which == YAZ_MARC_LEADER)
             leader = n->u.leader;
-    
+
     if (!leader)
         return -1;
     if (!atoi_n_check(leader+10, 1, &indicator_length))
@@ -1138,7 +1161,7 @@ int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
             wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
 
             wrbuf_rewind(wr_data_tmp);
-            wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, 
+            wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
                              n->u.controlfield.data);
             marc_iconv_reset(mt, wr_data_tmp);
             wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
@@ -1173,7 +1196,7 @@ int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
     wrbuf_printf(wr_head, "%05d", base_address);
     /* from "original" leader */
     wrbuf_write(wr_head, leader+17, 7);
-    
+
     wrbuf_write(wr, wrbuf_buf(wr_head), 24);
     wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
     wrbuf_destroy(wr_head);
@@ -1187,8 +1210,7 @@ int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
         switch(n->which)
         {
         case YAZ_MARC_DATAFIELD:
-            wrbuf_printf(wr, "%.*s", indicator_length,
-                         n->u.datafield.indicator);
+            wrbuf_write(wr, n->u.datafield.indicator, indicator_length);
             for (s = n->u.datafield.subfields; s; s = s->next)
             {
                 wrbuf_putc(wr, ISO2709_IDFS);
@@ -1305,7 +1327,7 @@ static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
         {
             const char *vp = strchr(val+1, '\'');
             size_t len;
-            
+
             if (!vp)
                 return -1;
             len = vp-val-1;
@@ -1331,7 +1353,7 @@ static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
 
 int yaz_marc_decode_formatstr(const char *arg)
 {
-    int mode = -1; 
+    int mode = -1;
     if (!strcmp(arg, "marc"))
         mode = YAZ_MARC_ISO2709;
     if (!strcmp(arg, "marcxml"))