Fixed bug #643: Bad sequence for MARC conversions from UTF-8 to MARC-8.
[yaz-moved-to-github.git] / src / marcdisp.c
index 846c38f..1d408be 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (C) 1995-2006, Index Data ApS
  * See the file LICENSE for details.
  *
- * $Id: marcdisp.c,v 1.27 2006-04-20 19:47:01 adam Exp $
+ * $Id: marcdisp.c,v 1.33 2006-08-28 12:34:40 adam Exp $
  */
 
 /**
@@ -26,7 +26,7 @@
 #include <yaz/wrbuf.h>
 #include <yaz/yaz-util.h>
 
-#if HAVE_XML2
+#if YAZ_HAVE_XML2
 #include <libxml/parser.h>
 #include <libxml/tree.h>
 #endif
@@ -132,25 +132,6 @@ void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
     n->u.comment = nmem_strdup(mt->nmem, comment);
 }
 
-#if HAVE_XML2
-static char *yaz_marc_get_xml_text(const xmlNode *ptr_cdata, NMEM nmem)
-{
-    char *cdata;
-    int len = 0;
-    const xmlNode *ptr;
-
-    for (ptr = ptr_cdata; ptr; ptr = ptr->next)
-        if (ptr->type == XML_TEXT_NODE)
-            len += xmlStrlen(ptr->content);
-    cdata = (char *) nmem_malloc(nmem, len+1);
-    *cdata = '\0';
-    for (ptr = ptr_cdata; ptr; ptr = ptr->next)
-        if (ptr->type == XML_TEXT_NODE)
-            strcat(cdata, (const char *) ptr->content);
-    return cdata;
-}
-#endif
-
 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
 {
     va_list ap;
@@ -200,14 +181,14 @@ void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
     }
 }
 
-#if HAVE_XML2
+#if YAZ_HAVE_XML2
 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
                                    const xmlNode *ptr_data)
 {
     struct yaz_marc_node *n = yaz_marc_add_node(mt);
     n->which = YAZ_MARC_CONTROLFIELD;
-    n->u.controlfield.tag = yaz_marc_get_xml_text(ptr_tag, mt->nmem);
-    n->u.controlfield.data = yaz_marc_get_xml_text(ptr_data, mt->nmem);
+    n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
+    n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
 }
 #endif
 
@@ -225,13 +206,13 @@ void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
     mt->subfield_pp = &n->u.datafield.subfields;
 }
 
-#if HAVE_XML2
+#if YAZ_HAVE_XML2
 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
                                 const char *indicator, size_t indicator_len)
 {
     struct yaz_marc_node *n = yaz_marc_add_node(mt);
     n->which = YAZ_MARC_DATAFIELD;
-    n->u.datafield.tag = yaz_marc_get_xml_text(ptr_tag, mt->nmem);
+    n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
     n->u.datafield.indicator =
         nmem_strdupn(mt->nmem, indicator, indicator_len);
     n->u.datafield.subfields = 0;
@@ -370,16 +351,6 @@ void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
     mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
 }
 
-static void marc_cdata (yaz_marc_t mt, const char *buf, size_t len, WRBUF wr)
-{
-    if (mt->xml == YAZ_MARC_ISO2709)
-        wrbuf_iconv_write(wr, mt->iconv_cd, buf, len);
-    else if (mt->xml == YAZ_MARC_LINE)
-        wrbuf_iconv_write(wr, mt->iconv_cd, buf, len);
-    else
-        wrbuf_iconv_write_cdata(wr, mt->iconv_cd, buf, len);
-}
-
 /* try to guess how many bytes the identifier really is! */
 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
 {
@@ -451,15 +422,20 @@ int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
                 wrbuf_puts (wr, mt->subfield_str); 
                 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data, 
                                   using_code_len);
-                wrbuf_printf(wr, " ");
+                wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
                 wrbuf_iconv_puts(wr, mt->iconv_cd, 
                                  s->code_data + using_code_len);
+                wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
+                wr->pos--;
             }
             wrbuf_puts (wr, mt->endline_str);
             break;
         case YAZ_MARC_CONTROLFIELD:
-            wrbuf_printf(wr, "%s ", n->u.controlfield.tag);
+            wrbuf_printf(wr, "%s", n->u.controlfield.tag);
+            wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
             wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
+            wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
+            wr->pos--;
             wrbuf_puts (wr, mt->endline_str);
             break;
         case YAZ_MARC_COMMENT:
@@ -484,15 +460,24 @@ int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
     case YAZ_MARC_MARCXML:
         return yaz_marc_write_marcxml(mt, wr);
     case YAZ_MARC_XCHANGE:
-        return yaz_marc_write_marcxchange(mt, wr);
+        return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
     case YAZ_MARC_ISO2709:
         return yaz_marc_write_iso2709(mt, wr);
     }
     return -1;
 }
 
+/** \brief common MARC XML/Xchange writer
+    \param mt handle
+    \param wr WRBUF output
+    \param ns XMLNS for the elements
+    \param format record format (e.g. "MARC21")
+    \param type record type (e.g. "Bibliographic")
+*/
 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
-                                     const char *ns)
+                                     const char *ns, 
+                                     const char *format,
+                                     const char *type)
 {
     struct yaz_marc_node *n;
     int identifier_length;
@@ -510,10 +495,16 @@ static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
     if (!atoi_n_check(leader+11, 1, &identifier_length))
         return -1;
 
-    wrbuf_printf(wr, "<record xmlns=\"%s\">\n", ns);
+    wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
+    if (format)
+        wrbuf_printf(wr, " format=\"%.80s\"", format);
+    if (type)
+        wrbuf_printf(wr, " type=\"%.80s\"", type);
+    wrbuf_printf(wr, ">\n");
     for (n = mt->nodes; n; n = n->next)
     {
         struct yaz_marc_subfield *s;
+
         switch(n->which)
         {
         case YAZ_MARC_DATAFIELD:
@@ -529,7 +520,7 @@ static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
                     wrbuf_printf(wr, " ind%d=\"", i+1);
                     wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
                                           n->u.datafield.indicator+i, 1);
-                    wrbuf_printf(wr, "\"");
+                    wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
                 }
             }
             wrbuf_printf(wr, ">\n");
@@ -543,14 +534,15 @@ static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
                     :
                     cdata_one_character(mt, s->code_data);
                 
-                wrbuf_puts(wr, "    <subfield code=\"");
+                wrbuf_iconv_puts(wr, mt->iconv_cd, "    <subfield code=\"");
                 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
                                         s->code_data, using_code_len);
-                wrbuf_puts(wr, "\">");
+                wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
                 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
                                         s->code_data + using_code_len,
                                         strlen(s->code_data + using_code_len));
-                wrbuf_puts(wr, "</subfield>\n");
+                wrbuf_iconv_puts(wr, mt->iconv_cd, "</subfield>");
+                wrbuf_puts(wr, "\n");
             }
             wrbuf_printf(wr, "  </datafield>\n");
             break;
@@ -558,12 +550,15 @@ static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
             wrbuf_printf(wr, "  <controlfield tag=\"");
             wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
                                     strlen(n->u.controlfield.tag));
-            wrbuf_printf(wr, "\">");
+            wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
             wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
-            wrbuf_printf(wr, "</controlfield>\n");
+            wrbuf_iconv_puts(wr, mt->iconv_cd, "</controlfield>");
+            wrbuf_puts(wr, "\n");
             break;
         case YAZ_MARC_COMMENT:
-            wrbuf_printf(wr, "<!-- %s -->\n", n->u.comment);
+            wrbuf_printf(wr, "<!-- ");
+            wrbuf_puts(wr, n->u.comment);
+            wrbuf_printf(wr, " -->\n");
             break;
         case YAZ_MARC_LEADER:
             wrbuf_printf(wr, "  <leader>");
@@ -580,13 +575,17 @@ static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
 {
     yaz_marc_modify_leader(mt, 9, "a");
-    return yaz_marc_write_marcxml_ns(mt, wr, "http://www.loc.gov/MARC21/slim");
+    return yaz_marc_write_marcxml_ns(mt, wr, "http://www.loc.gov/MARC21/slim",
+                                     0, 0);
 }
 
-int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr)
+int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
+                               const char *format,
+                               const char *type)
 {
     return yaz_marc_write_marcxml_ns(mt, wr,
-                                     "http://www.bs.dk/standards/MarcXchange");
+                                     "http://www.bs.dk/standards/MarcXchange",
+                                     format, type); /* pass through */
 }
 
 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
@@ -599,7 +598,7 @@ int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
     int length_implementation;
     int data_offset = 0;
     const char *leader = 0;
-    WRBUF wr_dir, wr_head;
+    WRBUF wr_dir, wr_head, wr_data_tmp;
     int base_address;
     
     for (n = mt->nodes; n; n = n->next)
@@ -619,24 +618,37 @@ int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
     if (!atoi_n_check(leader+22, 1, &length_implementation))
         return -1;
 
+    wr_data_tmp = wrbuf_alloc();
     wr_dir = wrbuf_alloc();
     for (n = mt->nodes; n; n = n->next)
     {
         int data_length = 0;
         struct yaz_marc_subfield *s;
+
         switch(n->which)
         {
         case YAZ_MARC_DATAFIELD:
             wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
             data_length += indicator_length;
+            wrbuf_rewind(wr_data_tmp);
             for (s = n->u.datafield.subfields; s; s = s->next)
-                data_length += 1+strlen(s->code_data);
-            data_length++;
+            {
+                /* write dummy IDFS + content */
+                wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
+                wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
+            }
+            /* write dummy FS (makes MARC-8 output revert to ASCII) */
+            wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
+            data_length += wrbuf_len(wr_data_tmp);
             break;
         case YAZ_MARC_CONTROLFIELD:
             wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
-            data_length += strlen(n->u.controlfield.data);
-            data_length++;
+
+            wrbuf_rewind(wr_data_tmp);
+            wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, 
+                             n->u.controlfield.data);
+            wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
+            data_length += wrbuf_len(wr_data_tmp);
             break;
         case YAZ_MARC_COMMENT:
             break;
@@ -671,21 +683,33 @@ int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
     wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
     wrbuf_free(wr_head, 1);
     wrbuf_free(wr_dir, 1);
+    wrbuf_free(wr_data_tmp, 1);
 
     for (n = mt->nodes; n; n = n->next)
     {
         struct yaz_marc_subfield *s;
+
         switch(n->which)
         {
         case YAZ_MARC_DATAFIELD:
             wrbuf_printf(wr, "%.*s", indicator_length,
                          n->u.datafield.indicator);
             for (s = n->u.datafield.subfields; s; s = s->next)
-                wrbuf_printf(wr, "%c%s", ISO2709_IDFS, s->code_data);
-            wrbuf_printf(wr, "%c", ISO2709_FS);
+            {
+                wrbuf_putc(wr, ISO2709_IDFS);
+                wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
+                /* write dummy blank - makes MARC-8 output revert to ASCII */
+                wrbuf_iconv_putchar(wr, mt->iconv_cd, ' ');
+                wr->pos--;
+            }
+            wrbuf_putc(wr, ISO2709_FS);
             break;
         case YAZ_MARC_CONTROLFIELD:
-            wrbuf_printf(wr, "%s%c", n->u.controlfield.data, ISO2709_FS);
+            wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
+            /* write dummy blank - makes MARC-8 output revert to ASCII */
+            wrbuf_iconv_putchar(wr, mt->iconv_cd, ' ');
+            wr->pos--;
+            wrbuf_putc(wr, ISO2709_FS);
             break;
         case YAZ_MARC_COMMENT:
             break;
@@ -697,7 +721,7 @@ int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
     return 0;
 }
 
-#if HAVE_XML2
+#if YAZ_HAVE_XML2
 int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
 {
     for (; ptr; ptr = ptr->next)