Make it possible to write valid Turbo MARCXML without using libxml2.
[yaz-moved-to-github.git] / src / marcdisp.c
index 10927cd..8a40939 100644 (file)
@@ -87,10 +87,10 @@ struct yaz_marc_subfield {
 struct yaz_marc_t_ {
     WRBUF m_wr;
     NMEM nmem;
-    int xml;
+    int input_format;
+    int output_format;
     int debug;
     int write_using_libxml2;
-    int turbo_format;
     enum yaz_collection_state enable_collection;
     yaz_iconv_t iconv_cd;
     char subfield_str[8];
@@ -104,7 +104,7 @@ struct yaz_marc_t_ {
 yaz_marc_t yaz_marc_create(void)
 {
     yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
-    mt->xml = YAZ_MARC_LINE;
+    mt->output_format = YAZ_MARC_LINE;
     mt->debug = 0;
     mt->write_using_libxml2 = 0;
     mt->enable_collection = no_collection;
@@ -162,6 +162,16 @@ void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
     n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
     n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
 }
+
+/** \brief adds a control field node whose tag is given as a C string
+    \param mt handle
+    \param tag field tag; the pointer itself is stored in the node
+    \param ptr_data XML node passed to nmem_text_node_cdata (with mt->nmem)
+           to obtain the field data
+
+    NOTE(review): yaz_marc_add_controlfield_xml obtains its tag through
+    nmem_text_node_cdata, whereas here the caller's pointer is kept as-is.
+    Presumably the caller must keep tag valid for as long as the node is
+    used - confirm against the callers.
+*/
+void yaz_marc_add_controlfield_turbo_xml(yaz_marc_t mt, char *tag,
+                                   const xmlNode *ptr_data)
+{
+    struct yaz_marc_node *field = yaz_marc_add_node(mt);
+
+    field->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
+    field->u.controlfield.tag = tag;
+    field->which = YAZ_MARC_CONTROLFIELD;
+}
+
 #endif
 
 
@@ -245,6 +255,25 @@ void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
     /* make subfield_pp the current (last one) */
     mt->subfield_pp = &n->u.datafield.subfields;
 }
+
+/** \brief adds a data field node with a tag but no indicators or subfields
+    \param mt handle
+    \param tag_value field tag; the pointer itself is stored in the node
+    \returns the node that was added
+
+    The indicator and subfield list start out as 0; indicators can be set
+    afterwards on the returned node.
+*/
+struct yaz_marc_node* yaz_marc_add_datafield_turbo_xml(yaz_marc_t mt, char *tag_value)
+{
+    struct yaz_marc_node *field = yaz_marc_add_node(mt);
+
+    field->which = YAZ_MARC_DATAFIELD;
+    field->u.datafield.tag = tag_value;
+    field->u.datafield.subfields = 0;
+    field->u.datafield.indicator = 0;
+
+    /* point subfield_pp at this field's (still empty) subfield list */
+    mt->subfield_pp = &field->u.datafield.subfields;
+    return field;
+}
+
+/** \brief sets the indicator string of a data field node
+    \param n node; its u.datafield member is written, so it is presumably
+           expected to be a YAZ_MARC_DATAFIELD node (not checked here)
+    \param indicator indicator string; the pointer is stored, not copied
+*/
+void yaz_marc_datafield_set_indicators(struct yaz_marc_node *n, char *indicator)
+{
+    n->u.datafield.indicator = indicator;
+}
+
 #endif
 
 void yaz_marc_add_subfield(yaz_marc_t mt,
@@ -504,9 +533,10 @@ int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
 {
     if (mt->enable_collection == collection_second)
     {
-        switch(mt->xml)
+        switch(mt->output_format)
         {
         case YAZ_MARC_MARCXML:
+        case YAZ_MARC_TMARCXML:
             wrbuf_printf(wr, "</collection>\n");
             break;
         case YAZ_MARC_XCHANGE:
@@ -524,7 +554,7 @@ void yaz_marc_enable_collection(yaz_marc_t mt)
 
 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
 {
-    switch(mt->xml)
+    switch(mt->output_format)
     {
     case YAZ_MARC_LINE:
         return yaz_marc_write_line(mt, wr);
@@ -541,6 +571,14 @@ int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
     return -1;
 }
 
+/* XML element names used by the writer below, indexed by its "turbo" flag:
+   [0] is used when the write format is not YAZ_MARC_TMARCXML,
+   [1] is the short name used for YAZ_MARC_TMARCXML.
+   NOTE(review): these tables have external linkage and very generic names
+   (subfield_name is also used as a local variable name further down);
+   consider making them static const if no other file refers to them. */
+const char *collection_name[2] = { "collection", "collection" };
+const char *record_name[2] = { "record", "r" };
+const char *leader_name[2] = { "leader", "l" };
+const char *controlfield_name[2] = { "controlfield", "c" };
+const char *datafield_name[2] = { "datafield", "d" };
+const char *subfield_name[2] = { "subfield", "s" };
+
+
 /** \brief common MARC XML/Xchange writer
     \param mt handle
     \param wr WRBUF output
@@ -557,6 +595,162 @@ static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr,
     int identifier_length;
     const char *leader = 0;
 
+    int turbo = yaz_marc_get_write_format(mt) == YAZ_MARC_TMARCXML;
+
+    for (n = mt->nodes; n; n = n->next)
+        if (n->which == YAZ_MARC_LEADER)
+        {
+            leader = n->u.leader;
+            break;
+        }
+    
+    if (!leader)
+        return -1;
+    if (!atoi_n_check(leader+11, 1, &identifier_length))
+        return -1;
+    
+    if (mt->enable_collection != no_collection)
+    {
+        if (mt->enable_collection == collection_first)
+            wrbuf_printf(wr, "<%s xmlns=\"%s\">\n", collection_name[turbo], ns);
+        mt->enable_collection = collection_second;
+        wrbuf_printf(wr, "<%s", record_name[turbo]);
+    }
+    else
+    {
+        wrbuf_printf(wr, "<%s xmlns=\"%s\"", record_name[turbo], ns);
+    }
+    if (format)
+        wrbuf_printf(wr, " format=\"%.80s\"", format);
+    if (type)
+        wrbuf_printf(wr, " type=\"%.80s\"", type);
+    wrbuf_printf(wr, ">\n");
+    for (n = mt->nodes; n; n = n->next)
+    {
+        struct yaz_marc_subfield *s;
+
+        switch(n->which)
+        {
+        case YAZ_MARC_DATAFIELD:
+
+               wrbuf_printf(wr, "  <%s", datafield_name[turbo]);
+            if (!turbo) {
+               wrbuf_printf(wr, " tag=\"");
+               wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
+                                    strlen(n->u.datafield.tag));
+                   wrbuf_printf(wr, "\"");
+               if (n->u.datafield.indicator)
+                   {
+                   int i;
+                       for (i = 0; n->u.datafield.indicator[i]; i++)
+                       {
+                       wrbuf_printf(wr, " ind%d=\"", i+1);
+                           wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
+                                             n->u.datafield.indicator+i, 1);
+                           wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
+                       }
+               }
+                   wrbuf_printf(wr, ">\n");
+            } else {
+               // TODO Not CDATA.
+               wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
+                               strlen(n->u.datafield.tag));
+               // Write tag
+               wrbuf_printf(wr, ">\n");
+               if (n->u.datafield.indicator)
+               {
+                       int i;
+                       for (i = 0; n->u.datafield.indicator[i]; i++)
+                       {
+                               wrbuf_printf(wr, "    <i%d>", i+1);
+                               wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
+                                               n->u.datafield.indicator+i, 1);
+                               wrbuf_printf(wr, "</i%d>", i+1);
+                        wrbuf_puts(wr, "\n");
+                       }
+               }
+            }
+            for (s = n->u.datafield.subfields; s; s = s->next)
+            {
+                size_t using_code_len = get_subfield_len(mt, s->code_data,
+                                                         identifier_length);
+                wrbuf_printf(wr, "    <%s", subfield_name[turbo]);
+                               if (!turbo) {
+                                       wrbuf_printf(wr, " code=\"");
+                       wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
+                                        s->code_data, using_code_len);
+                       wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
+                               } else {
+                                       // TODO check this. encode special characters.
+                       wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
+                                        s->code_data, using_code_len);
+                                       wrbuf_puts(wr, ">");
+                               }
+                wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
+                                        s->code_data + using_code_len,
+                                        strlen(s->code_data + using_code_len));
+                marc_iconv_reset(mt, wr);
+                               wrbuf_printf(wr, "</%s", subfield_name[turbo]);
+               wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
+                                    s->code_data, using_code_len);
+                wrbuf_puts(wr, ">\n");
+            }
+            wrbuf_printf(wr, "  </%s", datafield_name[turbo]);
+               //TODO Not CDATA
+            wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
+                       strlen(n->u.datafield.tag));
+            wrbuf_printf(wr, ">\n", datafield_name[turbo]);
+            break;
+        case YAZ_MARC_CONTROLFIELD:
+               wrbuf_printf(wr, "  <%s", controlfield_name[turbo]);
+               if (!turbo) {
+               wrbuf_printf(wr, " tag=\"");
+                       wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
+                                       strlen(n->u.controlfield.tag));
+                       wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
+               }
+               else {
+                       //TODO convert special
+                       wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
+                                       strlen(n->u.controlfield.tag));
+                       wrbuf_iconv_puts(wr, mt->iconv_cd, ">");
+               }
+               wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
+                                                                       n->u.controlfield.data,
+                                                                       strlen(n->u.controlfield.data));
+               marc_iconv_reset(mt, wr);
+               wrbuf_printf(wr, "</%s", controlfield_name[turbo]);
+               //TODO convert special
+               wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
+                               strlen(n->u.controlfield.tag));
+               wrbuf_puts(wr, ">\n");
+            break;
+        case YAZ_MARC_COMMENT:
+            wrbuf_printf(wr, "<!-- ");
+            wrbuf_puts(wr, n->u.comment);
+            wrbuf_printf(wr, " -->\n");
+            break;
+        case YAZ_MARC_LEADER:
+            wrbuf_printf(wr, "  <%s>", leader_name[turbo]);
+            wrbuf_iconv_write_cdata(wr, 
+                                    0 /* no charset conversion for leader */,
+                                    n->u.leader, strlen(n->u.leader));
+            wrbuf_printf(wr, "  </%s>", leader_name[turbo]);
+        }
+    }
+    wrbuf_printf(wr, "</%s>", record_name[turbo]);
+    return 0;
+}
+
+static int yaz_marc_write_marcxml_ns2(yaz_marc_t mt, WRBUF wr,
+                                      const char *ns, 
+                                      const char *format,
+                                      const char *type)
+{
+    struct yaz_marc_node *n;
+    int identifier_length;
+    const char *leader = 0;
+
     for (n = mt->nodes; n; n = n->next)
         if (n->which == YAZ_MARC_LEADER)
         {
@@ -655,6 +849,7 @@ static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr,
     return 0;
 }
 
+
 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
                                      const char *ns, 
                                      const char *format,
@@ -666,9 +861,9 @@ static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
         int ret;
         xmlNode *root_ptr;
 
-        if (!mt->turbo_format)
+        if (yaz_marc_get_write_format(mt) == YAZ_MARC_MARCXML)
                ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
-        else
+        else // Check for Turbo XML
                ret = yaz_marc_write_turbo_xml(mt, &root_ptr, ns, format, type);
         if (ret == 0)
         {
@@ -699,7 +894,10 @@ int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
     /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
     if (!mt->leader_spec)
         yaz_marc_modify_leader(mt, 9, "a");
-    return yaz_marc_write_marcxml_ns(mt, wr, "http://www.loc.gov/MARC21/slim",
+    char *name_space = "http://www.loc.gov/MARC21/slim";
+    if (mt->output_format == YAZ_MARC_TMARCXML)
+       name_space = "http://www.indexdata.com/MARC21/turboxml";
+    return yaz_marc_write_marcxml_ns(mt, wr, name_space,
                                      0, 0);
 }
 
@@ -714,11 +912,11 @@ int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
 
 #if YAZ_HAVE_XML2
 
-void add_marc_datafield_xml2(yaz_marc_t mt, struct yaz_marc_node *n, xmlNode *record_ptr, xmlNsPtr ns_record, WRBUF wr_cdata, int identifier_length)
+void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n, xmlNode *record_ptr, xmlNsPtr ns_record, WRBUF wr_cdata, int identifier_length)
 {
     xmlNode *ptr;
     struct yaz_marc_subfield *s;
-    int turbo = mt->turbo_format;
+    int turbo = mt->output_format == YAZ_MARC_TMARCXML;
     if (!turbo) {
         ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
         xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
@@ -728,6 +926,7 @@ void add_marc_datafield_xml2(yaz_marc_t mt, struct yaz_marc_node *n, xmlNode *re
        char field[10];
        field[0] = 'd';
         strncpy(field + 1, n->u.datafield.tag, 3);
+        field[4] = '\0';
         ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST field, 0);
     }
     if (n->u.datafield.indicator)
@@ -738,10 +937,16 @@ void add_marc_datafield_xml2(yaz_marc_t mt, struct yaz_marc_node *n, xmlNode *re
             char ind_str[6];
             char ind_val[2];
             
-            sprintf(ind_str, "ind%d", i+1);
             ind_val[0] = n->u.datafield.indicator[i];
             ind_val[1] = '\0';
-            xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
+            if (!turbo) {
+                sprintf(ind_str, "ind%d", i+1);
+               xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
+            }
+            else {
+                sprintf(ind_str, "i%d", i+1);
+               xmlNewTextChild(ptr, ns_record, BAD_CAST ind_str, BAD_CAST ind_val);
+            }
         }
     }
        WRBUF subfield_name = wrbuf_alloc();
@@ -772,13 +977,21 @@ void add_marc_datafield_xml2(yaz_marc_t mt, struct yaz_marc_node *n, xmlNode *re
                                (s->code_data[0] >= 'A' && s->code_data[0] <= 'Z'))
                {
                        wrbuf_iconv_write(subfield_name, mt->iconv_cd,s->code_data, using_code_len);
-               ptr_subfield = xmlNewTextChild(ptr, ns_record,
-                               BAD_CAST wrbuf_cstr(subfield_name),
-                               BAD_CAST wrbuf_cstr(wr_cdata));
                }
-               else
-                       //TODO FIX
-                               yaz_log(YLOG_WARN, "Dropping subfield: %s", s->code_data);
+               else {
+                   /* Fallback branch: the subfield code was not accepted by the
+                      condition above, so the element name becomes "-" followed by
+                      the code bytes written as two upper-case hex digits each. */
+                   char buffer[2*using_code_len + 1];
+                   int index;
+
+                   /* keep buffer a valid (empty) string when the code length is 0 */
+                   buffer[0] = '\0';
+                   for (index = 0; index < using_code_len; index++) {
+                       /* sprintf writes two digits plus a terminating NUL, so after
+                          the last round the string ends at buffer[2*using_code_len],
+                          the last valid index; no extra terminator is needed (the
+                          former one was written two bytes past the array). */
+                       sprintf(buffer + 2*index, "%02X", (unsigned char) s->code_data[index] & 0xFF);
+                   }
+                   wrbuf_puts(subfield_name, "-");
+                   wrbuf_puts(subfield_name, buffer);
+                   yaz_log(YLOG_WARN, "Using numeric value in element name: %s", buffer);
+               }
+               ptr_subfield = xmlNewTextChild(ptr, ns_record,
+                               BAD_CAST wrbuf_cstr(subfield_name),
+                               BAD_CAST wrbuf_cstr(wr_cdata));
         }
     }
        wrbuf_destroy(subfield_name);
@@ -795,7 +1008,7 @@ int yaz_marc_write_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
     xmlNode *record_ptr;
     xmlNsPtr ns_record;
     WRBUF wr_cdata = 0;
-    int turbo = mt->turbo_format;
+    int turbo = mt->output_format == YAZ_MARC_TMARCXML;
     for (n = mt->nodes; n; n = n->next)
         if (n->which == YAZ_MARC_LEADER)
         {
@@ -810,7 +1023,7 @@ int yaz_marc_write_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
 
     wr_cdata = wrbuf_alloc();
 
-    record_ptr = xmlNewNode(0, BAD_CAST "record");
+    record_ptr = xmlNewNode(0, BAD_CAST "r");
     *root_ptr = record_ptr;
 
     ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
@@ -828,7 +1041,7 @@ int yaz_marc_write_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
         switch(n->which)
         {
         case YAZ_MARC_DATAFIELD:
-               add_marc_datafield_xml2(mt, n, record_ptr, ns_record, wr_cdata, identifier_length);
+               add_marc_datafield_turbo_xml(mt, n, record_ptr, ns_record, wr_cdata, identifier_length);
             break;
         case YAZ_MARC_CONTROLFIELD:
             wrbuf_rewind(wr_cdata);
@@ -846,6 +1059,7 @@ int yaz_marc_write_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
                char field[10];
                                field[0] = 'c';
                 strncpy(field + 1, n->u.controlfield.tag, 3);
+                field[4] = '\0';
                 ptr = xmlNewTextChild(record_ptr, ns_record,
                                                                          BAD_CAST field,
                                                                          BAD_CAST wrbuf_cstr(wr_cdata));
@@ -1141,12 +1355,50 @@ int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
     return r;
 }
 
-void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
+/** \brief sets the input (read) format on the handle
+    \param mt handle; the call does nothing when it is NULL
+    \param format new value for the handle's input_format
+*/
+void yaz_marc_set_read_format(yaz_marc_t mt, int format)
+{
+    if (!mt)
+        return;
+    mt->input_format = format;
+}
+
+/** \brief returns the input (read) format of the handle
+    \param mt handle
+    \returns mt->input_format, or -1 when mt is NULL
+*/
+int yaz_marc_get_read_format(yaz_marc_t mt)
+{
+    return mt ? mt->input_format : -1;
+}
+
+
+/** \brief sets the output (write) format on the handle
+    \param mt handle; the call does nothing when it is NULL
+    \param format new value for the handle's output_format
+
+    Only output_format is changed; write_using_libxml2 is left untouched,
+    also when the format is YAZ_MARC_TMARCXML.
+*/
+void yaz_marc_set_write_format(yaz_marc_t mt, int format)
+{
+    if (!mt)
+        return;
+    mt->output_format = format;
+}
+
+/** \brief returns the output (write) format of the handle
+    \param mt handle
+    \returns mt->output_format, or -1 when mt is NULL
+*/
+int yaz_marc_get_write_format(yaz_marc_t mt)
 {
     if (mt)
-        mt->xml = xmlmode;
+        return mt->output_format;
+    return -1;
 }
 
+
+/**
+ * Deprecated: use yaz_marc_set_write_format() instead.
+ * Thin wrapper that forwards mt and xmlmode unchanged, so xmlmode becomes
+ * the write (output) format of the handle.
+ */
+void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
+{
+       yaz_marc_set_write_format(mt, xmlmode);
+}
+
+
+
 void yaz_marc_debug(yaz_marc_t mt, int level)
 {
     if (mt)
@@ -1254,9 +1506,9 @@ void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
     mt->write_using_libxml2 = enable;
 }
 
-void yaz_marc_write_turbo_format(yaz_marc_t mt, int enable)
+/** \brief tells whether the handle's output format is Turbo MARCXML
+    \param mt handle; dereferenced without a NULL check, unlike the
+           format getters/setters above
+    \returns non-zero when mt->output_format is YAZ_MARC_TMARCXML, else 0
+*/
+int yaz_marc_is_turbo_format(yaz_marc_t mt)
 {
-    mt->turbo_format = enable;
+    return mt->output_format == YAZ_MARC_TMARCXML;
 }