Added turbo marcxml format to output
[yaz-moved-to-github.git] / src / marcdisp.c
index 2f5f0d5..10927cd 100644 (file)
@@ -1,5 +1,5 @@
 /* This file is part of the YAZ toolkit.
- * Copyright (C) 1995-2008 Index Data
+ * Copyright (C) 1995-2010 Index Data
  * See the file LICENSE for details.
  */
 
@@ -90,6 +90,7 @@ struct yaz_marc_t_ {
     int xml;
     int debug;
     int write_using_libxml2;
+    int turbo_format;
     enum yaz_collection_state enable_collection;
     yaz_iconv_t iconv_cd;
     char subfield_str[8];
@@ -274,16 +275,6 @@ void yaz_marc_add_subfield(yaz_marc_t mt,
     }
 }
 
-int atoi_n_check(const char *buf, int size, int *val)
-{
-    int i;
-    for (i = 0; i < size; i++)
-        if (!isdigit(i[(const unsigned char *) buf]))
-            return 0;
-    *val = atoi_n(buf, size);
-    return 1;
-}
-
 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
                          int *indicator_length,
                          int *identifier_length,
@@ -425,7 +416,7 @@ int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
         case YAZ_MARC_COMMENT:
             wrbuf_iconv_write(wr, mt->iconv_cd, 
                               n->u.comment, strlen(n->u.comment));
-            wrbuf_puts(wr, ")\n");
+            wrbuf_puts(wr, "\n");
             break;
         default:
             break;
@@ -434,6 +425,17 @@ int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
     return 0;
 }
 
+/* Number of bytes occupied by the subfield code at the start of data.
+   An identifier length above 2 fixes the code at identifier_length - 1
+   bytes. For 2 (most MARCs) or less (probably an error) the code is one
+   character; multibyte codes occur, so cdata_one_character measures it. */
+static size_t get_subfield_len(yaz_marc_t mt, const char *data,
+                               int identifier_length)
+{
+    if (identifier_length <= 2)
+        return cdata_one_character(mt, data);
+    return identifier_length - 1;
+}
 
 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
 {
@@ -463,13 +465,8 @@ int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
                          n->u.datafield.indicator);
             for (s = n->u.datafield.subfields; s; s = s->next)
             {
-                /* if identifier length is 2 (most MARCs),
-                   the code is a single character .. However we've
-                   seen multibyte codes, so see how big it really is */
-                size_t using_code_len = 
-                    (identifier_length != 2) ? identifier_length - 1
-                    :
-                    cdata_one_character(mt, s->code_data);
+                size_t using_code_len = get_subfield_len(mt, s->code_data,
+                                                         identifier_length);
                 
                 wrbuf_puts (wr, mt->subfield_str); 
                 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data, 
@@ -532,6 +529,7 @@ int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
     case YAZ_MARC_LINE:
         return yaz_marc_write_line(mt, wr);
     case YAZ_MARC_MARCXML:
+    case YAZ_MARC_TMARCXML:
         return yaz_marc_write_marcxml(mt, wr);
     case YAZ_MARC_XCHANGE:
         return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
@@ -612,14 +610,8 @@ static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr,
             wrbuf_printf(wr, ">\n");
             for (s = n->u.datafield.subfields; s; s = s->next)
             {
-                /* if identifier length is 2 (most MARCs),
-                   the code is a single character .. However we've
-                   seen multibyte codes, so see how big it really is */
-                size_t using_code_len = 
-                    (identifier_length != 2) ? identifier_length - 1
-                    :
-                    cdata_one_character(mt, s->code_data);
-                
+                size_t using_code_len = get_subfield_len(mt, s->code_data,
+                                                         identifier_length);
                 wrbuf_iconv_puts(wr, mt->iconv_cd, "    <subfield code=\"");
                 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
                                         s->code_data, using_code_len);
@@ -674,7 +666,10 @@ static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
         int ret;
         xmlNode *root_ptr;
 
-        ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
+        if (!mt->turbo_format)
+               ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
+        else
+               ret = yaz_marc_write_turbo_xml(mt, &root_ptr, ns, format, type);
         if (ret == 0)
         {
             xmlChar *buf_out;
@@ -713,12 +708,170 @@ int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
                                const char *type)
 {
     return yaz_marc_write_marcxml_ns(mt, wr,
-                                     "http://www.bs.dk/standards/MarcXchange",
+                                     "info:lc/xmlns/marcxchange-v1",
                                      0, 0);
 }
 
-
 #if YAZ_HAVE_XML2
+
+/* Append MARC datafield node n to record_ptr as a libxml2 child element.
+   Normal form: <datafield tag=..> with one <subfield code=..> per subfield.
+   Turbo form (mt->turbo_format set): element "d" + tag, with each subfield
+   as element "s" + code; subfields whose code does not start with an ASCII
+   letter or digit are logged and dropped. wr_cdata is scratch space that is
+   rewound before every use; indicators become ind1, ind2, .. attributes. */
+void add_marc_datafield_xml2(yaz_marc_t mt, struct yaz_marc_node *n, xmlNode *record_ptr, xmlNsPtr ns_record, WRBUF wr_cdata, int identifier_length)
+{
+    xmlNode *ptr;
+    struct yaz_marc_subfield *s;
+    int turbo = mt->turbo_format;
+    WRBUF subfield_name = wrbuf_alloc();
+
+    if (!turbo) {
+        ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
+        xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
+    }
+    else {
+        /* strncpy leaves no NUL when the tag has 3 or more characters, so
+           start from a zero-filled buffer to keep the name terminated */
+        char field[10] = "d";
+        strncpy(field + 1, n->u.datafield.tag, 3);
+        ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST field, 0);
+    }
+    if (n->u.datafield.indicator)
+    {
+        int i;
+        for (i = 0; n->u.datafield.indicator[i]; i++)
+        {
+            char ind_str[6];
+            char ind_val[2];
+            sprintf(ind_str, "ind%d", i+1);
+            ind_val[0] = n->u.datafield.indicator[i];
+            ind_val[1] = '\0';
+            xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
+        }
+    }
+    for (s = n->u.datafield.subfields; s; s = s->next)
+    {
+        xmlNode *ptr_subfield;
+        size_t using_code_len = get_subfield_len(mt, s->code_data,
+                                                 identifier_length);
+        wrbuf_rewind(wr_cdata);
+        wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, s->code_data + using_code_len);
+        marc_iconv_reset(mt, wr_cdata);
+        if (!turbo) {
+            ptr_subfield = xmlNewTextChild(ptr, ns_record, BAD_CAST "subfield",
+                                           BAD_CAST wrbuf_cstr(wr_cdata));
+            wrbuf_rewind(wr_cdata);
+            wrbuf_iconv_write(wr_cdata, mt->iconv_cd, s->code_data, using_code_len);
+            xmlNewProp(ptr_subfield, BAD_CAST "code",
+                       BAD_CAST wrbuf_cstr(wr_cdata));
+        }
+        else if ((s->code_data[0] >= '0' && s->code_data[0] <= '9') ||
+                 (s->code_data[0] >= 'a' && s->code_data[0] <= 'z') ||
+                 (s->code_data[0] >= 'A' && s->code_data[0] <= 'Z'))
+        {
+            /* TODO: map other codes to something legal in an element name */
+            wrbuf_rewind(subfield_name);
+            wrbuf_puts(subfield_name, "s");
+            wrbuf_iconv_write(subfield_name, mt->iconv_cd, s->code_data, using_code_len);
+            xmlNewTextChild(ptr, ns_record, BAD_CAST wrbuf_cstr(subfield_name),
+                            BAD_CAST wrbuf_cstr(wr_cdata));
+        }
+        else
+            yaz_log(YLOG_WARN, "Dropping subfield: %s", s->code_data);
+    }
+    wrbuf_destroy(subfield_name);
+}
+
+/* Build a libxml2 <record> tree from the nodes held by mt and hand it back
+   through *root_ptr. The identifier length is read from leader offset 11.
+   With mt->turbo_format set, short turbo names are used: "l" for the leader,
+   "c" + tag for control fields and "d" + tag for data fields; otherwise the
+   usual leader/controlfield/datafield elements are produced. ns is the
+   record namespace; format and type become attributes when non-null.
+   Returns 0 on success, -1 if no leader exists or offset 11 fails to parse. */
+int yaz_marc_write_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
+                       const char *ns,
+                       const char *format,
+                       const char *type)
+{
+    struct yaz_marc_node *n;
+    int identifier_length;
+    const char *leader = 0;
+    xmlNode *record_ptr;
+    xmlNsPtr ns_record;
+    WRBUF wr_cdata = 0;
+    int turbo = mt->turbo_format;
+
+    for (n = mt->nodes; n; n = n->next)
+        if (n->which == YAZ_MARC_LEADER)
+        {
+            leader = n->u.leader;
+            break;
+        }
+    if (!leader)
+        return -1;
+    if (!atoi_n_check(leader+11, 1, &identifier_length))
+        return -1;
+
+    wr_cdata = wrbuf_alloc();
+    record_ptr = xmlNewNode(0, BAD_CAST "record");
+    *root_ptr = record_ptr;
+    ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
+    xmlSetNs(record_ptr, ns_record);
+    if (format)
+        xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
+    if (type)
+        xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
+    for (n = mt->nodes; n; n = n->next)
+    {
+        xmlNode *ptr;
+
+        switch(n->which)
+        {
+        case YAZ_MARC_DATAFIELD:
+            add_marc_datafield_xml2(mt, n, record_ptr, ns_record, wr_cdata,
+                                    identifier_length);
+            break;
+        case YAZ_MARC_CONTROLFIELD:
+            wrbuf_rewind(wr_cdata);
+            wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
+            marc_iconv_reset(mt, wr_cdata);
+            if (!turbo) {
+                ptr = xmlNewTextChild(record_ptr, ns_record,
+                                      BAD_CAST "controlfield",
+                                      BAD_CAST wrbuf_cstr(wr_cdata));
+                xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
+            }
+            else {
+                /* strncpy leaves no NUL when the tag has 3 or more
+                   characters, so start from a zero-filled buffer.
+                   TODO: should the tag pass through iconv as well? */
+                char field[10] = "c";
+                strncpy(field + 1, n->u.controlfield.tag, 3);
+                ptr = xmlNewTextChild(record_ptr, ns_record, BAD_CAST field,
+                                      BAD_CAST wrbuf_cstr(wr_cdata));
+            }
+            break;
+        case YAZ_MARC_COMMENT:
+            ptr = xmlNewComment(BAD_CAST n->u.comment);
+            xmlAddChild(record_ptr, ptr);
+            break;
+        case YAZ_MARC_LEADER:
+            xmlNewTextChild(record_ptr, ns_record,
+                            BAD_CAST (turbo ? "l" : "leader"),
+                            BAD_CAST n->u.leader);
+            break;
+        default:
+            break;
+        }
+    }
+    wrbuf_destroy(wr_cdata);
+    return 0;
+}
+
+
 int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
                        const char *ns, 
                        const char *format,
@@ -782,20 +935,14 @@ int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
             for (s = n->u.datafield.subfields; s; s = s->next)
             {
                 xmlNode *ptr_subfield;
-                /* if identifier length is 2 (most MARCs),
-                   the code is a single character .. However we've
-                   seen multibyte codes, so see how big it really is */
-                size_t using_code_len = 
-                    (identifier_length != 2) ? identifier_length - 1
-                    :
-                    cdata_one_character(mt, s->code_data);
-
+                size_t using_code_len = get_subfield_len(mt, s->code_data,
+                                                         identifier_length);
                 wrbuf_rewind(wr_cdata);
                 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
                                  s->code_data + using_code_len);
                 marc_iconv_reset(mt, wr_cdata);
                 ptr_subfield = xmlNewTextChild(
-                    ptr, ns_record, 
+                    ptr, ns_record,
                     BAD_CAST "subfield",  BAD_CAST wrbuf_cstr(wr_cdata));
 
                 wrbuf_rewind(wr_cdata);
@@ -829,6 +976,10 @@ int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
     wrbuf_destroy(wr_cdata);
     return 0;
 }
+
+
+
+
 #endif
 
 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
@@ -1051,7 +1202,7 @@ static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
         no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read);
         if (no < 2 || no_read < 3)
             return -1;
-        if (pos < 0 || pos >= size)
+        if (pos < 0 || (size_t) pos >= size)
             return -1;
 
         if (*val == '\'')
@@ -1089,6 +1240,8 @@ int yaz_marc_decode_formatstr(const char *arg)
         mode = YAZ_MARC_ISO2709;
     if (!strcmp(arg, "marcxml"))
         mode = YAZ_MARC_MARCXML;
+    if (!strcmp(arg, "tmarcxml"))
+        mode = YAZ_MARC_TMARCXML;
     if (!strcmp(arg, "marcxchange"))
         mode = YAZ_MARC_XCHANGE;
     if (!strcmp(arg, "line"))
@@ -1101,9 +1254,16 @@ void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
     mt->write_using_libxml2 = enable;
 }
 
+/* Store enable in mt->turbo_format; non-zero selects turbo XML output. */
+void yaz_marc_write_turbo_format(yaz_marc_t mt, int enable)
+{
+    mt->turbo_format = enable;
+}
+
 /*
  * Local variables:
  * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
  * indent-tabs-mode: nil
  * End:
  * vim: shiftwidth=4 tabstop=8 expandtab