Support read/write MARCXML collections.
author Adam Dickmeiss <adam@indexdata.dk>
Mon, 17 Dec 2007 20:59:30 +0000 (20:59 +0000)
committer Adam Dickmeiss <adam@indexdata.dk>
Mon, 17 Dec 2007 20:59:30 +0000 (20:59 +0000)
13 files changed:
include/yaz/marcdisp.h
src/marc_read_xml.c
src/marcdisp.c
test/marc1.xml
test/marc2.xml
test/marc3.xml
test/marc4.xml
test/marc5.xml
test/marc6.xml
test/marc7.xml
test/marc8.xml
test/marc9.xml
util/marcdump.c

index dd92044..75820ce 100644 (file)
@@ -24,7 +24,7 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
-/* $Id: marcdisp.h,v 1.29 2007-12-16 11:08:50 adam Exp $ */
+/* $Id: marcdisp.h,v 1.30 2007-12-17 20:59:30 adam Exp $ */
 
 /**
  * \file marcdisp.h
@@ -367,6 +367,19 @@ YAZ_EXPORT void yaz_display_OPAC(WRBUF wrbuf, Z_OPACRecord *r, int flags);
 YAZ_EXPORT void yaz_opac_decode_wrbuf(yaz_marc_t mt, Z_OPACRecord *r, WRBUF wrbuf);
 
 
+/** \brief writes trailer (closes the collection element, if one was opened)
+    \param mt handle
+    \param wr WRBUF for output
+    \retval 0 OK
+    \retval -1 ERROR
+*/  
+YAZ_EXPORT int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr);
+
+/** \brief enables record collection output
+    \param mt handle
+*/  
+YAZ_EXPORT void yaz_marc_enable_collection(yaz_marc_t mt);
+
 YAZ_END_CDECL
 
 #endif
index caf5a55..8f4ffd6 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (C) 1995-2007, Index Data ApS
  * See the file LICENSE for details.
  *
- * $Id: marc_read_xml.c,v 1.3 2007-05-06 20:12:20 adam Exp $
+ * $Id: marc_read_xml.c,v 1.4 2007-12-17 20:59:30 adam Exp $
  */
 
 /**
@@ -224,6 +224,8 @@ static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
 int yaz_marc_read_xml(yaz_marc_t mt, const xmlNode *ptr)
 {
 #if YAZ_HAVE_XML2
+    yaz_marc_reset(mt);
+
     for(; ptr; ptr = ptr->next)
         if (ptr->type == XML_ELEMENT_NODE)
         {
index 0dd8aaa..e1c6b03 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (C) 1995-2007, Index Data ApS
  * See the file LICENSE for details.
  *
- * $Id: marcdisp.c,v 1.51 2007-09-20 17:22:45 adam Exp $
+ * $Id: marcdisp.c,v 1.52 2007-12-17 20:59:30 adam Exp $
  */
 
 /**
 #include <libxml/tree.h>
 #endif
 
+enum yaz_collection_state {
+    no_collection,
+    collection_first,
+    collection_second
+};
+   
 /** \brief node types for yaz_marc_node */
 enum YAZ_MARC_NODE_TYPE
 { 
@@ -85,6 +91,7 @@ struct yaz_marc_t_ {
     int xml;
     int debug;
     int write_using_libxml2;
+    enum yaz_collection_state enable_collection;
     yaz_iconv_t iconv_cd;
     char subfield_str[8];
     char endline_str[8];
@@ -100,6 +107,7 @@ yaz_marc_t yaz_marc_create(void)
     mt->xml = YAZ_MARC_LINE;
     mt->debug = 0;
     mt->write_using_libxml2 = 0;
+    mt->enable_collection = no_collection;
     mt->m_wr = wrbuf_alloc();
     mt->iconv_cd = 0;
     mt->leader_spec = 0;
@@ -496,6 +504,28 @@ int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
     return 0;
 }
 
+int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
+{
+    if (mt->enable_collection == collection_second)
+    {
+        switch(mt->xml)
+        {
+        case YAZ_MARC_MARCXML:
+            wrbuf_printf(wr, "</collection>\n");
+            break;
+        case YAZ_MARC_XCHANGE:
+            wrbuf_printf(wr, "</collection>\n");
+            break;
+        }
+    }
+    return 0;
+}
+
+void yaz_marc_enable_collection(yaz_marc_t mt)
+{
+    mt->enable_collection = collection_first;
+}
+
 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
 {
     switch(mt->xml)
@@ -541,8 +571,18 @@ static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr,
         return -1;
     if (!atoi_n_check(leader+11, 1, &identifier_length))
         return -1;
-
-    wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
+    
+    if (mt->enable_collection != no_collection)
+    {
+        if (mt->enable_collection == collection_first)
+            wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
+        mt->enable_collection = collection_second;
+        wrbuf_printf(wr, "<record");
+    }
+    else
+    {
+        wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
+    }
     if (format)
         wrbuf_printf(wr, " format=\"%.80s\"", format);
     if (type)
index fe6b692..3d0cfaf 100644 (file)
@@ -1,4 +1,5 @@
-<record xmlns="http://www.loc.gov/MARC21/slim">
+<collection xmlns="http://www.loc.gov/MARC21/slim">
+<record>
 <!-- Length implementation at offset 22 should hold a digit. Assuming 0 -->
   <leader>00988nam0a32003011  450 </leader>
   <datafield tag="001" ind1="0" ind2="0" ind3="0">
@@ -91,3 +92,4 @@
     <subfield code="i">1970-1979</subfield>
   </datafield>
 </record>
+</collection>
index dd0bc13..b665385 100644 (file)
@@ -1,4 +1,5 @@
-<record xmlns="http://www.loc.gov/MARC21/slim">
+<collection xmlns="http://www.loc.gov/MARC21/slim">
+<record>
 <!-- Length implementation at offset 22 should hold a digit. Assuming 0 -->
   <leader>01116nam0a32002171  450 </leader>
   <datafield tag="001" ind1="0" ind2="0" ind3="0">
@@ -98,3 +99,4 @@
     <subfield code="a">1 girl</subfield>
   </datafield>
 </record>
+</collection>
index baab879..2784431 100644 (file)
@@ -1,4 +1,5 @@
-<record xmlns="http://www.loc.gov/MARC21/slim">
+<collection xmlns="http://www.loc.gov/MARC21/slim">
+<record>
 <!-- Length implementation at offset 22 should hold a digit. Assuming 0 -->
   <leader>00914naa a2200337   450 </leader>
   <datafield tag="001" ind1=" " ind2=" ">
     <subfield code="a">a00001508</subfield>
   </datafield>
 </record>
+</collection>
index 6587da9..57fa9ca 100644 (file)
@@ -1,4 +1,5 @@
-<record xmlns="http://www.loc.gov/MARC21/slim">
+<collection xmlns="http://www.loc.gov/MARC21/slim">
+<record>
 <!-- Indicator length at offset 10 should hold a digit. Assuming 2 -->
 <!-- Identifier length at offset 11 should hold a digit. Assuming 2 -->
 <!-- Base address at offsets 12..16 should hold a number. Assuming 0 -->
@@ -7,3 +8,4 @@
 <!-- Directory offset 24: Bad value for data length and/or length starting -->
 <!-- Base address not at end of directory, base 0, end 25 -->
 </record>
+</collection>
index 54323ee..56fbdcc 100644 (file)
@@ -1,4 +1,5 @@
-<record xmlns="http://www.loc.gov/MARC21/slim">
+<collection xmlns="http://www.loc.gov/MARC21/slim">
+<record>
   <leader>00492nam a22001455a 4500</leader>
   <controlfield tag="001">000277485</controlfield>
   <controlfield tag="005">20051026111436.0</controlfield>
@@ -29,3 +30,4 @@
     <subfield code="a">Cryptography.</subfield>
   </datafield>
 </record>
+</collection>
index a78dca2..f4799f4 100644 (file)
@@ -1,4 +1,5 @@
-<record xmlns="http://www.loc.gov/MARC21/slim">
+<collection xmlns="http://www.loc.gov/MARC21/slim">
+<record>
   <leader>00366nam a22001698a 4500</leader>
   <controlfield tag="001">   11224466 </controlfield>
   <controlfield tag="003">DLC</controlfield>
@@ -30,3 +31,4 @@
     <subfield code="a">p. cm.</subfield>
   </datafield>
 </record>
+</collection>
index bc6922d..17927db 100644 (file)
@@ -1,4 +1,5 @@
-<record xmlns="http://www.loc.gov/MARC21/slim">
+<collection xmlns="http://www.loc.gov/MARC21/slim">
+<record>
   <leader>03114cam a2200349 i 4500</leader>
   <controlfield tag="001">   77123332 </controlfield>
   <controlfield tag="003">DLC</controlfield>
@@ -84,3 +85,4 @@
     <subfield code="a">Standard PC Keyboard:   1234567890-=   !@#$%^&amp;*()_+   qwertyuiop[]\   QWERTYUIOP{}|   asdfghjkl;&apos;  ASDFGHJKL:&quot;   zxcvbnm,./   ZXCVBNM&lt;&gt;?</subfield>
   </datafield>
 </record>
+</collection>
index bb73af0..edb674e 100644 (file)
@@ -1,4 +1,5 @@
-<record xmlns="http://www.loc.gov/MARC21/slim">
+<collection xmlns="http://www.loc.gov/MARC21/slim">
+<record>
   <leader>02647nam^a2200469^^^4500</leader>
   <controlfield tag="001">UCD-002592301</controlfield>
   <controlfield tag="005">20061209034435.0</controlfield>
     <subfield code="b">002592301</subfield>
   </datafield>
 </record>
+</collection>
index d7ed2a5..d8b83d7 100644 (file)
@@ -1,4 +1,5 @@
-<record xmlns="http://www.loc.gov/MARC21/slim">
+<collection xmlns="http://www.loc.gov/MARC21/slim">
+<record>
   <leader>02075cas a22005055a 4500</leader>
   <controlfield tag="001">  2005336282</controlfield>
   <controlfield tag="003">DLC</controlfield>
     <subfield code="a">LC Cairo Office [we 45]</subfield>
   </datafield>
 </record>
+</collection>
index e65aed5..2dc36ef 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (C) 1995-2007, Index Data ApS
  * See the file LICENSE for details.
  *
- * $Id: marcdump.c,v 1.53 2007-09-23 07:40:13 adam Exp $
+ * $Id: marcdump.c,v 1.54 2007-12-17 20:59:32 adam Exp $
  */
 
 #define _FILE_OFFSET_BITS 64
@@ -14,7 +14,7 @@
 #if YAZ_HAVE_XML2
 #include <libxml/parser.h>
 #include <libxml/tree.h>
-
+#include <libxml/xmlreader.h>
 #include <libxml/xpath.h>
 #include <libxml/xpathInternals.h>
 
@@ -93,34 +93,54 @@ static void marcdump_read_line(yaz_marc_t mt, const char *fname)
         fputs(wrbuf_cstr(wrbuf), stdout);
         wrbuf_destroy(wrbuf);
     }
+    {
+        WRBUF wrbuf = wrbuf_alloc();
+        yaz_marc_write_trailer(mt, wrbuf);
+        fputs(wrbuf_cstr(wrbuf), stdout);
+        wrbuf_destroy(wrbuf);
+    }
     fclose(inf);
 }
 
 #if YAZ_HAVE_XML2
 static void marcdump_read_xml(yaz_marc_t mt, const char *fname)
 {
-    xmlNodePtr ptr;
-    xmlDocPtr doc = xmlParseFile(fname);
-    if (!doc)
-        return;
+    xmlTextReaderPtr reader = xmlReaderForFile(fname, 0 /* encoding */,
+                                               0 /* options */);
 
-    ptr = xmlDocGetRootElement(doc);
-    if (ptr)
+    if (reader)
     {
-        int r;
+        int ret;
         WRBUF wrbuf = wrbuf_alloc();
-        r = yaz_marc_read_xml(mt, ptr);
-        if (r)
-            fprintf(stderr, "yaz_marc_read_xml failed\n");
-        else
+        while ((ret = xmlTextReaderRead(reader)) == 1)
         {
-            yaz_marc_write_mode(mt, wrbuf);
-            
-            fputs(wrbuf_cstr(wrbuf), stdout);
+            int type = xmlTextReaderNodeType(reader);
+            if (type == XML_READER_TYPE_ELEMENT)
+            {
+                const char *name = (const char *) 
+                    xmlTextReaderConstName(reader);
+                if (!strcmp(name, "record"))
+                {
+                    xmlNodePtr ptr = xmlTextReaderExpand(reader);
+        
+                    int r = yaz_marc_read_xml(mt, ptr);
+                    if (r)
+                        fprintf(stderr, "yaz_marc_read_xml failed\n");
+                    else
+                    {
+                        yaz_marc_write_mode(mt, wrbuf);
+                        
+                        fputs(wrbuf_cstr(wrbuf), stdout);
+                        wrbuf_rewind(wrbuf);
+                    }
+                }
+            }
         }
+        yaz_marc_write_trailer(mt, wrbuf);
+        fputs(wrbuf_cstr(wrbuf), stdout);
         wrbuf_destroy(wrbuf);
+        xmlFreeTextReader(reader);
     }
-    xmlFreeDoc(doc);
 }
 #endif
 
@@ -152,6 +172,7 @@ static void dump(const char *fname, const char *from, const char *to,
         yaz_marc_iconv(mt, cd);
     }
     yaz_marc_xml(mt, output_format);
+    yaz_marc_enable_collection(mt);
     yaz_marc_write_using_libxml2(mt, write_using_libxml2);
     yaz_marc_debug(mt, verbose);
 
@@ -307,6 +328,12 @@ static void dump(const char *fname, const char *from, const char *to,
             fprintf (cfile, "};\n");
         fclose(inf);
     }
+    {
+        WRBUF wrbuf = wrbuf_alloc();
+        yaz_marc_write_trailer(mt, wrbuf);
+        fputs(wrbuf_cstr(wrbuf), stdout);
+        wrbuf_destroy(wrbuf);
+    }
     if (cd)
         yaz_iconv_close(cd);
     yaz_marc_destroy(mt);