New facilities for the MARC module. The reading and writing of
authorAdam Dickmeiss <adam@indexdata.dk>
Wed, 19 Apr 2006 10:05:02 +0000 (10:05 +0000)
committerAdam Dickmeiss <adam@indexdata.dk>
Wed, 19 Apr 2006 10:05:02 +0000 (10:05 +0000)
content are separate methods for the yaz_marc_t handle. The following
read functions are available: yaz_marc_read_iso2709 (Reads MARC in
ISO2709 format), yaz_marc_read_xml (reads MARC in MARCXML/MarcXchange
format). Write functions have prefix yaz_marc_write_.. The existing
utilities yaz_marc_decode_wrbuf and yaz_marc_decode_buf are still
available. Removed support for OAI-MARC and simplexml (not the
simplexml from PHP5).

33 files changed:
NEWS
doc/yaz-marcdump-man.xml
include/yaz/marcdisp.h
include/yaz/wrbuf.h
src/marcdisp.c
src/querytowrbuf.c
src/wrbuf.c
src/xmlquery.c
src/zoom-c.c
test/Makefile.am
test/marc1.chr [new file with mode: 0644]
test/marc1.xml
test/marc1.xml.marc [new file with mode: 0644]
test/marc2.chr [new file with mode: 0644]
test/marc2.xml
test/marc2.xml.marc [new file with mode: 0644]
test/marc3.chr [new file with mode: 0644]
test/marc3.xml
test/marc3.xml.marc [new file with mode: 0644]
test/marc4.chr [new file with mode: 0644]
test/marc4.xml
test/marc4.xml.marc [new file with mode: 0644]
test/marc5.chr [new file with mode: 0644]
test/marc5.xml [new file with mode: 0644]
test/marc5.xml.marc [new file with mode: 0644]
test/marc6 [new file with mode: 0644]
test/marc6.chr [new file with mode: 0644]
test/marc6.xml [new file with mode: 0644]
test/marc6.xml.marc [new file with mode: 0644]
test/tstmarc.sh [deleted file]
test/tstmarciso.sh [new file with mode: 0755]
test/tstmarcxml.sh [new file with mode: 0755]
util/marcdump.c

diff --git a/NEWS b/NEWS
index 3597881..6781a21 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -1,7 +1,16 @@
+New facilities for the MARC module. The reading and writing of
+content are separate methods for the yaz_marc_t handle. The following
+read functions are available: yaz_marc_read_iso2709 (Reads MARC in
+ISO2709 format), yaz_marc_read_xml (reads MARC in MARCXML/MarcXchange
+format). Write functions have prefix yaz_marc_write_.. The existing
+utilities yaz_marc_decode_wrbuf and yaz_marc_decode_buf are still
+available. Removed support for OAI-MARC and simplexml (not the
+simplexml from PHP5).
+
 Added a new ZOOM event type ZOOM_EVENT_END which signals no more events
 to be returned for this connection.
 
---- 2.1.16 2006/03/31 
+--- 2.1.16 2006/03/31
 
 Allow multiple languages and charsets to be specified with
 yaz-client. Each item must be separated by comma (NO BLANKS). E.g.
@@ -11,10 +20,10 @@ Translation of proximity nodes from CQL into PQF now works.
 
 Moved to automake 1.8, 1.9.
 
-Added function yaz_log_set_handler which allows a log handler
-to be installed. This handler will be called for all log messages.
-Output to file is also produced; but that can be disabled by passing
-NULL fname to yaz_log_init_file.
+Added function yaz_log_set_handler which allows a log handler to be
+installed. This handler will be called for all log messages.  Output
+to file is also produced; but that can be disabled by passing NULL
+fname to yaz_log_init_file.
 
 Fixed another problem with MARC-8 -> ISO-8859-1 conversions. Bug #537.
 
index e73d9be..257b0b1 100644 (file)
@@ -1,5 +1,5 @@
 <!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook V4.1//EN">
-<!-- $Id: yaz-marcdump-man.xml,v 1.5 2005-02-08 23:43:56 adam Exp $ -->
+<!-- $Id: yaz-marcdump-man.xml,v 1.6 2006-04-19 10:05:02 adam Exp $ -->
 <refentry id="yaz-marcdump">
  <refmeta>
   <refentrytitle>yaz-marcdump</refentrytitle>
@@ -18,7 +18,6 @@
    <arg choice="opt"><option>-X</option></arg>
    <arg choice="opt"><option>-e</option></arg>
    <arg choice="opt"><option>-I</option></arg>
-   <arg choice="opt"><option>-O</option></arg>
    <arg choice="opt"><option>-f <replaceable>from</replaceable></option></arg>
    <arg choice="opt"><option>-t <replaceable>to</replaceable></option></arg>
    <arg choice="opt"><option>-v</option></arg>
  
  <refsect1><title>DESCRIPTION</title>
   <para>
-   <command>yaz-marcdump</command> reads ISO2709/MARC records from one or
+   <command>yaz-marcdump</command> reads MARC records from one or
    more files.
-   It validates each record and supports output in line-format, 
-   MARCXML, OAIMARC as well as Hex output.
+   It parses each record and supports output in line-format, 
+   ISO2709, MARCXML, MarcXchange as well as Hex output.
   </para>
   <para>
-   By default, each record is printed to standard output in a line
+   This utility parses records in ISO2709 (raw MARC) as well as XML
+   if that is structured as MARCXML/MarcXchange.
+  </para>
+  <note>
+   <para>
+    As of YAZ 2.1.18, OAI-MARC is no longer supported.
+    OAI-MARC is deprecated. Use MARCXML instead.
+   </para>
+  </note>
+  <para>
+   By default, each record is written to standard output in a line
    format with newline for each field, $x for each subfield x.
+   The output format may be changed with options <literal>-X</literal>,
+   <literal>-e</literal>, <literal>-I</literal>.
   </para>
   <para>
    <command>yaz-marcdump</command> can also be requested to perform
    <varlistentry>
     <term>-x</term>
     <listitem><para>
-      Print MARC records in a simple XML format. 
-      This format is equivalent to YAZ_MARC_SIMPLEXML in
-      <filename>yaz/marcdisp.h</filename>.
+      Reads MARC records in MARCXML/MarcXchange format. Without
+      this option, <command>yaz-marcdump</command> reads records
+      in ISO2709 format.
      </para></listitem>
    </varlistentry>
 
    <varlistentry>
     <term>-X</term>
     <listitem><para>
-      Print MARC records in MARCXML.
+      Writes MARC records in MARCXML.
       This format is equivalent to YAZ_MARC_MARCXML in
       <filename>yaz/marcdisp.h</filename>.
      </para></listitem>
@@ -68,7 +79,7 @@
    <varlistentry>
     <term>-e</term>
     <listitem><para>
-      Print MARC records in MarcXchange format.
+      Writes MARC records in MarcXchange format.
       This format is equivalent to YAZ_MARC_XCHANGE in
       <filename>yaz/marcdisp.h</filename>.
      </para></listitem>
    <varlistentry>
     <term>-I</term>
     <listitem><para>
-      Print MARC records in ISO2709 format.
+      Writes MARC records in ISO2709 format.
       This format is equivalent to YAZ_MARC_ISO2709 in
       <filename>yaz/marcdisp.h</filename>.
      </para></listitem>
    </varlistentry>
 
    <varlistentry>
-    <term>-O</term>
-    <listitem><para>
-      Print MARC records in OAIMARC. Another XML variant.
-      This format is equivalent to YAZ_MARC_OAIMARC in
-      <filename>yaz/marcdisp.h</filename>.
-     </para></listitem>
-   </varlistentry>
-   
-   <varlistentry>
     <term>-f<replaceable>from</replaceable>]</term>
     <listitem><para>
       Specify the character set <replaceable>from</replaceable>
    <varlistentry>
     <term>-v</term>
     <listitem><para>
-      Print more information about the parsing process.
+      Writes more information about the parsing process.
       Useful if you have ill-formatted ISO2709 records as input.
      </para></listitem>
    </varlistentry>
index 4c425f3..f6a35dc 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 1995-2005, Index Data ApS
+ * Copyright (C) 1995-2006, Index Data ApS
  *
  * Permission to use, copy, modify, distribute, and sell this software and
  * its documentation, in whole or in part, for any purpose, is hereby granted,
  * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
  * OF THIS SOFTWARE.
  *
- * $Id: marcdisp.h,v 1.14 2005-06-25 15:46:03 adam Exp $
+ * $Id: marcdisp.h,v 1.15 2006-04-19 10:05:02 adam Exp $
  */
 
 /**
  * \file marcdisp.h
- * \brief Header for MARC display - and conversion utilities
+ * \brief MARC conversion
  */
 
 #ifndef MARCDISP_H
 
 YAZ_BEGIN_CDECL
 
+/** \brief a yaz_marc_t handle (private content) */
 typedef struct yaz_marc_t_ *yaz_marc_t;
 
-/* create handler */
+/** \brief construct yaz_marc_t handle */
 YAZ_EXPORT yaz_marc_t yaz_marc_create(void);
-/* destroy */
+
+/** \brief destroy yaz_marc_t handle */
 YAZ_EXPORT void yaz_marc_destroy(yaz_marc_t mt);
 
-/* set XML mode YAZ_MARC_LINE, YAZ_MARC_SIMPLEXML, ... */
+/** \brief set XML mode YAZ_MARC_LINE, YAZ_MARC_SIMPLEXML, ... */
 YAZ_EXPORT void yaz_marc_xml(yaz_marc_t mt, int xmlmode);
+
+/** \brief Output format: Line-format */
 #define YAZ_MARC_LINE      0
+/** \brief Output format: simplexml (no longer supported) */
 #define YAZ_MARC_SIMPLEXML 1
+/** \brief Output format: OAI-MARC (no longer supported) */
 #define YAZ_MARC_OAIMARC   2
+/** \brief Output format: MARCXML */
 #define YAZ_MARC_MARCXML   3
+/** \brief Output format: ISO2709 */
 #define YAZ_MARC_ISO2709   4
+/** \brief Output format: MarcXchange */
 #define YAZ_MARC_XCHANGE   5
 
-/* supply iconv handle for character set conversion .. */
+/** \brief supply iconv handle for character set conversion .. */
 YAZ_EXPORT void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd);
 
-/* set debug level, 0=none, 1=more, 2=even more, .. */
+/** \brief set debug level 
+    \param mt handle
+    \param level level, where 0=lowest, 1 more debug, 2 even more 
+*/
 YAZ_EXPORT void yaz_marc_debug(yaz_marc_t mt, int level);
 
-/* decode MARC in buf of size bsize. Returns >0 on success; <=0 on failure.
-   On success, result in *result with size *rsize. */
-YAZ_EXPORT int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
-                                    char **result, int *rsize);
-
-/* decode MARC in buf of size bsize. Returns >0 on success; <=0 on failure.
-   On success, result in WRBUF */
-YAZ_EXPORT int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf,
-                                      int bsize, WRBUF wrbuf);
-
-/* old functions (depricated) */
-YAZ_EXPORT int marc_display (const char *buf, FILE *outf);
-YAZ_EXPORT int marc_display_ex (const char *buf, FILE *outf, int debug);
-YAZ_EXPORT int marc_display_exl (const char *buf, FILE *outf, int debug,
-                                 int length);
-YAZ_EXPORT int marc_display_wrbuf (const char *buf, WRBUF wr, int debug,
-                                   int bsize);
+/** \brief decodes ISO2709 buffer using straight buffers
+    \param mt marc handle
+    \param buf input buffer
+    \param bsize size of buffer (or -1 if "any size")
+    \param result result to be stored here (allocate before use!)
+    \param rsize size of result (set before calling)
+    
+    decode MARC in buf of size bsize. Returns >0 on success; <=0 on failure.
+    On success, result in *result with size *rsize. 
+    Returns -1 on error, size of input record (>0) if OK
+*/
+YAZ_EXPORT int yaz_marc_decode_buf(yaz_marc_t mt, const char *buf, int bsize,
+                                   char **result, int *rsize);
+
+/** \brief decodes ISO2709/MARC buffer and stores result in WRBUF */
+YAZ_EXPORT int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf,
+                                     int bsize, WRBUF wrbuf);
+
+/** \brief deprecated */
+YAZ_EXPORT int marc_display(const char *buf, FILE *outf);
+/** \brief deprecated */
+YAZ_EXPORT int marc_display_ex(const char *buf, FILE *outf, int debug);
+/** \brief deprecated */
+YAZ_EXPORT int marc_display_exl(const char *buf, FILE *outf, int debug,
+                                int length);
+/** \brief deprecated */
+YAZ_EXPORT int marc_display_wrbuf(const char *buf, WRBUF wr, int debug,
+                                  int bsize);
+/** \brief deprecated */
 YAZ_EXPORT int yaz_marc_decode(const char *buf, WRBUF wr,
                                int debug, int bsize, int xml);
 
 YAZ_EXPORT void yaz_marc_subfield_str(yaz_marc_t mt, const char *s);
 YAZ_EXPORT void yaz_marc_endline_str(yaz_marc_t mt, const char *s);
 
-/* like atoi except that it reads exactly len characters */
-YAZ_EXPORT int atoi_n (const char *buf, int len);
+/** \brief like atoi except that it reads exactly len characters */
+YAZ_EXPORT int atoi_n(const char *buf, int len);
 
-/* MARC control characters */
+/** \brief MARC control char: record separator (29 Dec, 1D Hex) */
 #define ISO2709_RS 035
+/** \brief MARC control char: field separator (30 Dec, 1E Hex) */
 #define ISO2709_FS 036
+/** \brief MARC control char: identifier-field separator (31 Dec, 1F Hex) */
 #define ISO2709_IDFS 037
 
+/** \brief read ISO2709/MARC record from buffer */
+YAZ_EXPORT int yaz_marc_read_iso2709(yaz_marc_t mt,
+                                     const char *buf, int bsize);
+/** \brief read MARCXML record from buffer */
+YAZ_EXPORT int yaz_marc_read_xml(yaz_marc_t mt, const void *xmlnode);
+
+/** \brief writes record in line format */
+YAZ_EXPORT int yaz_marc_write_line(yaz_marc_t mt, WRBUF wrbuf);
+/** \brief writes record in MARCXML format */
+YAZ_EXPORT int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wrbuf);
+/** \brief writes record in MarcXchange format */
+YAZ_EXPORT int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wrbuf);
+/** \brief writes record in ISO2709 format */
+YAZ_EXPORT int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wrbuf);
+/** \brief writes record in mode - given by yaz_marc_xml mode */
+YAZ_EXPORT int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr);
+
 YAZ_END_CDECL
 
 #endif
index 44ab87e..0999a45 100644 (file)
@@ -23,7 +23,7 @@
  * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
  * OF THIS SOFTWARE.
  *
- * $Id: wrbuf.h,v 1.16 2005-09-27 17:52:46 adam Exp $
+ * $Id: wrbuf.h,v 1.17 2006-04-19 10:05:02 adam Exp $
  */
 /**
  * \file wrbuf.h
@@ -53,11 +53,17 @@ YAZ_EXPORT int wrbuf_write(WRBUF b, const char *buf, int size);
 YAZ_EXPORT int wrbuf_xmlputs_n(WRBUF b, const char *cp, int size);
 YAZ_EXPORT int wrbuf_puts(WRBUF b, const char *buf);
 YAZ_EXPORT int wrbuf_xmlputs(WRBUF b, const char *cp);
-YAZ_EXPORT void wrbuf_printf(WRBUF b, const char *fmt, ...);
+YAZ_EXPORT void wrbuf_printf(WRBUF b, const char *fmt, ...)
+#ifdef __GNUC__
+        __attribute__ ((format (printf, 2, 3)))
+#endif
+        ;
+
 YAZ_EXPORT int wrbuf_iconv_write(WRBUF b, yaz_iconv_t cd, const char *buf,
                                  int size);
 YAZ_EXPORT int wrbuf_iconv_write_cdata(WRBUF b, yaz_iconv_t cd,
                                        const char *buf, int size);
+YAZ_EXPORT int wrbuf_iconv_puts(WRBUF b, yaz_iconv_t cd, const char *strz);
 
 YAZ_EXPORT void wrbuf_chop_right(WRBUF b);
 
index 6dba155..ea4b2bc 100644 (file)
@@ -1,19 +1,24 @@
 /*
- * Copyright (C) 1995-2005, Index Data ApS
+ * Copyright (C) 1995-2006, Index Data ApS
  * See the file LICENSE for details.
  *
- * $Id: marcdisp.c,v 1.25 2006-01-26 15:37:05 adam Exp $
+ * $Id: marcdisp.c,v 1.26 2006-04-19 10:05:03 adam Exp $
  */
 
 /**
  * \file marcdisp.c
- * \brief Implements MARC display - and conversion utilities
+ * \brief Implements MARC conversion utilities
  */
 
 #if HAVE_CONFIG_H
 #include <config.h>
 #endif
 
+#ifdef WIN32
+#include <windows.h>
+#endif
+
+#include <stdarg.h>
 #include <stdio.h>
 #include <string.h>
 #include <ctype.h>
 #include <yaz/wrbuf.h>
 #include <yaz/yaz-util.h>
 
+#if HAVE_XML2
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#endif
+
+static void yaz_marc_reset(yaz_marc_t mt);
+
+/** \brief node types for yaz_marc_node */
+enum YAZ_MARC_NODE_TYPE
+{ 
+    YAZ_MARC_DATAFIELD,
+    YAZ_MARC_CONTROLFIELD,
+    YAZ_MARC_COMMENT,
+    YAZ_MARC_LEADER
+};
+
+/** \brief represents a data field */
+struct yaz_marc_datafield {
+    char *tag;
+    char *indicator;
+    struct yaz_marc_subfield *subfields;
+};
+
+/** \brief represents a control field */
+struct yaz_marc_controlfield {
+    char *tag;
+    char *data;
+};
+
+/** \brief a comment node */
+struct yaz_marc_comment {
+    char *comment;
+};
+
+/** \brief MARC node */
+struct yaz_marc_node {
+    enum YAZ_MARC_NODE_TYPE which;
+    union {
+        struct yaz_marc_datafield datafield;
+        struct yaz_marc_controlfield controlfield;
+        char *comment;
+        char *leader;
+    } u;
+    struct yaz_marc_node *next;
+};
+
+/** \brief represents a subfield */
+struct yaz_marc_subfield {
+    char *code_data;
+    struct yaz_marc_subfield *next;
+};
+
+/** \brief the internals of a yaz_marc_t handle */
 struct yaz_marc_t_ {
     WRBUF m_wr;
+    NMEM nmem;
     int xml;
     int debug;
     yaz_iconv_t iconv_cd;
     char subfield_str[8];
     char endline_str[8];
+    struct yaz_marc_node *nodes;
+    struct yaz_marc_node **nodes_pp;
+    struct yaz_marc_subfield **subfield_pp;
 };
 
 yaz_marc_t yaz_marc_create(void)
@@ -39,9 +101,263 @@ yaz_marc_t yaz_marc_create(void)
     mt->iconv_cd = 0;
     strcpy(mt->subfield_str, " $");
     strcpy(mt->endline_str, "\n");
+
+    mt->nmem = nmem_create();
+    yaz_marc_reset(mt);
     return mt;
 }
 
+void yaz_marc_destroy(yaz_marc_t mt)
+{
+    if (!mt)
+        return ;
+    nmem_destroy(mt->nmem);
+    wrbuf_free (mt->m_wr, 1);
+    xfree (mt);
+}
+
+struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
+{
+    struct yaz_marc_node *n = nmem_malloc(mt->nmem, sizeof(*n));
+    n->next = 0;
+    *mt->nodes_pp = n;
+    mt->nodes_pp = &n->next;
+    return n;
+}
+
+void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
+{
+    struct yaz_marc_node *n = yaz_marc_add_node(mt);
+    n->which = YAZ_MARC_COMMENT;
+    n->u.comment = nmem_strdup(mt->nmem, comment);
+}
+
+#if HAVE_XML2
+static char *yaz_marc_get_xml_text(const xmlNode *ptr_cdata, NMEM nmem)
+{
+    char *cdata;
+    int len = 0;
+    const xmlNode *ptr;
+
+    for (ptr = ptr_cdata; ptr; ptr = ptr->next)
+        if (ptr->type == XML_TEXT_NODE)
+            len += xmlStrlen(ptr->content);
+    cdata = (char *) nmem_malloc(nmem, len+1);
+    *cdata = '\0';
+    for (ptr = ptr_cdata; ptr; ptr = ptr->next)
+        if (ptr->type == XML_TEXT_NODE)
+            strcat(cdata, (const char *) ptr->content);
+    return cdata;
+}
+#endif
+
+void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
+{
+    va_list ap;
+    char buf[200];
+    va_start(ap, fmt);
+
+#ifdef WIN32
+    _vsnprintf(buf, sizeof(buf)-1, fmt, ap);
+#else
+/* !WIN32 */
+#if HAVE_VSNPRINTF
+    vsnprintf(buf, sizeof(buf), fmt, ap);
+#else
+    vsprintf(buf, fmt, ap);
+#endif
+#endif
+/* WIN32 */
+    yaz_marc_add_comment(mt, buf);
+    va_end (ap);
+}
+
+void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
+{
+    struct yaz_marc_node *n = yaz_marc_add_node(mt);
+    n->which = YAZ_MARC_LEADER;
+    n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
+}
+
+void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
+                               const char *data, size_t data_len)
+{
+    struct yaz_marc_node *n = yaz_marc_add_node(mt);
+    n->which = YAZ_MARC_CONTROLFIELD;
+    n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
+    n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
+    if (mt->debug)
+    {
+        size_t i;
+        char msg[80];
+
+        sprintf(msg, "controlfield:");
+        for (i = 0; i < 16 && i < data_len; i++)
+            sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
+        if (i < data_len)
+            sprintf(msg + strlen(msg), " ..");
+        yaz_marc_add_comment(mt, msg);
+    }
+}
+
+#if HAVE_XML2
+void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
+                                   const xmlNode *ptr_data)
+{
+    struct yaz_marc_node *n = yaz_marc_add_node(mt);
+    n->which = YAZ_MARC_CONTROLFIELD;
+    n->u.controlfield.tag = yaz_marc_get_xml_text(ptr_tag, mt->nmem);
+    n->u.controlfield.data = yaz_marc_get_xml_text(ptr_data, mt->nmem);
+}
+#endif
+
+void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
+                            const char *indicator, size_t indicator_len)
+{
+    struct yaz_marc_node *n = yaz_marc_add_node(mt);
+    n->which = YAZ_MARC_DATAFIELD;
+    n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
+    n->u.datafield.indicator =
+        nmem_strdupn(mt->nmem, indicator, indicator_len);
+    n->u.datafield.subfields = 0;
+
+    /* make subfield_pp the current (last one) */
+    mt->subfield_pp = &n->u.datafield.subfields;
+}
+
+#if HAVE_XML2
+void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
+                                const char *indicator, size_t indicator_len)
+{
+    struct yaz_marc_node *n = yaz_marc_add_node(mt);
+    n->which = YAZ_MARC_DATAFIELD;
+    n->u.datafield.tag = yaz_marc_get_xml_text(ptr_tag, mt->nmem);
+    n->u.datafield.indicator =
+        nmem_strdupn(mt->nmem, indicator, indicator_len);
+    n->u.datafield.subfields = 0;
+
+    /* make subfield_pp the current (last one) */
+    mt->subfield_pp = &n->u.datafield.subfields;
+}
+#endif
+
+void yaz_marc_add_subfield(yaz_marc_t mt,
+                           const char *code_data, size_t code_data_len)
+{
+    if (mt->debug)
+    {
+        size_t i;
+        char msg[80];
+
+        sprintf(msg, "subfield:");
+        for (i = 0; i < 16 && i < code_data_len; i++)
+            sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
+        if (i < code_data_len)
+            sprintf(msg + strlen(msg), " ..");
+        yaz_marc_add_comment(mt, msg);
+    }
+
+    if (mt->subfield_pp)
+    {
+        struct yaz_marc_subfield *n = nmem_malloc(mt->nmem, sizeof(*n));
+        n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
+        n->next = 0;
+        /* mark subfield_pp to point to this one, so we append here next */
+        *mt->subfield_pp = n;
+        mt->subfield_pp = &n->next;
+    }
+}
+
+static int atoi_n_check(const char *buf, int size, int *val)
+{
+    if (!isdigit(*(const unsigned char *) buf))
+        return 0;
+    *val = atoi_n(buf, size);
+    return 1;
+}
+
+/** \brief reads the MARC 24 bytes leader and checks content
+    \param mt handle
+    \param leader_c the 24 byte leader
+    \param indicator_length indicator length
+    \param identifier_length identifier length
+    \param base_address base address
+    \param length_data_entry length of data entry
+    \param length_starting length of starting character position
+    \param length_implementation length of implementation defined data
+*/
+static void yaz_marc_read_leader(yaz_marc_t mt, const char *leader_c,
+                                 int *indicator_length,
+                                 int *identifier_length,
+                                 int *base_address,
+                                 int *length_data_entry,
+                                 int *length_starting,
+                                 int *length_implementation)
+{
+    char leader[24];
+
+    memcpy(leader, leader_c, 24);
+
+    if (!atoi_n_check(leader+10, 1, indicator_length))
+    {
+        yaz_marc_cprintf(mt, 
+                         "Indicator length at offset 10 should hold a digit."
+                         " Assuming 2");
+        leader[10] = '2';
+        *indicator_length = 2;
+    }
+    if (!atoi_n_check(leader+11, 1, identifier_length))
+    {
+        yaz_marc_cprintf(mt, 
+                         "Identifier length at offset 11 should hold a digit."
+                         " Assuming 2");
+        leader[11] = '2';
+        *identifier_length = 2;
+    }
+    if (!atoi_n_check(leader+12, 5, base_address))
+    {
+        yaz_marc_cprintf(mt, 
+                         "Base address at offsets 12..16 should hold a number."
+                         " Assuming 0");
+        *base_address = 0;
+    }
+    if (!atoi_n_check(leader+20, 1, length_data_entry))
+    {
+        yaz_marc_cprintf(mt, 
+                         "Length data entry at offset 20 should hold a digit."
+                         " Assuming 4");
+        *length_data_entry = 4;
+        leader[20] = '4';
+    }
+    if (!atoi_n_check(leader+21, 1, length_starting))
+    {
+        yaz_marc_cprintf(mt,
+                         "Length starting at offset 21 should hold a digit."
+                         " Assuming 5");
+        *length_starting = 5;
+        leader[21] = '5';
+    }
+    if (!atoi_n_check(leader+22, 1, length_implementation))
+    {
+        yaz_marc_cprintf(mt, 
+                         "Length implementation at offset 22 should hold a digit."
+                         " Assuming 0");
+        *length_implementation = 0;
+        leader[22] = '0';
+    }
+
+    if (mt->debug)
+    {
+        yaz_marc_cprintf(mt, "Indicator length      %5d", *indicator_length);
+        yaz_marc_cprintf(mt, "Identifier length     %5d", *identifier_length);
+        yaz_marc_cprintf(mt, "Base address          %5d", *base_address);
+        yaz_marc_cprintf(mt, "Length data entry     %5d", *length_data_entry);
+        yaz_marc_cprintf(mt, "Length starting       %5d", *length_starting);
+        yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
+    }
+    yaz_marc_add_leader(mt, leader, 24);
+}
+
 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
 {
     strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
@@ -54,14 +370,6 @@ void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
     mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
 }
 
-void yaz_marc_destroy(yaz_marc_t mt)
-{
-    if (!mt)
-        return ;
-    wrbuf_free (mt->m_wr, 1);
-    xfree (mt);
-}
-
 static void marc_cdata (yaz_marc_t mt, const char *buf, size_t len, WRBUF wr)
 {
     if (mt->xml == YAZ_MARC_ISO2709)
@@ -96,189 +404,584 @@ static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
     return 1; /* we don't know */
 }
                               
-static int atoi_n_check(const char *buf, int size, int *val)
+static void yaz_marc_reset(yaz_marc_t mt)
 {
-    if (!isdigit(*(const unsigned char *) buf))
-        return 0;
-    *val = atoi_n(buf, size);
-    return 1;
+    nmem_reset(mt->nmem);
+    mt->nodes = 0;
+    mt->nodes_pp = &mt->nodes;
+    mt->subfield_pp = 0;
 }
 
-int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
+int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
 {
-    int entry_p;
-    int record_length;
-    int indicator_length;
+    struct yaz_marc_node *n;
     int identifier_length;
-    int end_of_directory;
-    int base_address;
-    int length_data_entry;
-    int length_starting;
-    int length_implementation;
-    char lead[24];
-    int produce_warnings = 0;
+    const char *leader = 0;
 
-    if (mt->debug)
-        produce_warnings = 1;
-    if (mt->xml == YAZ_MARC_SIMPLEXML || mt->xml == YAZ_MARC_OAIMARC
-        || mt->xml == YAZ_MARC_MARCXML || mt->xml == YAZ_MARC_XCHANGE)
-        produce_warnings = 1;
+    for (n = mt->nodes; n; n = n->next)
+        if (n->which == YAZ_MARC_LEADER)
+        {
+            leader = n->u.leader;
+            break;
+        }
+    
+    if (!leader)
+        return -1;
+    if (!atoi_n_check(leader+11, 1, &identifier_length))
+        return -1;
 
-    record_length = atoi_n (buf, 5);
-    if (record_length < 25)
+    for (n = mt->nodes; n; n = n->next)
     {
-        if (mt->debug)
-            wrbuf_printf(wr, "<!-- Record length %d - aborting -->\n",
-                            record_length);
-        return -1;
+        struct yaz_marc_subfield *s;
+        switch(n->which)
+        {
+        case YAZ_MARC_DATAFIELD:
+            wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
+                         n->u.datafield.indicator);
+            for (s = n->u.datafield.subfields; s; s = s->next)
+            {
+                /* if identifier length is 2 (most MARCs),
+                   the code is a single character .. However we've
+                   seen multibyte codes, so see how big it really is */
+                size_t using_code_len = 
+                    (identifier_length != 2) ? identifier_length - 1
+                    :
+                    cdata_one_character(mt, s->code_data);
+                
+                wrbuf_puts (wr, mt->subfield_str); 
+                wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data, 
+                                  using_code_len);
+                wrbuf_printf(wr, " ");
+                wrbuf_iconv_puts(wr, mt->iconv_cd, 
+                                 s->code_data + using_code_len);
+            }
+            wrbuf_puts (wr, mt->endline_str);
+            break;
+        case YAZ_MARC_CONTROLFIELD:
+            wrbuf_printf(wr, "%s ", n->u.controlfield.tag);
+            wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
+            wrbuf_puts (wr, mt->endline_str);
+            break;
+        case YAZ_MARC_COMMENT:
+            wrbuf_puts(wr, "(");
+            wrbuf_iconv_write(wr, mt->iconv_cd, 
+                              n->u.comment, strlen(n->u.comment));
+            wrbuf_puts(wr, ")\n");
+            break;
+        case YAZ_MARC_LEADER:
+            wrbuf_printf(wr, "%s\n", n->u.leader);
+        }
     }
-    memcpy(lead, buf, 24);  /* se can modify the header for output */
+    return 0;
+}
 
-    /* ballout if bsize is known and record_length is less than that */
-    if (bsize != -1 && record_length > bsize)
+int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
+{
+    switch(mt->xml)
+    {
+    case YAZ_MARC_LINE:
+        return yaz_marc_write_line(mt, wr);
+    case YAZ_MARC_MARCXML:
+        return yaz_marc_write_marcxml(mt, wr);
+    case YAZ_MARC_XCHANGE:
+        return yaz_marc_write_marcxchange(mt, wr);
+    case YAZ_MARC_ISO2709:
+        return yaz_marc_write_iso2709(mt, wr);
+    }
+    return -1;
+}
+
+static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
+                                     const char *ns)
+{
+    struct yaz_marc_node *n;
+    int identifier_length;
+    const char *leader = 0;
+
+    for (n = mt->nodes; n; n = n->next)
+        if (n->which == YAZ_MARC_LEADER)
+        {
+            leader = n->u.leader;
+            break;
+        }
+    
+    if (!leader)
+        return -1;
+    if (!atoi_n_check(leader+11, 1, &identifier_length))
         return -1;
-    if (!atoi_n_check(buf+10, 1, &indicator_length))
+
+    wrbuf_printf(wr, "<record xmlns=\"%s\">\n", ns);
+    for (n = mt->nodes; n; n = n->next)
     {
-        if (produce_warnings)
-            wrbuf_printf(wr, "<!-- Indicator length at offset 10 should hold a digit. Assuming 2 -->\n");
-        lead[10] = '2';
-        indicator_length = 2;
+        struct yaz_marc_subfield *s;
+        switch(n->which)
+        {
+        case YAZ_MARC_DATAFIELD:
+            wrbuf_printf(wr, "  <datafield tag=\"");
+            wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
+                                    strlen(n->u.datafield.tag));
+            wrbuf_printf(wr, "\"");
+            if (n->u.datafield.indicator)
+            {
+                int i;
+                for (i = 0; n->u.datafield.indicator[i]; i++)
+                {
+                    wrbuf_printf(wr, " ind%d=\"", i+1);
+                    wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
+                                          n->u.datafield.indicator+i, 1);
+                    wrbuf_printf(wr, "\"");
+                }
+            }
+            wrbuf_printf(wr, ">\n");
+            for (s = n->u.datafield.subfields; s; s = s->next)
+            {
+                /* if identifier length is 2 (most MARCs),
+                   the code is a single character .. However we've
+                   seen multibyte codes, so see how big it really is */
+                size_t using_code_len = 
+                    (identifier_length != 2) ? identifier_length - 1
+                    :
+                    cdata_one_character(mt, s->code_data);
+                
+                wrbuf_puts(wr, "    <subfield code=\"");
+                wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
+                                        s->code_data, using_code_len);
+                wrbuf_puts(wr, "\">");
+                wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
+                                        s->code_data + using_code_len,
+                                        strlen(s->code_data + using_code_len));
+                wrbuf_puts(wr, "</subfield>\n");
+            }
+            wrbuf_printf(wr, "  </datafield>\n");
+            break;
+        case YAZ_MARC_CONTROLFIELD:
+            wrbuf_printf(wr, "  <controlfield tag=\"");
+            wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
+                                    strlen(n->u.controlfield.tag));
+            wrbuf_printf(wr, "\">");
+            wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
+            wrbuf_printf(wr, "</controlfield>\n");
+            break;
+        case YAZ_MARC_COMMENT:
+            wrbuf_printf(wr, "<!-- %s -->\n", n->u.comment);
+            break;
+        case YAZ_MARC_LEADER:
+            wrbuf_printf(wr, "  <leader>");
+            wrbuf_iconv_write_cdata(wr, 
+                                    0 /* no charset conversion for leader */,
+                                    n->u.leader, strlen(n->u.leader));
+            wrbuf_printf(wr, "</leader>\n");
+        }
     }
-    if (!atoi_n_check(buf+11, 1, &identifier_length))
+    wrbuf_puts(wr, "</record>\n");
+    return 0;
+}
+
+int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
+{
+    /* namespace URI handed to the generic XML writer for MARCXML output */
+    const char *marcxml_ns = "http://www.loc.gov/MARC21/slim";
+
+    return yaz_marc_write_marcxml_ns(mt, wr, marcxml_ns);
+}
+
+int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr)
+{
+    /* namespace URI handed to the generic XML writer for MarcXchange output */
+    const char *marcxchange_ns = "http://www.bs.dk/standards/MarcXchange";
+
+    return yaz_marc_write_marcxml_ns(mt, wr, marcxchange_ns);
+}
+
+/* Serializes the nodes held by mt as one ISO2709 record appended to wr.
+   The one-character length fields at leader offsets 10, 11, 20, 21 and 22
+   are parsed with atoi_n_check. Record length (leader 0..4) and base
+   address (leader 12..16) are recomputed; the remaining leader bytes are
+   copied from the stored leader. Comment nodes produce no output.
+   Returns 0 on success, -1 if no leader node exists or one of the length
+   fields fails atoi_n_check.
+   NOTE(review): unlike the MARCXML writer, mt->iconv_cd is not applied to
+   the data written here - confirm that is intended. */
+int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
+{
+    struct yaz_marc_node *n;
+    int indicator_length;
+    int identifier_length;
+    int length_data_entry;
+    int length_starting;
+    int length_implementation;
+    int data_offset = 0;
+    const char *leader = 0;
+    WRBUF wr_dir, wr_head;
+    int base_address;
+
+    /* if several leader nodes exist the last one wins */
+    for (n = mt->nodes; n; n = n->next)
+        if (n->which == YAZ_MARC_LEADER)
+            leader = n->u.leader;
+
+    if (!leader)
+        return -1;
+    if (!atoi_n_check(leader+10, 1, &indicator_length))
+        return -1;
+    if (!atoi_n_check(leader+11, 1, &identifier_length))
+        return -1;
+    if (!atoi_n_check(leader+20, 1, &length_data_entry))
+        return -1;
+    if (!atoi_n_check(leader+21, 1, &length_starting))
+        return -1;
+    if (!atoi_n_check(leader+22, 1, &length_implementation))
+        return -1;
+
+    /* first pass: build the directory (tag, field length, field offset) */
+    wr_dir = wrbuf_alloc();
+    for (n = mt->nodes; n; n = n->next)
     {
-        if (produce_warnings)
-            wrbuf_printf(wr, "<!-- Identifier length at offset 11 should hold a digit. Assuming 2 -->\n");
-        lead[11] = '2';
-        identifier_length = 2;
+        int data_length = 0;
+        struct yaz_marc_subfield *s;
+        switch(n->which)
+        {
+        case YAZ_MARC_DATAFIELD:
+            wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
+            /* indicators + (delimiter + code_data) per subfield
+               + field terminator */
+            data_length += indicator_length;
+            for (s = n->u.datafield.subfields; s; s = s->next)
+                data_length += 1+strlen(s->code_data);
+            data_length++;
+            break;
+        case YAZ_MARC_CONTROLFIELD:
+            wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
+            /* data + field terminator */
+            data_length += strlen(n->u.controlfield.data);
+            data_length++;
+            break;
+        case YAZ_MARC_COMMENT:
+            break;
+        case YAZ_MARC_LEADER:
+            break;
+        }
+        if (data_length)
+        {
+            wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
+            wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
+            data_offset += data_length;
+        }
     }
-    if (!atoi_n_check(buf+12, 5, &base_address))
+    /* mark end of directory */
+    wrbuf_putc(wr_dir, ISO2709_FS);
+
+    /* base address of data (comes after leader+directory) */
+    base_address = 24 + wrbuf_len(wr_dir);
+
+    wr_head = wrbuf_alloc();
+
+    /* write record length */
+    wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
+    /* from "original" leader */
+    wrbuf_write(wr_head, leader+5, 7);
+    /* base address of data */
+    wrbuf_printf(wr_head, "%05d", base_address);
+    /* from "original" leader */
+    wrbuf_write(wr_head, leader+17, 7);
+
+    wrbuf_write(wr, wrbuf_buf(wr_head), 24);
+    wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
+    wrbuf_free(wr_head, 1);
+    wrbuf_free(wr_dir, 1);
+
+    /* second pass: write the field data in directory order */
+    for (n = mt->nodes; n; n = n->next)
     {
-        if (produce_warnings)
-            wrbuf_printf(wr, "<!-- Base address at offsets 12..16 should hold a number. Assuming 0 -->\n");
-        base_address = 0;
+        struct yaz_marc_subfield *s;
+        switch(n->which)
+        {
+        case YAZ_MARC_DATAFIELD:
+        {
+            /* The directory entry above always counts indicator_length
+               bytes, so emit exactly that many: copy what is stored and
+               blank-pad a missing or short indicator. Otherwise the
+               field would be shorter than the directory claims. */
+            const char *ind = n->u.datafield.indicator;
+            int i;
+            for (i = 0; i < indicator_length; i++)
+            {
+                char ch = (ind && *ind) ? *ind++ : ' ';
+                wrbuf_putc(wr, ch);
+            }
+            for (s = n->u.datafield.subfields; s; s = s->next)
+                wrbuf_printf(wr, "%c%s", ISO2709_IDFS, s->code_data);
+            wrbuf_printf(wr, "%c", ISO2709_FS);
+            break;
+        }
+        case YAZ_MARC_CONTROLFIELD:
+            wrbuf_printf(wr, "%s%c", n->u.controlfield.data, ISO2709_FS);
+            break;
+        case YAZ_MARC_COMMENT:
+            break;
+        case YAZ_MARC_LEADER:
+            break;
+        }
     }
-    if (!atoi_n_check(buf+20, 1, &length_data_entry))
+    /* record terminator */
+    wrbuf_printf(wr, "%c", ISO2709_RS);
+    return 0;
+}
+
+#if HAVE_XML2
+/* Walks the sibling list starting at ptr and adds one subfield to mt for
+   every element node found. Each element must be named 'subfield' and may
+   carry only a 'code' attribute whose value is a text node. Non-element
+   siblings are ignored. Returns 0 on success; on the first violation a
+   message is recorded with yaz_marc_cprintf and -1 is returned. */
+int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
+{
+    for (; ptr; ptr = ptr->next)
     {
-        if (produce_warnings)
-            wrbuf_printf(wr, "<!-- Length data entry at offset 20 should hold a digit. Assuming 4 -->\n");
-        length_data_entry = 4;
-        lead[20] = '4';
+        if (ptr->type == XML_ELEMENT_NODE)
+        {
+            if (!strcmp((const char *) ptr->name, "subfield"))
+            {
+                /* despite the ctrl_ prefix these hold the subfield code
+                   immediately followed by the subfield data */
+                size_t ctrl_data_len = 0;
+                char *ctrl_data_buf = 0;
+                const xmlNode *p = 0, *ptr_code = 0;
+                struct _xmlAttr *attr;
+                /* 'code' is the only attribute accepted */
+                for (attr = ptr->properties; attr; attr = attr->next)
+                    if (!strcmp((const char *)attr->name, "code"))
+                        ptr_code = attr->children;
+                    else
+                    {
+                        yaz_marc_cprintf(
+                            mt, "Bad attribute '%.80s' for 'subfield'",
+                            attr->name);
+                        return -1;
+                    }
+                if (!ptr_code)
+                {
+                    yaz_marc_cprintf(
+                        mt, "Missing attribute 'code' for 'subfield'" );
+                    return -1;
+                }
+                if (ptr_code->type == XML_TEXT_NODE)
+                {
+                    ctrl_data_len = 
+                        strlen((const char *)ptr_code->content);
+                }
+                else
+                {
+                    yaz_marc_cprintf(
+                        mt, "Missing value for 'code' in 'subfield'" );
+                    return -1;
+                }
+                /* size the buffer: code length plus all text children */
+                for (p = ptr->children; p ; p = p->next)
+                    if (p->type == XML_TEXT_NODE)
+                        ctrl_data_len += strlen((const char *)p->content);
+                ctrl_data_buf = nmem_malloc(mt->nmem, ctrl_data_len+1);
+                /* fill it in the same order: code first, then the data */
+                strcpy(ctrl_data_buf, (const char *)ptr_code->content);
+                for (p = ptr->children; p ; p = p->next)
+                    if (p->type == XML_TEXT_NODE)
+                        strcat(ctrl_data_buf, (const char *)p->content);
+                yaz_marc_add_subfield(mt, ctrl_data_buf, ctrl_data_len);
+            }
+            else
+            {
+                yaz_marc_cprintf(
+                    mt, "Expected element 'subfield', got '%.80s'", ptr->name);
+                return -1;
+            }
+        }
     }
-    if (!atoi_n_check(buf+21, 1, &length_starting))
+    return 0;
+}
+
+/* Finds the first element node among the siblings starting at *ptr_p.
+   It must be named 'leader' and contain a text node of exactly 24
+   characters, which is handed to yaz_marc_read_leader. The lengths
+   decoded by that call are not used here. On success *ptr_p is set to
+   the leader element and 0 is returned; otherwise a message is recorded
+   with yaz_marc_cprintf and -1 is returned. */
+static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p)
+{
+    int indicator_length;
+    int identifier_length;
+    int base_address;
+    int length_data_entry;
+    int length_starting;
+    int length_implementation;
+    const char *leader = 0;
+    const xmlNode *ptr = *ptr_p;
+
+    for(; ptr; ptr = ptr->next)
+        if (ptr->type == XML_ELEMENT_NODE)
+        {
+            if (!strcmp((const char *) ptr->name, "leader"))
+            {
+                /* if there are several text children the last one wins */
+                xmlNode *p = ptr->children;
+                for(; p; p = p->next)
+                    if (p->type == XML_TEXT_NODE)
+                        leader = (const char *) p->content;
+                break;
+            }
+            else
+            {
+                yaz_marc_cprintf(
+                    mt, "Expected element 'leader', got '%.80s'", ptr->name);
+                return -1;
+            }
+        }
+    if (!leader)
     {
-        if (produce_warnings)
-            wrbuf_printf(wr, "<!-- Length starting at offset 21 should hold a digit. Assuming 5 -->\n");
-        length_starting = 5;
-        lead[21] = '5';
+        yaz_marc_cprintf(mt, "Missing element 'leader'");
+        return -1;
     }
-    if (!atoi_n_check(buf+22, 1, &length_implementation))
+    if (strlen(leader) != 24)
     {
-        if (produce_warnings)
-            wrbuf_printf(wr, "<!-- Length implementation at offset 22 should hold a digit. Assuming 0 -->\n");
-        length_implementation = 0;
-        lead[22] = '0';
+        /* strlen() yields size_t; cast so the argument matches %d */
+        yaz_marc_cprintf(mt, "Bad length %d of leader data."
+                         " Must have length of 24 characters",
+                         (int) strlen(leader));
+        return -1;
     }
+    yaz_marc_read_leader(mt, leader,
+                         &indicator_length,
+                         &identifier_length,
+                         &base_address,
+                         &length_data_entry,
+                         &length_starting,
+                         &length_implementation);
+    *ptr_p = ptr;
+    return 0;
+}
 
-    if (mt->xml != YAZ_MARC_LINE)
-    {
-        char str[80];
-        int i;
-        switch(mt->xml)
+/* Walks the sibling list starting at ptr and adds one field to mt per
+   element node: 'controlfield' (only attribute 'tag' allowed) or
+   'datafield' (attribute 'tag' plus four-character 'indN' attributes,
+   followed by its subfield children). Non-element siblings are ignored.
+   Returns 0 on success; on the first violation a message is recorded
+   with yaz_marc_cprintf and -1 is returned. */
+static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
+{
+    for(; ptr; ptr = ptr->next)
+        if (ptr->type == XML_ELEMENT_NODE)
         {
-        case YAZ_MARC_ISO2709:
-            break;
-        case YAZ_MARC_SIMPLEXML:
-            wrbuf_puts (wr, "<iso2709\n");
-            sprintf (str, " RecordStatus=\"%c\"\n", buf[5]);
-            wrbuf_puts (wr, str);
-            sprintf (str, " TypeOfRecord=\"%c\"\n", buf[6]);
-            wrbuf_puts (wr, str);
-            for (i = 1; i<=19; i++)
+            if (!strcmp((const char *) ptr->name, "controlfield"))
+            {
+                const xmlNode *ptr_tag = 0;
+                struct _xmlAttr *attr;
+                for (attr = ptr->properties; attr; attr = attr->next)
+                    if (!strcmp((const char *)attr->name, "tag"))
+                        ptr_tag = attr->children;
+                    else
+                    {
+                        yaz_marc_cprintf(
+                            mt, "Bad attribute '%.80s' for 'controlfield'",
+                            attr->name);
+                        return -1;
+                    }
+                if (!ptr_tag)
+                {
+                    yaz_marc_cprintf(
+                        mt, "Missing attribute 'tag' for 'controlfield'" );
+                    return -1;
+                }
+                yaz_marc_add_controlfield_xml(mt, ptr_tag, ptr->children);
+            }
+            else if (!strcmp((const char *) ptr->name, "datafield"))
             {
-                sprintf (str, " ImplDefined%d=\"%c\"\n", i, buf[6+i]);
-                wrbuf_puts (wr, str);
+                char indstr[11]; /* 0(unused), 1,....9, + zero term */
+                const xmlNode *ptr_tag = 0;
+                struct _xmlAttr *attr;
+                int i;
+                int max_no = 0; /* highest indicator number seen */
+                for (i = 0; i<11; i++)
+                    indstr[i] = '\0';
+                for (attr = ptr->properties; attr; attr = attr->next)
+                    if (!strcmp((const char *)attr->name, "tag"))
+                        ptr_tag = attr->children;
+                    else if (strlen((const char *)attr->name) == 4 &&
+                             !memcmp(attr->name, "ind", 3))
+                    {
+                        /* the name is exactly 4 chars, so atoi sees one
+                           character and no is always within 0..9 */
+                        int no = atoi((const char *)attr->name+3);
+                        if (attr->children
+                            && attr->children->type == XML_TEXT_NODE)
+                        {
+                            indstr[no] = attr->children->content[0];
+                            if (no > max_no)
+                                max_no = no;
+                        }
+                    }
+                    else
+                    {
+                        yaz_marc_cprintf(
+                            mt, "Bad attribute '%.80s' for 'datafield'",
+                            attr->name);
+                        return -1;
+                    }
+                if (!ptr_tag)
+                {
+                    yaz_marc_cprintf(
+                        mt, "Missing attribute 'tag' for 'datafield'" );
+                    return -1;
+                }
+                /* A missing lower-numbered indicator (e.g. only ind2
+                   given) would leave a NUL in front of the ones that are
+                   present and strlen() below would drop them. Treat such
+                   gaps as blank indicators instead. */
+                for (i = 1; i < max_no; i++)
+                    if (indstr[i] == '\0')
+                        indstr[i] = ' ';
+                /* note that indstr[0] is unused so we use indstr[1..] */
+                yaz_marc_add_datafield_xml(mt, ptr_tag,
+                                           indstr+1, strlen(indstr+1));
+
+                if (yaz_marc_read_xml_subfields(mt, ptr->children))
+                    return -1;
+            }
+            else
+            {
+                yaz_marc_cprintf(mt,
+                                 "Expected element controlfield or datafield,"
+                                 " got %.80s", ptr->name);
+                return -1;
+            }
+        }
+    return 0;
+}
+
+/* Reads one MARCXML/MarcXchange record into mt. xmlnode is an xmlNode
+   pointer; the first element node among it and its following siblings
+   must be named 'record'. Its children are then read as a leader
+   followed by controlfield/datafield elements. Returns 0 on success;
+   on failure a message is recorded with yaz_marc_cprintf and -1 is
+   returned. */
+int yaz_marc_read_xml(yaz_marc_t mt, const void *xmlnode)
+{
+    const xmlNode *ptr = xmlnode;
+
+    /* Reset the handle first, exactly as yaz_marc_read_iso2709 does, so
+       this record is not appended to whatever a previous read left in mt
+       (presumed effect of yaz_marc_reset). */
+    yaz_marc_reset(mt);
+
+    for(; ptr; ptr = ptr->next)
+        if (ptr->type == XML_ELEMENT_NODE)
+        {
+            if (!strcmp((const char *) ptr->name, "record"))
+                break;
+            else
+            {
+                yaz_marc_cprintf(
+                    mt, "Unknown element '%.80s' in MARC XML reader",
+                    ptr->name);
+                return -1;
             }
-            wrbuf_puts (wr, ">\n");
-            break;
-        case YAZ_MARC_OAIMARC:
-            wrbuf_puts(
-                wr,
-                "<oai_marc xmlns=\"http://www.openarchives.org/OIA/oai_marc\""
-                "\n"
-                " xmlns:xsi=\"http://www.w3.org/2000/10/XMLSchema-instance\""
-                "\n"
-                " xsi:schemaLocation=\"http://www.openarchives.org/OAI/oai_marc.xsd\""
-                "\n"
-                );
-            
-            sprintf (str, " status=\"%c\" type=\"%c\" catForm=\"%c\">\n",
-                     buf[5], buf[6], buf[7]);
-            wrbuf_puts (wr, str);
-            break;
-        case YAZ_MARC_MARCXML:
-            wrbuf_printf(
-                wr,
-                "<record xmlns=\"http://www.loc.gov/MARC21/slim\">\n"
-                "  <leader>");
-            lead[9] = 'a';                 /* set leader to signal unicode */
-            marc_cdata(mt, lead, 24, wr); 
-            wrbuf_printf(wr, "</leader>\n");
-            break;
-        case YAZ_MARC_XCHANGE:
-            wrbuf_printf(
-                wr,
-                "<record xmlns=\"http://www.bs.dk/standards/MarcXchange\">\n"
-                "  <leader>");
-            marc_cdata(mt, lead, 24, wr);
-            wrbuf_printf(wr, "</leader>\n");
-            break;
         }
+    if (!ptr)
+    {
+        yaz_marc_cprintf(mt, "Missing element 'record' in MARC XML record");
+        return -1;
     }
-    if (mt->debug)
+    /* ptr points to record node now */
+    ptr = ptr->children;
+    if (yaz_marc_read_xml_leader(mt, &ptr))
+        return -1;
+    /* on success ptr is the leader element; fields are its later siblings */
+    return yaz_marc_read_xml_fields(mt, ptr->next);
+}
+#else
+/* Fallback compiled when HAVE_XML2 is not set: no XML reader is
+   available, so both arguments are ignored and -1 is always returned. */
+int yaz_marc_read_xml(yaz_marc_t mt, const void *xmlnode)
+{
+    return -1;
+}
+#endif
+
+int yaz_marc_read_iso2709(yaz_marc_t mt, const char *buf, int bsize)
+{
+    int entry_p;
+    int record_length;
+    int indicator_length;
+    int identifier_length;
+    int end_of_directory;
+    int base_address;
+    int length_data_entry;
+    int length_starting;
+    int length_implementation;
+
+    yaz_marc_reset(mt);
+
+    record_length = atoi_n (buf, 5);
+    if (record_length < 25)
+    {
+        yaz_marc_cprintf(mt, "Record length %d < 24", record_length);
+        return -1;
+    }
+    /* bail out if bsize is known and record_length is greater than that */
+    if (bsize != -1 && record_length > bsize)
     {
-        char str[40];
-
-        wrbuf_puts (wr, "<!--\n");
-        sprintf (str, "Record length         %5d\n", record_length);
-        wrbuf_puts (wr, str);
-        sprintf (str, "Indicator length      %5d\n", indicator_length);
-        wrbuf_puts (wr, str);
-        sprintf (str, "Identifier length     %5d\n", identifier_length);
-        wrbuf_puts (wr, str);
-        sprintf (str, "Base address          %5d\n", base_address);
-        wrbuf_puts (wr, str);
-        sprintf (str, "Length data entry     %5d\n", length_data_entry);
-        wrbuf_puts (wr, str);
-        sprintf (str, "Length starting       %5d\n", length_starting);
-        wrbuf_puts (wr, str);
-        sprintf (str, "Length implementation %5d\n", length_implementation);
-        wrbuf_puts (wr, str);
-        wrbuf_puts (wr, "-->\n");
+        yaz_marc_cprintf(mt, "Record appears to be larger than buffer %d < %d",
+                         record_length, bsize);
+        return -1;
     }
+    if (mt->debug)
+        yaz_marc_cprintf(mt, "Record length         %5d", record_length);
 
-    /* first pass. determine length of directory & base of data */
+    yaz_marc_read_leader(mt, buf,
+                         &indicator_length,
+                         &identifier_length,
+                         &base_address,
+                         &length_data_entry,
+                         &length_starting,
+                         &length_implementation);
+
+    /* First pass. determine length of directory & base of data */
     for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
     {
         /* length of directory entry */
         int l = 3 + length_data_entry + length_starting;
         if (entry_p + l >= record_length)
         {
-            wrbuf_printf (wr, "<!-- Directory offset %d: end of record. "
-                            "Missing FS char -->\n", entry_p);
+            yaz_marc_cprintf(mt, "Directory offset %d: end of record."
+                             " Missing FS char", entry_p);
             return -1;
         }
         if (mt->debug)
-            wrbuf_printf (wr, "<!-- Directory offset %d: Tag %.3s -->\n",
-                            entry_p, buf+entry_p);
-        /* check for digits in length info */
+        {
+            yaz_marc_cprintf(mt, "Directory offset %d: Tag %.3s",
+                             entry_p, buf+entry_p);
+        }
+        /* Check for digits in length info */
         while (--l >= 3)
             if (!isdigit(*(const unsigned char *) (buf + entry_p+l)))
                 break;
         if (l >= 3)
         {
-            /* not all digits, so stop directory scan */
-            wrbuf_printf (wr, "<!-- Directory offset %d: Bad data for data "
-                            "length and/or length starting -->\n", entry_p);
+            /* Not all digits, so stop directory scan */
+            yaz_marc_cprintf(mt, "Directory offset %d: Bad value for data"
+                             " length and/or length starting", entry_p);
             break;
         }
         entry_p += 3 + length_data_entry + length_starting;
@@ -286,71 +989,17 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
     end_of_directory = entry_p;
     if (base_address != entry_p+1)
     {
-        if (produce_warnings)
-            wrbuf_printf (wr,"<!-- Base address not at end of directory, "
-                          "base %d, end %d -->\n", base_address, entry_p+1);
+        yaz_marc_cprintf(mt, "Base address not at end of directory,"
+                         " base %d, end %d", base_address, entry_p+1);
     }
-    if (mt->xml == YAZ_MARC_ISO2709)
-    {
-        WRBUF wr_head = wrbuf_alloc();
-        WRBUF wr_dir = wrbuf_alloc();
-        WRBUF wr_tmp = wrbuf_alloc();
 
-        int data_p = 0;
-        /* second pass. create directory for ISO2709 output */
-        for (entry_p = 24; entry_p != end_of_directory; )
-        {
-            int data_length, data_offset, end_offset;
-            int i, sz1, sz2;
-            
-            wrbuf_write(wr_dir, buf+entry_p, 3);
-            entry_p += 3;
-            
-            data_length = atoi_n (buf+entry_p, length_data_entry);
-            entry_p += length_data_entry;
-            data_offset = atoi_n (buf+entry_p, length_starting);
-            entry_p += length_starting;
-            i = data_offset + base_address;
-            end_offset = i+data_length-1;
-            
-            if (data_length <= 0 || data_offset < 0 || end_offset >= record_length)
-                return -1;
-        
-            while (i < end_offset &&
-                    buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
-                i++;
-            sz1 = 1+i - (data_offset + base_address);
-            if (mt->iconv_cd)
-            {
-                sz2 = wrbuf_iconv_write(wr_tmp, mt->iconv_cd,
-                                        buf + data_offset+base_address, sz1);
-                wrbuf_rewind(wr_tmp);
-            }
-            else
-                sz2 = sz1;
-            wrbuf_printf(wr_dir, "%0*d", length_data_entry, sz2);
-            wrbuf_printf(wr_dir, "%0*d", length_starting, data_p);
-            data_p += sz2;
-        }
-        wrbuf_putc(wr_dir, ISO2709_FS);
-        wrbuf_printf(wr_head, "%05d", data_p+1 + base_address);
-        wrbuf_write(wr_head, lead+5, 7);
-        wrbuf_printf(wr_head, "%05d", base_address);
-        wrbuf_write(wr_head, lead+17, 7);
-
-        wrbuf_write(wr, wrbuf_buf(wr_head), 24);
-        wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
-        wrbuf_free(wr_head, 1);
-        wrbuf_free(wr_dir, 1);
-        wrbuf_free(wr_tmp, 1);
-    }
-    /* third pass. create data output */
+    /* Second pass. parse control - and datafields */
     for (entry_p = 24; entry_p != end_of_directory; )
     {
         int data_length;
         int data_offset;
         int end_offset;
-        int i, j;
+        int i;
         char tag[4];
         int identifier_flag = 0;
         int entry_p0 = entry_p;
@@ -358,9 +1007,9 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
         memcpy (tag, buf+entry_p, 3);
         entry_p += 3;
         tag[3] = '\0';
-        data_length = atoi_n (buf+entry_p, length_data_entry);
+        data_length = atoi_n(buf+entry_p, length_data_entry);
         entry_p += length_data_entry;
-        data_offset = atoi_n (buf+entry_p, length_starting);
+        data_offset = atoi_n(buf+entry_p, length_starting);
         entry_p += length_starting;
         i = data_offset + base_address;
         end_offset = i+data_length-1;
@@ -370,15 +1019,14 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
         
         if (mt->debug)
         {
-            wrbuf_printf(wr, "<!-- Directory offset %d: data-length %d, "
-                            "data-offset %d -->\n",
-                    entry_p0, data_length, data_offset);
+            yaz_marc_cprintf(mt, "Tag: %s. Directory offset %d: data-length %d,"
+                             " data-offset %d",
+                             tag, entry_p0, data_length, data_offset);
         }
         if (end_offset >= record_length)
         {
-            wrbuf_printf (wr,"<!-- Directory offset %d: Data out of bounds "
-                            "%d >= %d -->\n",
-                                   entry_p0, end_offset, record_length);
+            yaz_marc_cprintf(mt, "Directory offset %d: Data out of bounds %d >= %d",
+                             entry_p0, end_offset, record_length);
             break;
         }
         
@@ -393,200 +1041,60 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
                 identifier_flag = 2;
         }
 
-        if (mt->debug)
-        {
-            wrbuf_printf(wr, "<!-- identifier_flag = %d -->\n",
-                         identifier_flag);
-        } 
-       
-        switch(mt->xml)
-        {
-        case YAZ_MARC_LINE:
-            wrbuf_puts (wr, tag);
-            wrbuf_puts (wr, " ");
-            break;
-        case YAZ_MARC_SIMPLEXML:
-            wrbuf_printf (wr, "<field tag=\"");
-            marc_cdata(mt, tag, strlen(tag), wr);
-            wrbuf_printf(wr, "\"");
-            break;
-        case YAZ_MARC_OAIMARC:
-            if (identifier_flag)
-                wrbuf_printf (wr, "  <varfield id=\"");
-            else
-                wrbuf_printf (wr, "  <fixfield id=\"");
-            marc_cdata(mt, tag, strlen(tag), wr);
-            wrbuf_printf(wr, "\"");
-            break;
-        case YAZ_MARC_MARCXML:
-        case YAZ_MARC_XCHANGE:
-            if (identifier_flag)
-                wrbuf_printf (wr, "  <datafield tag=\"");
-            else
-                wrbuf_printf (wr, "  <controlfield tag=\"");
-            marc_cdata(mt, tag, strlen(tag), wr);
-            wrbuf_printf(wr, "\"");
-        }
-        
         if (identifier_flag)
         {
+            /* datafield */
             i += identifier_flag-1;
-            for (j = 0; j<indicator_length; j++, i++)
-            {
-                switch(mt->xml)
-                {
-                case YAZ_MARC_ISO2709:
-                    wrbuf_putc(wr, buf[i]);
-                    break;
-                case YAZ_MARC_LINE:
-                    wrbuf_putc(wr, buf[i]);
-                    break;
-                case YAZ_MARC_SIMPLEXML:
-                    wrbuf_printf(wr, " Indicator%d=\"", j+1);
-                    marc_cdata(mt, buf+i, 1, wr);
-                    wrbuf_printf(wr, "\"");
-                    break;
-                case YAZ_MARC_OAIMARC:
-                    wrbuf_printf(wr, " i%d=\"", j+1);
-                    marc_cdata(mt, buf+i, 1, wr);
-                    wrbuf_printf(wr, "\"");
-                    break;
-                case YAZ_MARC_MARCXML:
-                case YAZ_MARC_XCHANGE:
-                    wrbuf_printf(wr, " ind%d=\"", j+1);
-                    marc_cdata(mt, buf+i, 1, wr);
-                    wrbuf_printf(wr, "\"");
-                }
-            }
-        }
-        if (mt->xml == YAZ_MARC_SIMPLEXML || mt->xml == YAZ_MARC_MARCXML
-            || mt->xml == YAZ_MARC_OAIMARC || mt->xml == YAZ_MARC_XCHANGE)
-        {
-            wrbuf_puts (wr, ">");
-            if (identifier_flag)
-                wrbuf_puts (wr, "\n");
-        }
-        if (identifier_flag)
-        {
+            yaz_marc_add_datafield(mt, tag, buf+i, indicator_length);
+            i += indicator_length;
+
             while (i < end_offset &&
                     buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
             {
-                int i0;
-
-                int sb_octet_length = identifier_length-1;
-                if (identifier_length == 2)
-                    sb_octet_length = cdata_one_character(mt, buf+i);
+                int code_offset = i+1;
 
-                i++;
-                switch(mt->xml)
-                {
-                case YAZ_MARC_ISO2709:
-                    --i;
-                    wrbuf_iconv_write(wr, mt->iconv_cd, 
-                                      buf+i, identifier_length);
-                    i += identifier_length;
-                    break;
-                case YAZ_MARC_LINE: 
-                    wrbuf_puts (wr, mt->subfield_str); 
-                    marc_cdata(mt, buf+i, sb_octet_length, wr);
-                    i = i+sb_octet_length;
-                    wrbuf_putc (wr, ' ');
-                    break;
-                case YAZ_MARC_SIMPLEXML:
-                    wrbuf_puts (wr, "  <subfield code=\"");
-                    marc_cdata(mt, buf+i, sb_octet_length, wr);
-                    i = i+sb_octet_length;
-                    wrbuf_puts (wr, "\">");
-                    break;
-                case YAZ_MARC_OAIMARC:
-                    wrbuf_puts (wr, "    <subfield label=\"");
-                    marc_cdata(mt, buf+i, sb_octet_length, wr);
-                    i = i+sb_octet_length;
-                    wrbuf_puts (wr, "\">");
-                    break;
-                case YAZ_MARC_MARCXML:
-                case YAZ_MARC_XCHANGE:
-                    wrbuf_puts (wr, "    <subfield code=\"");
-                    marc_cdata(mt, buf+i, sb_octet_length, wr);
-                    i = i+sb_octet_length;
-                    wrbuf_puts (wr, "\">");
-                    break;
-                }
-                i0 = i;
+                i ++;
                 while (i < end_offset &&
                         buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
-                        buf[i] != ISO2709_FS)
+                       buf[i] != ISO2709_FS)
                     i++;
-                marc_cdata(mt, buf + i0, i - i0, wr);
-
-                if (mt->xml == YAZ_MARC_ISO2709 && buf[i] != ISO2709_IDFS)
-                    marc_cdata(mt, buf + i, 1, wr);
-
-                if (mt->xml == YAZ_MARC_SIMPLEXML || 
-                    mt->xml == YAZ_MARC_MARCXML ||
-                    mt->xml == YAZ_MARC_XCHANGE ||
-                    mt->xml == YAZ_MARC_OAIMARC)
-                    wrbuf_puts (wr, "</subfield>\n");
+                yaz_marc_add_subfield(mt, buf+code_offset, i - code_offset);
             }
         }
         else
         {
+            /* controlfield */
             int i0 = i;
             while (i < end_offset && 
                 buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
                 i++;
-            marc_cdata(mt, buf + i0, i - i0, wr);
-            if (mt->xml == YAZ_MARC_ISO2709)
-                marc_cdata(mt, buf + i, 1, wr);
+            yaz_marc_add_controlfield(mt, tag, buf+i0, i-i0);
         }
-        if (mt->xml == YAZ_MARC_LINE)
-            wrbuf_puts (wr, mt->endline_str);
         if (i < end_offset)
-            wrbuf_printf(wr, "<!-- separator but not at end of field length=%d-->\n", data_length);
+        {
+            yaz_marc_cprintf(mt, "Separator but not at end of field length=%d",
+                    data_length);
+        }
         if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
-            wrbuf_printf(wr, "<!-- no separator at end of field length=%d-->\n", data_length);
-        switch(mt->xml)
         {
-        case YAZ_MARC_SIMPLEXML:
-            wrbuf_puts (wr, "</field>\n");
-            break;
-        case YAZ_MARC_OAIMARC:
-            if (identifier_flag)
-                wrbuf_puts (wr, "</varfield>\n");
-            else
-                wrbuf_puts (wr, "</fixfield>\n");
-            break;
-        case YAZ_MARC_MARCXML:
-        case YAZ_MARC_XCHANGE:
-            if (identifier_flag)
-                wrbuf_puts (wr, "  </datafield>\n");
-            else
-                wrbuf_puts (wr, "</controlfield>\n");
-            break;
+            yaz_marc_cprintf(mt, "No separator at end of field length=%d",
+                    data_length);
         }
     }
-    switch (mt->xml)
-    {
-    case YAZ_MARC_LINE:
-        wrbuf_puts (wr, "");
-        break;
-    case YAZ_MARC_SIMPLEXML:
-        wrbuf_puts (wr, "</iso2709>\n");
-        break;
-    case YAZ_MARC_OAIMARC:
-        wrbuf_puts (wr, "</oai_marc>\n");
-        break;
-    case YAZ_MARC_MARCXML:
-    case YAZ_MARC_XCHANGE:
-        wrbuf_puts (wr, "</record>\n");
-        break;
-    case YAZ_MARC_ISO2709:
-        wrbuf_putc (wr, ISO2709_RS);
-        break;
-    }
     return record_length;
 }
 
+int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
+{
+    /* parse first; a non-positive result is handed back unchanged */
+    int record_len = yaz_marc_read_iso2709(mt, buf, bsize);
+
+    if (record_len <= 0)
+        return record_len;
+    /* then write in the handle's output mode; non-zero means failure */
+    if (yaz_marc_write_mode(mt, wr) != 0)
+        return -1;
+    return record_len; /* length of the record that was read, > 0 */
+}
+
 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
                          char **result, int *rsize)
 {
@@ -618,7 +1126,7 @@ void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
     mt->iconv_cd = cd;
 }
 
-/* depricated */
+/* deprecated */
 int yaz_marc_decode(const char *buf, WRBUF wr, int debug, int bsize, int xml)
 {
     yaz_marc_t mt = yaz_marc_create();
@@ -631,13 +1139,13 @@ int yaz_marc_decode(const char *buf, WRBUF wr, int debug, int bsize, int xml)
     return r;
 }
 
-/* depricated */
+/* deprecated */
 int marc_display_wrbuf (const char *buf, WRBUF wr, int debug, int bsize)
 {
     return yaz_marc_decode(buf, wr, debug, bsize, 0);
 }
 
-/* depricated */
+/* deprecated */
 int marc_display_exl (const char *buf, FILE *outf, int debug, int bsize)
 {
     yaz_marc_t mt = yaz_marc_create();
@@ -653,13 +1161,13 @@ int marc_display_exl (const char *buf, FILE *outf, int debug, int bsize)
     return r;
 }
 
-/* depricated */
+/* deprecated */
 int marc_display_ex (const char *buf, FILE *outf, int debug)
 {
     return marc_display_exl (buf, outf, debug, -1);
 }
 
-/* depricated */
+/* deprecated */
 int marc_display (const char *buf, FILE *outf)
 {
     return marc_display_ex (buf, outf, 0);
index 3791082..70e23df 100644 (file)
@@ -2,12 +2,11 @@
  * Copyright (C) 1995-2005, Index Data ApS
  * All rights reserved.
  *
- * $Id: querytowrbuf.c,v 1.3 2006-01-20 14:44:55 adam Exp $
+ * $Id: querytowrbuf.c,v 1.4 2006-04-19 10:05:03 adam Exp $
  */
 
-/**
- * \file querytostr.c
- * \brief Query to WRBUF (to strings)
+/** \file querytowrbuf.c
+    \brief Query to WRBUF (to strings)
  */
 
 #include <stdio.h>
index 19c2ce2..47dbe9a 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (C) 1995-2005, Index Data ApS
  * See the file LICENSE for details.
  *
- * $Id: wrbuf.c,v 1.9 2005-09-27 17:52:46 adam Exp $
+ * $Id: wrbuf.c,v 1.10 2006-04-19 10:05:03 adam Exp $
  */
 
 /**
@@ -195,6 +195,15 @@ int wrbuf_iconv_write(WRBUF b, yaz_iconv_t cd, const char *buf, int size)
     return wrbuf_iconv_write_x(b, cd, buf, size, 0);
 }
 
+/* String form of wrbuf_iconv_write(): the size is strlen(strz), so strz
+   must be NUL-terminated. Returns whatever wrbuf_iconv_write() returns. */
+int wrbuf_iconv_puts(WRBUF b, yaz_iconv_t cd, const char *strz)
+{
+    const int len = (int) strlen(strz);
+
+    return wrbuf_iconv_write(b, cd, strz, len);
+}
+
 int wrbuf_iconv_write_cdata(WRBUF b, yaz_iconv_t cd, const char *buf, int size)
 {
     return wrbuf_iconv_write_x(b, cd, buf, size, 1);
index 9574be4..77184ee 100644 (file)
@@ -2,13 +2,12 @@
  * Copyright (C) 1995-2005, Index Data ApS
  * All rights reserved.
  *
- * $Id: xmlquery.c,v 1.6 2006-02-23 13:09:54 adam Exp $
+ * $Id: xmlquery.c,v 1.7 2006-04-19 10:05:03 adam Exp $
  */
 
-/**
- * \file querytostr.c
- * \brief Query / XML conversions
- */
+/** \file xmlquery.c
+    \brief Query / XML conversions
+*/
 
 #include <stdio.h>
 #include <string.h>
index 4173682..148b54e 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (C) 1995-2005, Index Data ApS
  * See the file LICENSE for details.
  *
- * $Id: zoom-c.c,v 1.68 2006-04-07 11:27:24 adam Exp $
+ * $Id: zoom-c.c,v 1.69 2006-04-19 10:05:03 adam Exp $
  */
 /**
  * \file zoom-c.c
@@ -1150,7 +1150,7 @@ static zoom_ret ZOOM_connection_send_init (ZOOM_connection c)
         ZOOM_options_get(c->options, "implementationName"),
         odr_prepend(c->odr_out, "ZOOM-C", ireq->implementationName));
 
-    version = odr_strdup(c->odr_out, "$Revision: 1.68 $");
+    version = odr_strdup(c->odr_out, "$Revision: 1.69 $");
     if (strlen(version) > 10)   /* check for unexpanded CVS strings */
         version[strlen(version)-2] = '\0';
     ireq->implementationVersion = odr_prepend(c->odr_out,
@@ -1865,7 +1865,7 @@ ZOOM_record_get (ZOOM_record rec, const char *type_spec, int *len)
         }
         return 0;
     }
-    else if (!strcmp (type, "xml") || !strcmp(type, "oai"))
+    else if (!strcmp (type, "xml"))
     {
         Z_External *r = (Z_External *) npr->u.databaseRecord;
         oident *ent = oid_getentbyoid(r->direct_reference);
@@ -1889,8 +1889,6 @@ ZOOM_record_get (ZOOM_record rec, const char *type_spec, int *len)
             const char *ret_buf;
             int marc_decode_type = YAZ_MARC_MARCXML;
 
-            if (!strcmp(type, "oai"))
-                marc_decode_type = YAZ_MARC_OAIMARC;
             switch (ent->value)
             {
             case VAL_SOIF:
index fd5eda6..7a1e037 100644 (file)
@@ -1,16 +1,21 @@
 ## Copyright (C) 1994-2006, Index Data
 ## All rights reserved.
-## $Id: Makefile.am,v 1.12 2006-04-01 11:45:23 adam Exp $
+## $Id: Makefile.am,v 1.13 2006-04-19 10:05:04 adam Exp $
 
 check_PROGRAMS = tsticonv tstnmem tstmatchstr tstwrbuf tstodr tstccl tstlog \
  tstsoap1 tstsoap2 tstodrstack tstlogthread tstxmlquery tstpquery
-check_SCRIPTS = tstcql.sh tstmarc.sh
+check_SCRIPTS = tstcql.sh tstmarciso.sh tstmarcxml.sh
 
 TESTS = $(check_PROGRAMS) $(check_SCRIPTS)
 
 EXTRA_DIST = tstodr.asn tstodrcodec.c tstodrcodec.h cqlsample \
  $(check_SCRIPTS) \
- marc1 marc1.xml marc2 marc2.xml marc3 marc3.xml marc4 marc4.xml
+ marc1 marc1.xml marc1.chr marc1.xml.marc \
+ marc2 marc2.xml marc2.chr marc2.xml.marc \
+ marc3 marc3.xml marc3.chr marc3.xml.marc \
+ marc4 marc4.xml marc4.chr marc4.xml.marc \
+ marc5 marc5.xml marc5.chr marc5.xml.marc \
+ marc6 marc6.xml marc6.chr marc6.xml.marc
 
 YAZCOMP = ../util/yaz-asncomp
 YAZCOMPLINE = $(YAZCOMP) -d z.tcl -i yaz -I../include $(YCFLAGS)
diff --git a/test/marc1.chr b/test/marc1.chr
new file mode 100644 (file)
index 0000000..328c81e
--- /dev/null
@@ -0,0 +1 @@
+iso-8859-1
index d93b391..75b3a95 100644 (file)
@@ -1,6 +1,6 @@
-<!-- Length implementation at offset 22 should hold a digit. Assuming 0 -->
 <record xmlns="http://www.loc.gov/MARC21/slim">
-  <leader>00988nam0a32003011  450 </leader>
+<!-- Length implementation at offset 22 should hold a digit. Assuming 0 -->
+  <leader>00988nam0 32003011  450 </leader>
   <datafield tag="001" ind1="0" ind2="0" ind3="0">
     <subfield code="a">9 181 423 4</subfield>
     <subfield code="b">710100</subfield>
diff --git a/test/marc1.xml.marc b/test/marc1.xml.marc
new file mode 100644 (file)
index 0000000..e899276
--- /dev/null
@@ -0,0 +1 @@
+00989nam0 32003011  450 001002800000004001000028008002800038009001100066021002700077032001500104100002100119245005400140250004100194260004000235300002700275504012300302512006100425520005000486652002400536652001600560666001600576666001900592666001500611666001800626666001900644666000900663666001500672\1e000\1fa9 181 423 4\1fb710100\1ffa\1e000\1frn\1fae\1e000\1ftm\1fuu\1fa2002\1fbus\1fleng\1fv0\1e000\1faa\1fgxx\1e000\1fa1-4000-4596-7\1fd$14,00\1e000\1f&DBC200439\1e000\1f0\1faSloman\1fhLarry\1e000\1faOn the road with Bob Dylan\1feLarry "Ratso" Sloman\1e000\1faRevised edition\1fbThree Rivers Press\1e000\1faNew York\1fbThree Rivers Press\1fc2002\1e000\1faxv, 464 sider, tavler\1e000\1faLarry "Ratso" Slomans meget personlige beretning om Bob Dylans koncertturne i USA i 1975: "The Rolling Thunder revue"\1e000\1faPå omslaget: With a new introduction by Kinky Friedman\1e000\1faTidligere: 1. udgave. New York, Bantam, 1978\1e000\1f0\1fm99.4\1faDylan\1fhBob\1e000\1fp78.9064\1fv5\1e000\1fffolkemusik\1e000\1fffolkemusikere\1e000\1ffrockmusik\1e000\1ffrockmusikere\1e000\1ffrockkoncerter\1e000\1feUSA\1e000\1fi1970-1979\1e\1d
\ No newline at end of file
diff --git a/test/marc2.chr b/test/marc2.chr
new file mode 100644 (file)
index 0000000..328c81e
--- /dev/null
@@ -0,0 +1 @@
+iso-8859-1
index ee51723..6faad3e 100644 (file)
@@ -1,6 +1,6 @@
-<!-- Length implementation at offset 22 should hold a digit. Assuming 0 -->
 <record xmlns="http://www.loc.gov/MARC21/slim">
-  <leader>01116nam0a32002171  450 </leader>
+<!-- Length implementation at offset 22 should hold a digit. Assuming 0 -->
+  <leader>01116nam0 32002171  450 </leader>
   <datafield tag="001" ind1="0" ind2="0" ind3="0">
     <subfield code="a">9 182 502 3</subfield>
     <subfield code="b">710100</subfield>
diff --git a/test/marc2.xml.marc b/test/marc2.xml.marc
new file mode 100644 (file)
index 0000000..5580029
--- /dev/null
@@ -0,0 +1 @@
+01121nam0 32002171  450 001002800000004001000028008002800038009001100066039000900077100001900086245005900105260003300164300001000197512007300207531001400280538001900294652001500313666004900328795050600377795002000883\1e000\1fa9 182 502 3\1fb710100\1ffa\1e000\1frc\1fae\1e000\1fts\1fuf\1fa1995\1fbgb\1fleng\1fv0\1e000\1fas\1fgxc\1e000\1fabef\1e000\1faMimms\1fhGarnet\1e000\1faCry baby\1faWarm and soulful\1feGarnet Mimms ... [et al.]\1e000\1faBury St. Edmunds\1fbBGO\1fc1995\1e000\1fn1 cd\1e000\1faIndspilninger publiceret 1963 (Cry baby) og 1965 (Warm and soulful)\1e000\1faIndhold:\1e000\1ffBGO\1fgBGOCD268\1e000\1fm78.794\1fv4\1e000\1fmsoul\1fmrhythm & blues\1fnvokal\1fp1960-1969\1flUSA\1e000\1få11\1faCry baby\1faNobody but you\1faUntil you were gone\1faAnytime you want me\1faSo close\1faFor your precious love\1faBaby don't you weep\1faA ¤quiet place\1faCry to me\1faDon't change your heart\1faWanting you\1faThe ¤truth hurts\1faI'll take good care of you\1faLooking for you\1faIt won't hurt (half as much)\1faIt was easier to hurt her\1faThinkin'\1faProve it to me\1faMore than a miracle\1faAs long as I have you\1faOne girl\1faThere goes my baby\1faIt's just a matter of time\1faA ¤little bit of soap\1faLook away\1faI'll make it up to you\1e000\1få40\1fy0\1fa1 girl\1e\1d
\ No newline at end of file
diff --git a/test/marc3.chr b/test/marc3.chr
new file mode 100644 (file)
index 0000000..328c81e
--- /dev/null
@@ -0,0 +1 @@
+iso-8859-1
index 6407ec2..53f62b8 100644 (file)
@@ -1,6 +1,6 @@
-<!-- Length implementation at offset 22 should hold a digit. Assuming 0 -->
 <record xmlns="http://www.loc.gov/MARC21/slim">
-  <leader>00914naa a2200337   450 </leader>
+<!-- Length implementation at offset 22 should hold a digit. Assuming 0 -->
+  <leader>00914naa  2200337   450 </leader>
   <datafield tag="001" ind1=" " ind2=" ">
     <subfield code="a">a00001508</subfield>
     <subfield code="f">a</subfield>
diff --git a/test/marc3.xml.marc b/test/marc3.xml.marc
new file mode 100644 (file)
index 0000000..9d5c4b7
--- /dev/null
@@ -0,0 +1 @@
+00914naa  2200337   450 001001700000004000900017008002400026009001000050041000800060041000800068097000700076245003300083300002600116557003300142630001600175630001300191633001000204633000900214648006100223648001600284648005400300J01000600354BAS000500360LKR004200365CAT003000407CAT003000437CAT003000467CAT003900497CAT002600536UID001400562\1e  \1faa00001508\1ffa\1e  \1fai\1frn\1e  \1fa1991\1fbxx\1flnor\1fta\1fv9\1e  \1faa\1fgxx\1e  \1fanor\1e  \1fdeng\1e00\1fa06\1e  \1faByfornyelse ved Ibsen-Ringen\1e  \1fbfarvefoto\1fbplan\1fbsnit\1e  \1faByggekunst\1fj1991\1fv1/2\1fk41-45\1e  \1ffbyfornyelse\1e  \1ffsanering\1e  \1ffNorge\1e  \1ffOslo\1e  \1faTelje Torp Aasen Arkitektkontor\1fcKristian Augustsgate 7B\1e  \1faEng, Dagfin\1e  \1franlund, Tom\1fcKristian Augustsgate\1fcPilestredet 19\1e  \1faa\1e  \1f0\1e  \1faITM\1flARK50\1fb0000145\1fy1991\1fi1/2\1fk41-45\1e  \1fa\1fb\1fc20020111\1flARK01\1fh2002\1e  \1fa\1fb\1fc20020111\1flARK01\1fh2116\1e  \1fa\1fb\1fc20021002\1flARK01\1fh1000\1e  \1faICLLOAD\1fb00\1fc20021122\1flARK01\1fh1948\1e  \1fc20030618\1flARK01\1fh1330\1e  \1faa00001508\1e\1d
\ No newline at end of file
diff --git a/test/marc4.chr b/test/marc4.chr
new file mode 100644 (file)
index 0000000..328c81e
--- /dev/null
@@ -0,0 +1 @@
+iso-8859-1
index fc8e91b..c2f2702 100644 (file)
@@ -1,9 +1,9 @@
+<record xmlns="http://www.loc.gov/MARC21/slim">
 <!-- Indicator length at offset 10 should hold a digit. Assuming 2 -->
 <!-- Identifier length at offset 11 should hold a digit. Assuming 2 -->
 <!-- Base address at offsets 12..16 should hold a number. Assuming 0 -->
 <!-- Length implementation at offset 22 should hold a digit. Assuming 0 -->
-<record xmlns="http://www.loc.gov/MARC21/slim">
-  <leader>009140091a22a  22003370 </leader>
-<!-- Directory offset 24: Bad data for data length and/or length starting -->
+  <leader>009140091422a  22003370 </leader>
+<!-- Directory offset 24: Bad value for data length and/or length starting -->
 <!-- Base address not at end of directory, base 0, end 25 -->
 </record>
diff --git a/test/marc4.xml.marc b/test/marc4.xml.marc
new file mode 100644 (file)
index 0000000..a8f9ac9
--- /dev/null
@@ -0,0 +1 @@
+00026009142200025003370 \1e\1d
\ No newline at end of file
diff --git a/test/marc5.chr b/test/marc5.chr
new file mode 100644 (file)
index 0000000..a524421
--- /dev/null
@@ -0,0 +1 @@
+utf-8
diff --git a/test/marc5.xml b/test/marc5.xml
new file mode 100644 (file)
index 0000000..54323ee
--- /dev/null
@@ -0,0 +1,31 @@
+<record xmlns="http://www.loc.gov/MARC21/slim">
+  <leader>00492nam a22001455a 4500</leader>
+  <controlfield tag="001">000277485</controlfield>
+  <controlfield tag="005">20051026111436.0</controlfield>
+  <controlfield tag="008">050413s1894    gr            000 0 gre d</controlfield>
+  <datafield tag="100" ind1="1" ind2=" ">
+    <subfield code="a">Μαρούδης, Κωνσταντίνος Ιω</subfield>
+  </datafield>
+  <datafield tag="245" ind1="1" ind2="0">
+    <subfield code="a">Ελληνικόν κρυπτογραφικόν λεξικόν /</subfield>
+    <subfield code="c">Κωνστ. Ι. Μαρούδης.</subfield>
+  </datafield>
+  <datafield tag="250" ind1=" " ind2=" ">
+    <subfield code="η"> εκδ.</subfield>
+  </datafield>
+  <datafield tag="260" ind1=" " ind2=" ">
+    <subfield code="a">Αθήνα,</subfield>
+    <subfield code="c">1894.</subfield>
+  </datafield>
+  <datafield tag="300" ind1=" " ind2=" ">
+    <subfield code="a">248 σελ.</subfield>
+  </datafield>
+  <datafield tag="650" ind1=" " ind2="0">
+    <subfield code="a">Greek language, Modern</subfield>
+    <subfield code="x">Dialects</subfield>
+    <subfield code="v">Dictionaries</subfield>
+  </datafield>
+  <datafield tag="650" ind1=" " ind2="0">
+    <subfield code="a">Cryptography.</subfield>
+  </datafield>
+</record>
diff --git a/test/marc5.xml.marc b/test/marc5.xml.marc
new file mode 100644 (file)
index 0000000..50102a7
--- /dev/null
@@ -0,0 +1 @@
+00492nam a22001455a 4500001001000000005001700010008004100027100005200068245010400120250001400224260002300238300001600261650005100277650001800328\1e000277485\1e20051026111436.0\1e050413s1894    gr            000 0 gre d\1e\1faΜαρούδης, Κωνσταντίνος Ιω\1e10\1faΕλληνικόν κρυπτογραφικόν λεξικόν /\1fcΚωνστ. Ι. Μαρούδης.\1e  \1fη εκδ.\1e  \1faΑθήνα,\1fc1894.\1e  \1fa248 σελ.\1e 0\1faGreek language, Modern\1fxDialects\1fvDictionaries\1e 0\1faCryptography.\1e\1d
\ No newline at end of file
diff --git a/test/marc6 b/test/marc6
new file mode 100644 (file)
index 0000000..c78fdce
--- /dev/null
@@ -0,0 +1 @@
+00366nam  22001698a 4500001001300000003000400013005001700017008004100034010001700179040001300075050001200088100001700100245003000117260001200147263000900159300001100168\1e   11224466 \1eDLC\1e00000000000000.0\1e910710c19910701nju           00010 eng  \1e  \1faDLC\1fcDLC\1e00\1fa123-xyz\1e10\1faJack Collins\1e10\1faHow to program a computer\1e\1faPenguin\1e  \1fa8710\1e  \1fap. cm.\1e  \1fa   11224466 \1e\1d
\ No newline at end of file
diff --git a/test/marc6.chr b/test/marc6.chr
new file mode 100644 (file)
index 0000000..f51f8e4
--- /dev/null
@@ -0,0 +1 @@
+marc-8
diff --git a/test/marc6.xml b/test/marc6.xml
new file mode 100644 (file)
index 0000000..2b8578d
--- /dev/null
@@ -0,0 +1,32 @@
+<record xmlns="http://www.loc.gov/MARC21/slim">
+  <leader>00366nam  22001698a 4500</leader>
+  <controlfield tag="001">   11224466 </controlfield>
+  <controlfield tag="003">DLC</controlfield>
+  <controlfield tag="005">00000000000000.0</controlfield>
+  <controlfield tag="008">910710c19910701nju           00010 eng  </controlfield>
+  <datafield tag="010" ind1=" " ind2=" ">
+    <subfield code="a">   11224466 </subfield>
+  </datafield>
+  <datafield tag="040" ind1=" " ind2=" ">
+    <subfield code="a">DLC</subfield>
+    <subfield code="c">DLC</subfield>
+  </datafield>
+  <datafield tag="050" ind1="0" ind2="0">
+    <subfield code="a">123-xyz</subfield>
+  </datafield>
+  <datafield tag="100" ind1="1" ind2="0">
+    <subfield code="a">Jack Collins</subfield>
+  </datafield>
+  <datafield tag="245" ind1="1" ind2="0">
+    <subfield code="a">How to program a computer</subfield>
+  </datafield>
+  <datafield tag="260" ind1="1" ind2=" ">
+    <subfield code="a">Penguin</subfield>
+  </datafield>
+  <datafield tag="263" ind1=" " ind2=" ">
+    <subfield code="a">8710</subfield>
+  </datafield>
+  <datafield tag="300" ind1=" " ind2=" ">
+    <subfield code="a">p. cm.</subfield>
+  </datafield>
+</record>
diff --git a/test/marc6.xml.marc b/test/marc6.xml.marc
new file mode 100644 (file)
index 0000000..6f62ae2
--- /dev/null
@@ -0,0 +1 @@
+00366nam  22001698a 4500001001300000003000400013005001700017008004100034010001700075040001300092050001200105100001700117245003000134260001200164263000900176300001100185\1e   11224466 \1eDLC\1e00000000000000.0\1e910710c19910701nju           00010 eng  \1e  \1fa   11224466 \1e  \1faDLC\1fcDLC\1e00\1fa123-xyz\1e10\1faJack Collins\1e10\1faHow to program a computer\1e\1faPenguin\1e  \1fa8710\1e  \1fap. cm.\1e\1d
\ No newline at end of file
diff --git a/test/tstmarc.sh b/test/tstmarc.sh
deleted file mode 100755 (executable)
index 97b1ab4..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-#!/bin/sh
-# $Id: tstmarc.sh,v 1.2 2004-11-16 17:12:28 adam Exp $
-srcdir=${srcdir:-.}
-ecode=0
-for f in ${srcdir}/marc?; do
-    NEW=`basename ${f}`.new.xml
-    OLD=${f}.xml
-    DIFF=`basename ${f}`.diff
-    ../util/yaz-marcdump -f iso-8859-1 -t utf-8 -X $f > $NEW
-    if test $? != "0"; then
-       echo "Failed decode of $f"
-       ecode=1
-    elif test -f $OLD; then
-        if diff $OLD $NEW >$DIFF; then
-           rm $DIFF
-           rm $NEW
-       else
-           echo "Differ in $f"
-           ecode=1
-       fi
-    else
-       echo "Making test $f for the first time"
-       if test -x /usr/bin/xmllint; then
-           if xmllint --noout $NEW >out 2>stderr; then
-               echo "XML for $f is OK"
-               mv $NEW $OLD
-           else
-               echo "XML for $f is invalid"
-               ecode=1
-           fi
-       else
-           echo "xmllint not found. install libxml2-utils"
-           ecode=1
-       fi
-    fi
-done
-exit $ecode
-
diff --git a/test/tstmarciso.sh b/test/tstmarciso.sh
new file mode 100755 (executable)
index 0000000..9bf324d
--- /dev/null
@@ -0,0 +1,46 @@
+#!/bin/sh
+# $Id: tstmarciso.sh,v 1.1 2006-04-19 10:05:04 adam Exp $
+# Tests reading of ISO2709 and checks that we get identical MARCXML
+#
+# For each record file marcN: decode it with yaz-marcdump, taking the source
+# character set from marcN.chr, and diff the MARCXML output against marcN.xml.
+# If marcN.xml does not exist yet it is created from the output, provided
+# xmllint accepts that output.
+srcdir=${srcdir:-.}
+ecode=0
+for f in "${srcdir}"/marc?; do
+    NEW=`basename "${f}"`.new.xml
+    OLD=${f}.xml
+    DIFF=`basename "${f}"`.diff
+    CHR=`cat "${f}.chr"`
+    ../util/yaz-marcdump -f "$CHR" -t utf-8 -X "$f" > "$NEW"
+    if test $? != "0"; then
+       echo "Failed decode of $f"
+       ecode=1
+    elif test -f "$OLD"; then
+        if diff "$OLD" "$NEW" >"$DIFF"; then
+           rm "$DIFF"
+           rm "$NEW"
+       else
+           echo "Differ in $f"
+           ecode=1
+       fi
+    else
+       echo "Making test $f for the first time"
+       # look xmllint up on PATH, which is also how it is invoked below
+       if command -v xmllint >/dev/null 2>&1; then
+           if xmllint --noout "$NEW" >out 2>stderr; then
+               echo "XML for $f is OK"
+               mv "$NEW" "$OLD"
+           else
+               echo "XML for $f is invalid"
+               ecode=1
+           fi
+       else
+           echo "xmllint not found. install libxml2-utils"
+           ecode=1
+       fi
+    fi
+done
+exit $ecode
+
diff --git a/test/tstmarcxml.sh b/test/tstmarcxml.sh
new file mode 100755 (executable)
index 0000000..7a970fb
--- /dev/null
@@ -0,0 +1,37 @@
+#!/bin/sh
+# $Id: tstmarcxml.sh,v 1.1 2006-04-19 10:05:04 adam Exp $
+# Tests reading of MARCXML and checks that we get identical ISO2709 output.
+#
+# Each marcN.xml is converted with yaz-marcdump (-x -I) and the result is
+# compared with marcN.xml.marc; that reference file is created from the
+# output when it does not exist yet.
+srcdir=${srcdir:-.}
+ecode=0
+# Skip this test if Libxml2 support is not enabled
+../util/yaz-marcdump -x >/dev/null 2>&1
+if test $? = "3"; then
+    exit 0
+fi
+for f in "${srcdir}"/marc?.xml; do
+    NEW=`basename "${f}"`.new.marc
+    OLD=${f}.marc
+    DIFF=`basename "${f}"`.diff
+    ../util/yaz-marcdump -f utf-8 -t utf-8 -x -I "$f" > "$NEW"
+    if test $? != "0"; then
+       echo "Failed decode of $f"
+       ecode=1
+    elif test -f "$OLD"; then
+        if diff "$OLD" "$NEW" >"$DIFF"; then
+           rm "$DIFF"
+           rm "$NEW"
+       else
+           echo "Differ in $f"
+           ecode=1
+       fi
+    else
+       echo "Making test $f for the first time"
+       mv "$NEW" "$OLD"
+    fi
+done
+exit $ecode
+
index 217feee..e056593 100644 (file)
@@ -1,8 +1,8 @@
 /*
- * Copyright (C) 1995-2005, Index Data ApS
+ * Copyright (C) 1995-2006, Index Data ApS
  * See the file LICENSE for details.
  *
- * $Id: marcdump.c,v 1.35 2005-12-18 15:58:02 adam Exp $
+ * $Id: marcdump.c,v 1.36 2006-04-19 10:05:04 adam Exp $
  */
 
 #define _FILE_OFFSET_BITS 64
 #define SEEK_END 2
 #endif
 
+
+static char *prog;
+
 static void usage(const char *prog)
 {
-    fprintf (stderr, "Usage: %s [-c cfile] [-f from] [-t to] [-x] [-O] [-X] [-e] [-I] [-v] file...\n",
+    fprintf (stderr, "Usage: %s [-c cfile] [-f from] [-t to] [-x] [-X] [-e] [-I] [-v] [-s splitfname] file...\n",
              prog);
 } 
 
 #if HAVE_XML2
-void print_xpath_nodes(xmlNodeSetPtr nodes, FILE* output) {
-    xmlNodePtr cur;
-    int size;
-    int i;
-    
-    assert(output);
-    size = (nodes) ? nodes->nodeNr : 0;
-    
-    fprintf(output, "Result (%d nodes):\n", size);
-    for(i = 0; i < size; ++i) {
-        assert(nodes->nodeTab[i]);
+/* Parses the XML file fname, decodes its root element as a MARC record
+   with yaz_marc_read_xml() and prints the record to stdout in the output
+   mode currently selected on mt (yaz_marc_write_mode()).
+   Returns without output when xmlParseFile() fails or the document has
+   no root element. A decode failure is reported on stderr. */
+static void marcdump_read_xml(yaz_marc_t mt, const char *fname)
+{
+    xmlNodePtr ptr;
+    xmlDocPtr doc = xmlParseFile(fname);
+    if (!doc)
+        return;
+
+    ptr = xmlDocGetRootElement(doc);
+    if (ptr)
+    {
+        int r;
+        WRBUF wrbuf = wrbuf_alloc();
+        r = yaz_marc_read_xml(mt, ptr);
+        if (r)
+            fprintf(stderr, "yaz_marc_read_xml failed\n");
         
-        if(nodes->nodeTab[i]->type == XML_NAMESPACE_DECL)
+        yaz_marc_write_mode(mt, wrbuf);
+
+        /* write by explicit length, as dump() does for decoded records,
+           rather than relying on the buffer being NUL-terminated */
+        fwrite(wrbuf_buf(wrbuf), 1, wrbuf_len(wrbuf), stdout);
+
+        wrbuf_free(wrbuf, 1);
+    }
+    xmlFreeDoc(doc);
+}
+#endif
+
+/* Dumps the MARC records of one input file to stdout.
+
+   fname        input file name
+   from, to     when both are non-NULL an iconv handle for the conversion
+                from -> to is attached to the MARC handle; exits with
+                status 2 if that conversion is unsupported
+   read_xml     non-zero: read fname as XML via marcdump_read_xml()
+                (only when built with HAVE_XML2); zero: read ISO2709
+   xml          output mode, passed to yaz_marc_xml()
+   print_offset non-zero: print a comment with number and file offset of
+                each record
+   split_fname  when non-NULL, each raw record is also written to its own
+                file named split_fname followed by a 7-digit record number
+   verbose      passed to yaz_marc_debug(); also enables diagnostics here
+   cfile        when non-NULL, every decoded record is also written to it
+                as one hex-escaped string of a C array "marc_records"
+
+   Exits with status 1 if fname cannot be opened. */
+static void dump(const char *fname, const char *from, const char *to,
+                 int read_xml, int xml,
+                 int print_offset, const char *split_fname, int verbose,
+                 FILE *cfile)
+{
+    yaz_marc_t mt = yaz_marc_create();
+    yaz_iconv_t cd = 0;
+
+    if (from && to)
+    {
+        cd = yaz_iconv_open(to, from);
+        if (!cd)
         {
-            xmlNsPtr ns;
-            
-            ns = (xmlNsPtr)nodes->nodeTab[i];
-            cur = (xmlNodePtr)ns->next;
-            if(cur->ns) { 
-                fprintf(output, "= namespace \"%s\"=\"%s\" for node %s:%s\n", 
-                    ns->prefix, ns->href, cur->ns->href, cur->name);
-            } else {
-                fprintf(output, "= namespace \"%s\"=\"%s\" for node %s\n", 
-                    ns->prefix, ns->href, cur->name);
-            }
-        } 
-        else if(nodes->nodeTab[i]->type == XML_ELEMENT_NODE)
+            fprintf(stderr, "conversion from %s to %s "
+                    "unsupported\n", from, to);
+            exit(2);
+        }
+        yaz_marc_iconv(mt, cd);
+    }
+    yaz_marc_xml(mt, xml);
+    yaz_marc_debug(mt, verbose);
+
+    if (read_xml)
+    {
+#if HAVE_XML2
+        marcdump_read_xml(mt, fname);
+#else
+        ; /* no Libxml2: nothing to read; fall through so that mt and cd
+             are still released below */
+#endif
+    }
+    else
+    {
+        FILE *inf = fopen(fname, "rb");
+        int count = 0;  /* number of records written to cfile so far */
+        int num = 1;
+        if (!inf)
         {
-            cur = nodes->nodeTab[i];        
-            if(cur->ns) { 
-                fprintf(output, "= element node \"%s:%s\"\n", 
-                    cur->ns->href, cur->name);
-            } 
-            else
-            {
-                fprintf(output, "= element node \"%s\"\n", 
-                    cur->name);
-            }
+            fprintf (stderr, "%s: cannot open %s:%s\n",
+                     prog, fname, strerror (errno));
+            exit(1);
         }
-        else
+        if (cfile)
+            fprintf (cfile, "char *marc_records[] = {\n");
+        if (1)
         {
-            cur = nodes->nodeTab[i];    
-            fprintf(output, "= node \"%s\": type %d\n", cur->name, cur->type);
+            int marc_no = 0;
+            for(;; marc_no++)
+            {
+                int len;
+                char *result = 0;
+                int rlen;
+                int dec;    /* yaz_marc_decode_buf result; must stay signed */
+                size_t r;   /* fread result */
+                char buf[100001];
+
+                /* the first 5 bytes of a record hold its total length */
+                r = fread (buf, 1, 5, inf);
+                if (r < 5)
+                {
+                    if (r && print_offset && verbose)
+                        printf ("<!-- Extra %d bytes at end of file -->\n",
+                                (int) r);
+                    break;
+                }
+                /* resynchronize: drop leading bytes until a digit is found */
+                while (*buf < '0' || *buf > '9')
+                {
+                    int i;
+                    long off = ftell(inf) - 5;
+                    if (verbose || print_offset)
+                        printf("<!-- Skipping bad byte %d (0x%02X) at offset "
+                               "%ld (0x%lx) -->\n",
+                               *buf & 0xff, *buf & 0xff,
+                               off, off);
+                    for (i = 0; i<4; i++)
+                        buf[i] = buf[i+1];
+                    r = fread(buf+4, 1, 1, inf);
+                    if (r < 1)
+                        break;
+                }
+                if (r < 1)
+                {
+                    if (verbose || print_offset)
+                        printf ("<!-- End of file with data -->\n");
+                    break;
+                }
+                if (print_offset)
+                {
+                    long off = ftell(inf) - 5;
+                    printf ("<!-- Record %d offset %ld (0x%lx) -->\n",
+                            num, off, off);
+                }
+                len = atoi_n(buf, 5);
+                if (len < 25 || len > 100000)
+                {
+                    long off = ftell(inf) - 5;
+                    printf("Bad Length %d read at offset %ld (%lx)\n",
+                           len, (long) off, (long) off);
+                    break;
+                }
+                rlen = len - 5;     /* >= 20 here, so the cast below is safe */
+                r = fread (buf + 5, 1, rlen, inf);
+                if (r < (size_t) rlen)
+                    break;
+                if (split_fname)
+                {
+                    char fname[256];
+                    FILE *sf;
+                    sprintf(fname, "%.200s%07d", split_fname, marc_no);
+                    sf = fopen(fname, "wb");
+                    if (!sf)
+                    {
+                        fprintf(stderr, "Could not open %s\n", fname);
+                        split_fname = 0;
+                    }
+                    else
+                    {
+                        if (fwrite(buf, 1, len, sf) != (size_t) len)
+                        {
+                            fprintf(stderr, "Could not write content to %s\n",
+                                    fname);
+                            split_fname = 0;
+                        }
+                        fclose(sf);
+                    }
+                }
+                /* keep the int result in an int: stored in a size_t, a
+                   negative status would pass the "> 0" tests below and
+                   drive the hex dump far beyond buf */
+                dec = yaz_marc_decode_buf (mt, buf, -1, &result, &rlen);
+                if (dec > 0 && result)
+                {
+                    fwrite (result, rlen, 1, stdout);
+                }
+                if (dec > 0 && cfile)
+                {
+                    char *p = buf;
+                    int i;
+                    /* separate this entry from the previous one */
+                    if (count)
+                        fprintf (cfile, ",");
+                    fprintf (cfile, "\n");
+                    for (i = 0; i < dec; i++)
+                    {
+                        if ((i & 15) == 0)
+                            fprintf (cfile, "  \"");
+                        fprintf (cfile, "\\x%02X", p[i] & 255);
+
+                        if (i < dec - 1 && (i & 15) == 15)
+                            fprintf (cfile, "\"\n");
+
+                    }
+                    fprintf (cfile, "\"\n");
+                    count++;
+                }
+                num++;
+                if (verbose)
+                    printf("\n");
+            }
         }
+        if (cfile)
+            fprintf (cfile, "};\n");
+        fclose(inf);
     }
+    if (cd)
+        yaz_iconv_close(cd);
+    yaz_marc_destroy(mt);
 }
-#endif
 
 int main (int argc, char **argv)
 {
     int r;
-    int libxml_dom_test = 0;
     int print_offset = 0;
     char *arg;
     int verbose = 0;
-    FILE *inf;
-    char buf[100001];
-    char *prog = *argv;
     int no = 0;
     int xml = 0;
     FILE *cfile = 0;
     char *from = 0, *to = 0;
-    int num = 1;
+    int read_xml = 0;
     const char *split_fname = 0;
     
 #if HAVE_LOCALE_H
@@ -126,9 +264,9 @@ int main (int argc, char **argv)
 #endif
 #endif
 
-    while ((r = options("pvc:xOeXIf:t:2s:", argv, argc, &arg)) != -2)
+    prog = *argv;
+    while ((r = options("pvc:xOeXIf:t:s:", argv, argc, &arg)) != -2)
     {
-        int count;
         no++;
         switch (r)
         {
@@ -144,10 +282,18 @@ int main (int argc, char **argv)
             cfile = fopen(arg, "w");
             break;
         case 'x':
-            xml = YAZ_MARC_SIMPLEXML;
+#if HAVE_XML2
+            read_xml = 1;
+#else
+            fprintf(stderr, "%s: -x not supported."
+                    " YAZ not compiled with Libxml2 support\n", prog);
+            exit(3);
+#endif
             break;
         case 'O':
-            xml = YAZ_MARC_OAIMARC;
+            fprintf(stderr, "%s: OAI MARC no longer supported."
+                    " Use MARCXML instead.\n", prog);
+            exit(1);
             break;
         case 'e':
             xml = YAZ_MARC_XCHANGE;
@@ -161,187 +307,12 @@ int main (int argc, char **argv)
         case 'p':
             print_offset = 1;
             break;
-        case '2':
-            libxml_dom_test = 1;
-            break;
         case 's':
             split_fname = arg;
             break;
         case 0:
-            inf = fopen(arg, "rb");
-            count = 0;
-            if (!inf)
-            {
-                fprintf (stderr, "%s: cannot open %s:%s\n",
-                         prog, arg, strerror (errno));
-                exit(1);
-            }
-            if (cfile)
-                fprintf (cfile, "char *marc_records[] = {\n");
-            if (1)
-            {
-                yaz_marc_t mt = yaz_marc_create();
-                yaz_iconv_t cd = 0;
-                int marc_no = 0;
-
-                if (from && to)
-                {
-                    cd = yaz_iconv_open(to, from);
-                    if (!cd)
-                    {
-                        fprintf(stderr, "conversion from %s to %s "
-                                "unsupported\n", from, to);
-                        exit(2);
-                    }
-                    yaz_marc_iconv(mt, cd);
-                }
-                yaz_marc_xml(mt, xml);
-                yaz_marc_debug(mt, verbose);
-                for(;; marc_no++)
-                {
-                    int len;
-                    char *result = 0;
-                    int rlen;
-                    
-                    r = fread (buf, 1, 5, inf);
-                    if (r < 5)
-                    {
-                        if (r && print_offset && verbose)
-                            printf ("<!-- Extra %d bytes at end of file -->\n", r);
-                        break;
-                    }
-                    while (*buf < '0' || *buf > '9')
-                    {
-                        int i;
-                        long off = ftell(inf) - 5;
-                        if (verbose || print_offset)
-                            printf("<!-- Skipping bad byte %d (0x%02X) at offset "
-                                   "%ld (0x%lx) -->\n", 
-                                   *buf & 0xff, *buf & 0xff,
-                                   off, off);
-                        for (i = 0; i<4; i++)
-                            buf[i] = buf[i+1];
-                        r = fread(buf+4, 1, 1, inf);
-                        if (r < 1)
-                            break;
-                    }
-                    if (r < 1)
-                    {
-                        if (verbose || print_offset)
-                            printf ("<!-- End of file with data -->\n");
-                        break;
-                    }
-                    if (print_offset)
-                    {
-                        long off = ftell(inf) - 5;
-                        printf ("<!-- Record %d offset %ld (0x%lx) -->\n",
-                                num, off, off);
-                    }
-                    len = atoi_n(buf, 5);
-                    if (len < 25 || len > 100000)
-                    {
-                        long off = ftell(inf) - 5;
-                        printf("Bad Length %d read at offset %ld (%lx)\n",
-                               len, (long) off, (long) off);
-                        break;
-                    }
-                    rlen = len - 5;
-                    r = fread (buf + 5, 1, rlen, inf);
-                    if (r < rlen)
-                        break;
-                    if (split_fname)
-                    {
-                        char fname[256];
-                        FILE *sf;
-                        sprintf(fname, "%.200s%07d", split_fname, marc_no);
-                        sf = fopen(fname, "wb");
-                        if (!sf)
-                        {
-                            fprintf(stderr, "Could not open %s\n", fname);
-                            split_fname = 0;
-                        }
-                        else
-                        {
-                            if (fwrite(buf, 1, len, sf) != len)
-                            {
-                                fprintf(stderr, "Could write content to %s\n",
-                                        fname);
-                                split_fname = 0;
-                            }
-                            fclose(sf);
-                        }
-                    }
-                    r = yaz_marc_decode_buf (mt, buf, -1, &result, &rlen);
-                    if (result)
-                        fwrite (result, rlen, 1, stdout);
-#if HAVE_XML2
-                    if (r > 0 && libxml_dom_test)
-                    {
-                        xmlDocPtr doc = xmlParseMemory(result, rlen);
-                        if (!doc)
-                            fprintf(stderr, "xmLParseMemory failed\n");
-                        else
-                        {
-                            int i;
-                            xmlXPathContextPtr xpathCtx; 
-                            xmlXPathObjectPtr xpathObj; 
-                            static const char *xpathExpr[] = {
-                                "/record/datafield[@tag='245']/subfield[@code='a']",
-                                "/record/datafield[@tag='100']/subfield",
-                                "/record/datafield[@tag='245']/subfield[@code='a']",
-                                "/record/datafield[@tag='650']/subfield",
-                                "/record/datafield[@tag='650']",
-                                0};
-                            
-                            xpathCtx = xmlXPathNewContext(doc);
-
-                            for (i = 0; xpathExpr[i]; i++) {
-                                xpathObj = xmlXPathEvalExpression(BAD_CAST xpathExpr[i], xpathCtx);
-                                if(xpathObj == NULL) {
-                                    fprintf(stderr,"Error: unable to evaluate xpath expression \"%s\"\n", xpathExpr[i]);
-                                }
-                                else
-                                {
-                                    print_xpath_nodes(xpathObj->nodesetval, stdout);
-                                    xmlXPathFreeObject(xpathObj);
-                                }
-                            }
-                            xmlXPathFreeContext(xpathCtx); 
-                            xmlFreeDoc(doc);
-                        }
-                    }
-#endif
-                    if (r > 0 && cfile)
-                    {
-                        char *p = buf;
-                        int i;
-                        if (count)
-                            fprintf (cfile, ",");
-                        fprintf (cfile, "\n");
-                        for (i = 0; i < r; i++)
-                        {
-                            if ((i & 15) == 0)
-                                fprintf (cfile, "  \"");
-                            fprintf (cfile, "\\x%02X", p[i] & 255);
-                            
-                            if (i < r - 1 && (i & 15) == 15)
-                                fprintf (cfile, "\"\n");
-                            
-                        }
-                        fprintf (cfile, "\"\n");
-                    }
-                    num++;
-                    if (verbose)
-                        printf("\n");
-                }
-                count++;
-                if (cd)
-                    yaz_iconv_close(cd);
-                yaz_marc_destroy(mt);
-            }
-            if (cfile)
-                fprintf (cfile, "};\n");
-            fclose(inf);
+            dump(arg, from, to, read_xml, xml,
+                 print_offset, split_fname, verbose, cfile);
             break;
         case 'v':
             verbose++;