Split MARC reader functions out of marcdisp.c. Prepare for MARC format
authorAdam Dickmeiss <adam@indexdata.dk>
Fri, 15 Dec 2006 12:37:17 +0000 (12:37 +0000)
committerAdam Dickmeiss <adam@indexdata.dk>
Fri, 15 Dec 2006 12:37:17 +0000 (12:37 +0000)
line reader.

include/yaz/marcdisp.h
src/Makefile.am
src/marc_read_iso2709.c [new file with mode: 0644]
src/marc_read_line.c [new file with mode: 0644]
src/marc_read_xml.c [new file with mode: 0644]
src/marcdisp.c
win/makefile

index 97fad09..1b8e818 100644 (file)
@@ -24,7 +24,7 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
-/* $Id: marcdisp.h,v 1.21 2006-12-13 11:25:17 adam Exp $ */
+/* $Id: marcdisp.h,v 1.22 2006-12-15 12:37:17 adam Exp $ */
 
 /**
  * \file marcdisp.h
@@ -38,6 +38,7 @@
 #include <stdio.h>
 #include <yaz/wrbuf.h>
 
+#include <yaz/nmem.h>
 #include <yaz/xmltypes.h>
 
 YAZ_BEGIN_CDECL
@@ -147,6 +148,21 @@ YAZ_EXPORT int atoi_n(const char *buf, int len);
 YAZ_EXPORT int yaz_marc_read_iso2709(yaz_marc_t mt,
                                      const char *buf, int bsize);
 
+/** \brief read MARC lineformat from stream
+    \param mt handle
+    \param getbyte get one byte handler
+    \param ungetbyte unget one byte handler
+    \param client_data opaque data for handlers
+
+    Parses MARC line record from stream
+    Returns > 0 for OK (same as length), -1=ERROR
+*/
+YAZ_EXPORT 
+int yaz_marc_read_line(yaz_marc_t mt,
+                       int (*getbyte)(void *client_data),
+                       void (*ungetbyte)(int b, void *client_data),
+                       void *client_data);
+
 /** \brief parses MARCXML/MarcXchange record from xmlNode pointer 
     \param mt handle
     \param ptr is a pointer to root xml node 
@@ -211,6 +227,113 @@ YAZ_EXPORT int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wrbuf);
     
 */  
 YAZ_EXPORT int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec);
+
+
+/** \brief sets leader, validates it, and returns important values
+    \param mt handle
+    \param leader_c the 24 byte leader to be set
+    \param indicator_length indicator length (returned value)
+    \param identifier_length identifier length (returned value)
+    \param base_address base address (returned value)
+    \param length_data_entry length of data entry (returned value)
+    \param length_starting length of starting 
+    \param length_implementation length of implementation defined data
+*/
+YAZ_EXPORT
+void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
+                         int *indicator_length,
+                         int *identifier_length,
+                         int *base_address,
+                         int *length_data_entry,
+                         int *length_starting,
+                         int *length_implementation);
+
+
+/** \brief adds MARC comment string
+    \param mt handle
+    \param comment comment to be added
+*/  
+YAZ_EXPORT
+void yaz_marc_add_comment(yaz_marc_t mt, char *comment);
+
+/** \brief adds MARC annotation - printf interface
+    \param mt handle
+    \param fmt printf format string
+*/  
+YAZ_EXPORT
+void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...);
+
+/** \brief adds subfield to MARC structure
+    \param mt handle
+    \param code_data code data buffer
+    \param code_data_len length of code data
+*/  
+YAZ_EXPORT
+void yaz_marc_add_subfield(yaz_marc_t mt,
+                           const char *code_data, size_t code_data_len);
+
+
+/** \brief adds controlfield to MARC structure
+    \param mt handle
+    \param tag tag (e.g. "001")
+    \param data value for this tag
+    \param data_len length of data
+*/  
+YAZ_EXPORT
+void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
+                               const char *data, size_t data_len);
+
+
+/** \brief adds controlfield to MARC structure using xml Nodes
+    \param mt handle
+    \param ptr_tag value of tag (TEXT xmlNode)
+    \param ptr_data value of data (TEXT xmlNode)
+*/  
+YAZ_EXPORT
+void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
+                                   const xmlNode *ptr_data);
+
+
+/** \brief adds datafield to MARC structure using strings
+    \param mt handle
+    \param tag value of tag as string
+    \param indicator indicator string
+    \param indicator_len length of indicator string
+*/  
+YAZ_EXPORT
+void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
+                            const char *indicator, size_t indicator_len);
+
+/** \brief adds datafield to MARC structure using xml Nodes
+    \param mt handle
+    \param ptr_tag value of tag (TEXT xmlNode)
+    \param indicator indicator string
+    \param indicator_len length of indicator string
+*/  
+YAZ_EXPORT
+void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
+                                const char *indicator, size_t indicator_len);
+
+
+/** \brief returns memory for MARC handle
+    \param mt handle
+    \retval NMEM handle for MARC system
+*/  
+YAZ_EXPORT
+NMEM yaz_marc_get_nmem(yaz_marc_t mt);
+
+/** \brief clears memory and MARC record
+    \param mt handle
+*/  
+YAZ_EXPORT
+void yaz_marc_reset(yaz_marc_t mt);
+
+/** \brief gets debug level for MARC system
+    \param mt handle
+*/  
+YAZ_EXPORT
+int yaz_marc_get_debug(yaz_marc_t mt);
+
 YAZ_END_CDECL
 
 #endif
index 9d538f1..324cb1e 100644 (file)
@@ -1,6 +1,6 @@
 ## This file is part of the YAZ toolkit.
 ## Copyright (C) 1994-2006, Index Data, All rights reserved.
-## $Id: Makefile.am,v 1.45 2006-11-29 12:34:51 heikki Exp $
+## $Id: Makefile.am,v 1.46 2006-12-15 12:37:18 adam Exp $
 
 YAZ_VERSION_INFO=2:1:0
 
@@ -49,7 +49,9 @@ diagsrw.c $(top_srcdir)/include/yaz/diagsrw.h: csvtosrw.tcl srw.csv
 diagsru_update.c $(top_srcdir)/include/yaz/diagsru_update.h: csvtosru_update.tcl sru_update.csv
        $(TCLSH) $(srcdir)/csvtosru_update.tcl $(srcdir)
 
-libyaz_la_SOURCES=version.c options.c log.c marcdisp.c oid.c wrbuf.c \
+libyaz_la_SOURCES=version.c options.c log.c \
+  marcdisp.c marc_read_xml.c marc_read_iso2709.c marc_read_line.c \
+  oid.c wrbuf.c \
   nmemsdup.c xmalloc.c readconf.c tpath.c nmem.c matchstr.c atoin.c \
   siconv.c marc8.c marc8r.c \
   odr_bool.c ber_bool.c ber_len.c ber_tag.c odr_util.c \
diff --git a/src/marc_read_iso2709.c b/src/marc_read_iso2709.c
new file mode 100644 (file)
index 0000000..aee41b4
--- /dev/null
@@ -0,0 +1,200 @@
+/*
+ * Copyright (C) 1995-2006, Index Data ApS
+ * See the file LICENSE for details.
+ *
+ * $Id: marc_read_iso2709.c,v 1.1 2006-12-15 12:37:18 adam Exp $
+ */
+
+/**
+ * \file marc_read_iso2709.c
+ * \brief Implements reading of MARC as ISO2709
+ */
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifdef WIN32
+#include <windows.h>
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <yaz/marcdisp.h>
+#include <yaz/wrbuf.h>
+#include <yaz/yaz-util.h>
+
+/** \brief parses an ISO2709 MARC record from a memory buffer
+    \param mt handle (reset before parsing)
+    \param buf buffer holding the record, starting at the leader
+    \param bsize size of buf in bytes, or -1 if the size is unknown
+    \returns the record length from the leader, or -1 on error
+
+    -1 is returned when the record length is below 25, when the record
+    does not fit in a buffer of known size, or when the directory runs to
+    the end of the record without an FS terminator. Other problems are
+    reported via yaz_marc_cprintf; they may cut the directory or field
+    scan short, but the record length is still returned.
+*/
+int yaz_marc_read_iso2709(yaz_marc_t mt, const char *buf, int bsize)
+{
+    int entry_p;
+    int record_length;
+    int indicator_length;
+    int identifier_length;
+    int end_of_directory;
+    int base_address;
+    int length_data_entry;
+    int length_starting;
+    int length_implementation;
+
+    yaz_marc_reset(mt);
+
+    record_length = atoi_n (buf, 5);
+    if (record_length < 25)
+    {
+        /* message must quote the same limit as the test above */
+        yaz_marc_cprintf(mt, "Record length %d < 25", record_length);
+        return -1;
+    }
+    /* bail out if bsize is known and the record does not fit in it */
+    if (bsize != -1 && record_length > bsize)
+    {
+        yaz_marc_cprintf(mt, "Record appears to be larger than buffer %d > %d",
+                         record_length, bsize);
+        return -1;
+    }
+    if (yaz_marc_get_debug(mt))
+        yaz_marc_cprintf(mt, "Record length         %5d", record_length);
+
+    yaz_marc_set_leader(mt, buf,
+                        &indicator_length,
+                        &identifier_length,
+                        &base_address,
+                        &length_data_entry,
+                        &length_starting,
+                        &length_implementation);
+
+    /* First pass. determine length of directory & base of data */
+    for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
+    {
+        /* length of directory entry */
+        int l = 3 + length_data_entry + length_starting;
+        if (entry_p + l >= record_length)
+        {
+            yaz_marc_cprintf(mt, "Directory offset %d: end of record."
+                             " Missing FS char", entry_p);
+            return -1;
+        }
+        if (yaz_marc_get_debug(mt))
+        {
+            yaz_marc_cprintf(mt, "Directory offset %d: Tag %.3s",
+                             entry_p, buf+entry_p);
+        }
+        /* Check for digits in length info (everything after the tag) */
+        while (--l >= 3)
+            if (!isdigit(*(const unsigned char *) (buf + entry_p+l)))
+                break;
+        if (l >= 3)
+        {
+            /* Not all digits, so stop directory scan */
+            yaz_marc_cprintf(mt, "Directory offset %d: Bad value for data"
+                             " length and/or length starting", entry_p);
+            break;
+        }
+        entry_p += 3 + length_data_entry + length_starting;
+    }
+    end_of_directory = entry_p;
+    if (base_address != entry_p+1)
+    {
+        yaz_marc_cprintf(mt, "Base address not at end of directory,"
+                         " base %d, end %d", base_address, entry_p+1);
+    }
+
+    /* Second pass. parse control - and datafields */
+    for (entry_p = 24; entry_p != end_of_directory; )
+    {
+        int data_length;
+        int data_offset;
+        int end_offset;
+        int i;
+        char tag[4];
+        int identifier_flag = 0;
+        int entry_p0 = entry_p;
+
+        memcpy (tag, buf+entry_p, 3);
+        entry_p += 3;
+        tag[3] = '\0';
+        data_length = atoi_n(buf+entry_p, length_data_entry);
+        entry_p += length_data_entry;
+        data_offset = atoi_n(buf+entry_p, length_starting);
+        entry_p += length_starting;
+        i = data_offset + base_address;
+        end_offset = i+data_length-1;
+
+        if (data_length <= 0 || data_offset < 0)
+            break;
+
+        if (yaz_marc_get_debug(mt))
+        {
+            yaz_marc_cprintf(mt, "Tag: %s. Directory offset %d: data-length %d,"
+                             " data-offset %d",
+                             tag, entry_p0, data_length, data_offset);
+        }
+        if (end_offset >= record_length)
+        {
+            yaz_marc_cprintf(mt, "Directory offset %d: Data out of bounds %d >= %d",
+                             entry_p0, end_offset, record_length);
+            break;
+        }
+
+        if (memcmp (tag, "00", 2))
+            identifier_flag = 1;  /* if not 00X assume subfields */
+        else if (indicator_length < 4 && indicator_length > 0)
+        {
+            /* Danmarc 00X have subfields. Only peek at bytes that still
+               belong to this field; a field too short to hold the
+               indicator plus a subfield delimiter is a control field. */
+            if (i + indicator_length <= end_offset &&
+                buf[i + indicator_length] == ISO2709_IDFS)
+                identifier_flag = 1;
+            else if (i + indicator_length + 1 <= end_offset &&
+                     buf[i + indicator_length + 1] == ISO2709_IDFS)
+                identifier_flag = 2;
+        }
+
+        if (identifier_flag)
+        {
+            /* datafield; identifier_flag 2 means one extra leading byte */
+            i += identifier_flag-1;
+            yaz_marc_add_datafield(mt, tag, buf+i, indicator_length);
+            i += indicator_length;
+
+            while (i < end_offset &&
+                    buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
+            {
+                /* buf[i] is the subfield delimiter; code starts after it */
+                int code_offset = i+1;
+
+                i ++;
+                while (i < end_offset &&
+                        buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
+                       buf[i] != ISO2709_FS)
+                    i++;
+                yaz_marc_add_subfield(mt, buf+code_offset, i - code_offset);
+            }
+        }
+        else
+        {
+            /* controlfield */
+            int i0 = i;
+            while (i < end_offset &&
+                buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
+                i++;
+            yaz_marc_add_controlfield(mt, tag, buf+i0, i-i0);
+        }
+        if (i < end_offset)
+        {
+            yaz_marc_cprintf(mt, "Separator but not at end of field length=%d",
+                    data_length);
+        }
+        if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
+        {
+            yaz_marc_cprintf(mt, "No separator at end of field length=%d",
+                    data_length);
+        }
+    }
+    return record_length;
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
diff --git a/src/marc_read_line.c b/src/marc_read_line.c
new file mode 100644 (file)
index 0000000..c05af1b
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 1995-2006, Index Data ApS
+ * See the file LICENSE for details.
+ *
+ * $Id: marc_read_line.c,v 1.1 2006-12-15 12:37:18 adam Exp $
+ */
+
+/**
+ * \file marc_read_line.c
+ * \brief Implements reading of MARC in line format
+ */
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifdef WIN32
+#include <windows.h>
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+#include <yaz/marcdisp.h>
+#include <yaz/wrbuf.h>
+#include <yaz/yaz-util.h>
+
+/* Placeholder for the MARC line format reader: the handle is reset and
+   -1 is returned unconditionally. Neither callback is invoked. */
+int yaz_marc_read_line(yaz_marc_t mt,
+                       int (*getbyte)(void *client_data),
+                       void (*ungetbyte)(int b, void *client_data),
+                       void *client_data)
+{
+    const int no_record = -1;
+
+    yaz_marc_reset(mt);
+    return no_record;
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
diff --git a/src/marc_read_xml.c b/src/marc_read_xml.c
new file mode 100644 (file)
index 0000000..2642dd4
--- /dev/null
@@ -0,0 +1,263 @@
+/*
+ * Copyright (C) 1995-2006, Index Data ApS
+ * See the file LICENSE for details.
+ *
+ * $Id: marc_read_xml.c,v 1.1 2006-12-15 12:37:18 adam Exp $
+ */
+
+/**
+ * \file marc_read_xml.c
+ * \brief Implements reading of MARC as XML
+ */
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifdef WIN32
+#include <windows.h>
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <yaz/marcdisp.h>
+#include <yaz/wrbuf.h>
+#include <yaz/yaz-util.h>
+#include <yaz/nmem_xml.h>
+
+#if YAZ_HAVE_XML2
+#include <libxml/tree.h>
+#endif
+
+#if YAZ_HAVE_XML2
+/* Walks the sibling list starting at ptr and adds one MARC subfield per
+   'subfield' element. The data handed to yaz_marc_add_subfield is the
+   value of the 'code' attribute followed by the element's text children.
+   Non-element nodes are skipped. Returns 0 on success, -1 (with a
+   diagnostic added via yaz_marc_cprintf) on unexpected input. */
+int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
+{
+    NMEM nmem = yaz_marc_get_nmem(mt);
+    const xmlNode *node;
+
+    for (node = ptr; node; node = node->next)
+    {
+        const xmlNode *code_node = 0;
+        const xmlNode *child;
+        struct _xmlAttr *attr;
+        size_t total_len;
+        char *data;
+        char *out;
+
+        if (node->type != XML_ELEMENT_NODE)
+            continue;
+        if (strcmp((const char *) node->name, "subfield") != 0)
+        {
+            yaz_marc_cprintf(
+                mt, "Expected element 'subfield', got '%.80s'", node->name);
+            return -1;
+        }
+
+        /* 'code' is the only attribute accepted */
+        for (attr = node->properties; attr; attr = attr->next)
+        {
+            if (strcmp((const char *) attr->name, "code") != 0)
+            {
+                yaz_marc_cprintf(
+                    mt, "Bad attribute '%.80s' for 'subfield'",
+                    attr->name);
+                return -1;
+            }
+            code_node = attr->children;
+        }
+        if (!code_node)
+        {
+            yaz_marc_cprintf(
+                mt, "Missing attribute 'code' for 'subfield'" );
+            return -1;
+        }
+        if (code_node->type != XML_TEXT_NODE)
+        {
+            yaz_marc_cprintf(
+                mt, "Missing value for 'code' in 'subfield'" );
+            return -1;
+        }
+
+        /* size the buffer: code followed by all text children */
+        total_len = strlen((const char *) code_node->content);
+        for (child = node->children; child; child = child->next)
+        {
+            if (child->type == XML_TEXT_NODE)
+                total_len += strlen((const char *) child->content);
+        }
+
+        /* fill it, advancing a write cursor past each piece */
+        data = nmem_malloc(nmem, total_len + 1);
+        out = data;
+        strcpy(out, (const char *) code_node->content);
+        out += strlen(out);
+        for (child = node->children; child; child = child->next)
+        {
+            if (child->type == XML_TEXT_NODE)
+            {
+                strcpy(out, (const char *) child->content);
+                out += strlen(out);
+            }
+        }
+        yaz_marc_add_subfield(mt, data, total_len);
+    }
+    return 0;
+}
+
+/** \brief locates the 'leader' element and sets it in the MARC handle
+    \param mt handle
+    \param ptr_p in: first node to inspect; out: the 'leader' element
+    \retval 0 leader found and passed to yaz_marc_set_leader
+    \retval -1 error; a diagnostic is added via yaz_marc_cprintf and
+               *ptr_p is left unchanged
+
+    Non-element nodes are skipped. The first element met must be
+    'leader'; its text content must be exactly 24 characters long.
+*/
+static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p)
+{
+    int indicator_length;
+    int identifier_length;
+    int base_address;
+    int length_data_entry;
+    int length_starting;
+    int length_implementation;
+    const char *leader = 0;
+    const xmlNode *ptr = *ptr_p;
+    size_t leader_len;
+
+    for(; ptr; ptr = ptr->next)
+        if (ptr->type == XML_ELEMENT_NODE)
+        {
+            if (!strcmp((const char *) ptr->name, "leader"))
+            {
+                xmlNode *p = ptr->children;
+                /* if there are several text children, the last one wins */
+                for(; p; p = p->next)
+                    if (p->type == XML_TEXT_NODE)
+                        leader = (const char *) p->content;
+                break;
+            }
+            else
+            {
+                yaz_marc_cprintf(
+                    mt, "Expected element 'leader', got '%.80s'", ptr->name);
+                return -1;
+            }
+        }
+    if (!leader)
+    {
+        yaz_marc_cprintf(mt, "Missing element 'leader'");
+        return -1;
+    }
+    leader_len = strlen(leader);
+    if (leader_len != 24)
+    {
+        /* strlen yields size_t; %d needs an int in a variadic call */
+        yaz_marc_cprintf(mt, "Bad length %d of leader data."
+                         " Must have length of 24 characters",
+                         (int) leader_len);
+        return -1;
+    }
+    /* the decoded leader values are not used further in this function */
+    yaz_marc_set_leader(mt, leader,
+                        &indicator_length,
+                        &identifier_length,
+                        &base_address,
+                        &length_data_entry,
+                        &length_starting,
+                        &length_implementation);
+    *ptr_p = ptr;
+    return 0;
+}
+
+/* Walks the sibling list starting at ptr and adds a MARC control field
+   for every 'controlfield' element and a data field (plus its subfields)
+   for every 'datafield' element. Non-element nodes are skipped. Returns
+   0 on success, -1 (with a diagnostic added via yaz_marc_cprintf) on any
+   unexpected element or attribute. */
+static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
+{
+    const xmlNode *field;
+
+    for (field = ptr; field; field = field->next)
+    {
+        const char *elem_name;
+        const xmlNode *tag_node = 0;
+        struct _xmlAttr *attr;
+
+        if (field->type != XML_ELEMENT_NODE)
+            continue;
+        elem_name = (const char *) field->name;
+
+        if (!strcmp(elem_name, "controlfield"))
+        {
+            /* 'tag' is the only attribute accepted */
+            for (attr = field->properties; attr; attr = attr->next)
+            {
+                if (strcmp((const char *) attr->name, "tag") != 0)
+                {
+                    yaz_marc_cprintf(
+                        mt, "Bad attribute '%.80s' for 'controlfield'",
+                        attr->name);
+                    return -1;
+                }
+                tag_node = attr->children;
+            }
+            if (!tag_node)
+            {
+                yaz_marc_cprintf(
+                    mt, "Missing attribute 'tag' for 'controlfield'" );
+                return -1;
+            }
+            yaz_marc_add_controlfield_xml(mt, tag_node, field->children);
+        }
+        else if (!strcmp(elem_name, "datafield"))
+        {
+            /* slot 0 is unused, slots 1..9 hold ind1..ind9, slot 10
+               keeps the string zero terminated */
+            char indstr[11];
+
+            memset(indstr, '\0', sizeof(indstr));
+            for (attr = field->properties; attr; attr = attr->next)
+            {
+                const char *attr_name = (const char *) attr->name;
+
+                if (!strcmp(attr_name, "tag"))
+                {
+                    tag_node = attr->children;
+                }
+                else if (strlen(attr_name) == 4 &&
+                         !memcmp(attr_name, "ind", 3))
+                {
+                    /* the single character after "ind" picks the slot */
+                    int no = atoi(attr_name + 3);
+
+                    if (attr->children
+                        && attr->children->type == XML_TEXT_NODE)
+                        indstr[no] = attr->children->content[0];
+                }
+                else
+                {
+                    yaz_marc_cprintf(
+                        mt, "Bad attribute '%.80s' for 'datafield'",
+                        attr->name);
+                    return -1;
+                }
+            }
+            if (!tag_node)
+            {
+                yaz_marc_cprintf(
+                    mt, "Missing attribute 'tag' for 'datafield'" );
+                return -1;
+            }
+            /* indicators start at indstr[1] */
+            yaz_marc_add_datafield_xml(mt, tag_node,
+                                       indstr + 1, strlen(indstr + 1));
+
+            if (yaz_marc_read_xml_subfields(mt, field->children))
+                return -1;
+        }
+        else
+        {
+            yaz_marc_cprintf(mt,
+                             "Expected element controlfield or datafield,"
+                             " got %.80s", field->name);
+            return -1;
+        }
+    }
+    return 0;
+}
+#endif
+
+/* Reads a MARC record from an XML tree. The first element node in the
+   sibling list at ptr must be 'record'; its children supply the leader
+   followed by the control and data fields. Returns 0 on success, -1 on
+   error. Without libxml2 support (YAZ_HAVE_XML2 unset) it always
+   returns -1. */
+int yaz_marc_read_xml(yaz_marc_t mt, const xmlNode *ptr)
+{
+#if YAZ_HAVE_XML2
+    const xmlNode *record = ptr;
+
+    /* skip anything that is not an element */
+    while (record && record->type != XML_ELEMENT_NODE)
+        record = record->next;
+    if (!record)
+    {
+        yaz_marc_cprintf(mt, "Missing element 'record' in MARC XML record");
+        return -1;
+    }
+    if (strcmp((const char *) record->name, "record") != 0)
+    {
+        yaz_marc_cprintf(
+            mt, "Unknown element '%.80s' in MARC XML reader",
+            record->name);
+        return -1;
+    }
+
+    /* descend into the record; the leader reader moves ptr to 'leader' */
+    ptr = record->children;
+    if (yaz_marc_read_xml_leader(mt, &ptr) != 0)
+        return -1;
+    return yaz_marc_read_xml_fields(mt, ptr->next);
+#else
+    return -1;
+#endif
+}
+
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
index 994c5a6..321cd72 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (C) 1995-2006, Index Data ApS
  * See the file LICENSE for details.
  *
- * $Id: marcdisp.c,v 1.37 2006-12-13 11:25:17 adam Exp $
+ * $Id: marcdisp.c,v 1.38 2006-12-15 12:37:18 adam Exp $
  */
 
 /**
@@ -32,8 +32,6 @@
 #include <libxml/tree.h>
 #endif
 
-static void yaz_marc_reset(yaz_marc_t mt);
-
 /** \brief node types for yaz_marc_node */
 enum YAZ_MARC_NODE_TYPE
 { 
@@ -120,11 +118,16 @@ void yaz_marc_destroy(yaz_marc_t mt)
     xfree(mt);
 }
 
+/* Returns the NMEM handle stored in the MARC handle. */
+NMEM yaz_marc_get_nmem(yaz_marc_t mt)
+{
+    NMEM handle_nmem = mt->nmem;
+
+    return handle_nmem;
+}
+
 static int marc_exec_leader(const char *leader_spec, char *leader,
                             size_t size);
 
 
-struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
+static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
 {
     struct yaz_marc_node *n = nmem_malloc(mt->nmem, sizeof(*n));
     n->next = 0;
@@ -133,6 +136,18 @@ struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
     return n;
 }
 
+#if YAZ_HAVE_XML2
+/* Appends a control field node whose tag and data are both produced by
+   nmem_text_node_cdata from the given XML nodes, using the handle's
+   NMEM. */
+void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
+                                   const xmlNode *ptr_data)
+{
+    struct yaz_marc_node *field = yaz_marc_add_node(mt);
+    NMEM nmem = mt->nmem;
+
+    field->which = YAZ_MARC_CONTROLFIELD;
+    field->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, nmem);
+    field->u.controlfield.data = nmem_text_node_cdata(ptr_data, nmem);
+}
+#endif
+
+
 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
 {
     struct yaz_marc_node *n = yaz_marc_add_node(mt);
@@ -161,6 +176,11 @@ void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
     va_end (ap);
 }
 
+/* Returns the debug value stored in the MARC handle. */
+int yaz_marc_get_debug(yaz_marc_t mt)
+{
+    int debug_level = mt->debug;
+
+    return debug_level;
+}
+
 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
 {
     struct yaz_marc_node *n = yaz_marc_add_node(mt);
@@ -190,17 +210,6 @@ void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
     }
 }
 
-#if YAZ_HAVE_XML2
-void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
-                                   const xmlNode *ptr_data)
-{
-    struct yaz_marc_node *n = yaz_marc_add_node(mt);
-    n->which = YAZ_MARC_CONTROLFIELD;
-    n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
-    n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
-}
-#endif
-
 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
                             const char *indicator, size_t indicator_len)
 {
@@ -266,23 +275,13 @@ static int atoi_n_check(const char *buf, int size, int *val)
     return 1;
 }
 
-/** \brief reads the MARC 24 bytes leader and checks content
-    \param mt handle
-    \param leader of the 24 byte leader
-    \param indicator_length indicator length
-    \param identifier_length identifier length
-    \param base_address base address
-    \param length_data_entry length of data entry
-    \param length_starting length of starting 
-    \param length_implementation length of implementation defined data
-*/
-static void yaz_marc_read_leader(yaz_marc_t mt, const char *leader_c,
-                                 int *indicator_length,
-                                 int *identifier_length,
-                                 int *base_address,
-                                 int *length_data_entry,
-                                 int *length_starting,
-                                 int *length_implementation)
+void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
+                         int *indicator_length,
+                         int *identifier_length,
+                         int *base_address,
+                         int *length_data_entry,
+                         int *length_starting,
+                         int *length_implementation)
 {
     char leader[24];
 
@@ -384,7 +383,7 @@ static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
     return 1; /* we don't know */
 }
                               
-static void yaz_marc_reset(yaz_marc_t mt)
+void yaz_marc_reset(yaz_marc_t mt)
 {
     nmem_reset(mt->nmem);
     mt->nodes = 0;
@@ -768,391 +767,6 @@ int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
     return 0;
 }
 
-#if YAZ_HAVE_XML2
-int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
-{
-    for (; ptr; ptr = ptr->next)
-    {
-        if (ptr->type == XML_ELEMENT_NODE)
-        {
-            if (!strcmp((const char *) ptr->name, "subfield"))
-            {
-                size_t ctrl_data_len = 0;
-                char *ctrl_data_buf = 0;
-                const xmlNode *p = 0, *ptr_code = 0;
-                struct _xmlAttr *attr;
-                for (attr = ptr->properties; attr; attr = attr->next)
-                    if (!strcmp((const char *)attr->name, "code"))
-                        ptr_code = attr->children;
-                    else
-                    {
-                        yaz_marc_cprintf(
-                            mt, "Bad attribute '%.80s' for 'subfield'",
-                            attr->name);
-                        return -1;
-                    }
-                if (!ptr_code)
-                {
-                    yaz_marc_cprintf(
-                        mt, "Missing attribute 'code' for 'subfield'" );
-                    return -1;
-                }
-                if (ptr_code->type == XML_TEXT_NODE)
-                {
-                    ctrl_data_len = 
-                        strlen((const char *)ptr_code->content);
-                }
-                else
-                {
-                    yaz_marc_cprintf(
-                        mt, "Missing value for 'code' in 'subfield'" );
-                    return -1;
-                }
-                for (p = ptr->children; p ; p = p->next)
-                    if (p->type == XML_TEXT_NODE)
-                        ctrl_data_len += strlen((const char *)p->content);
-                ctrl_data_buf = nmem_malloc(mt->nmem, ctrl_data_len+1);
-                strcpy(ctrl_data_buf, (const char *)ptr_code->content);
-                for (p = ptr->children; p ; p = p->next)
-                    if (p->type == XML_TEXT_NODE)
-                        strcat(ctrl_data_buf, (const char *)p->content);
-                yaz_marc_add_subfield(mt, ctrl_data_buf, ctrl_data_len);
-            }
-            else
-            {
-                yaz_marc_cprintf(
-                    mt, "Expected element 'subfield', got '%.80s'", ptr->name);
-                return -1;
-            }
-        }
-    }
-    return 0;
-}
-
-static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p)
-{
-    int indicator_length;
-    int identifier_length;
-    int base_address;
-    int length_data_entry;
-    int length_starting;
-    int length_implementation;
-    const char *leader = 0;
-    const xmlNode *ptr = *ptr_p;
-
-    for(; ptr; ptr = ptr->next)
-        if (ptr->type == XML_ELEMENT_NODE)
-        {
-            if (!strcmp((const char *) ptr->name, "leader"))
-            {
-                xmlNode *p = ptr->children;
-                for(; p; p = p->next)
-                    if (p->type == XML_TEXT_NODE)
-                        leader = (const char *) p->content;
-                break;
-            }
-            else
-            {
-                yaz_marc_cprintf(
-                    mt, "Expected element 'leader', got '%.80s'", ptr->name);
-                return -1;
-            }
-        }
-    if (!leader)
-    {
-        yaz_marc_cprintf(mt, "Missing element 'leader'");
-        return -1;
-    }
-    if (strlen(leader) != 24)
-    {
-        yaz_marc_cprintf(mt, "Bad length %d of leader data."
-                         " Must have length of 24 characters", strlen(leader));
-        return -1;
-    }
-    yaz_marc_read_leader(mt, leader,
-                         &indicator_length,
-                         &identifier_length,
-                         &base_address,
-                         &length_data_entry,
-                         &length_starting,
-                         &length_implementation);
-    *ptr_p = ptr;
-    return 0;
-}
-
-static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
-{
-    for(; ptr; ptr = ptr->next)
-        if (ptr->type == XML_ELEMENT_NODE)
-        {
-            if (!strcmp((const char *) ptr->name, "controlfield"))
-            {
-                const xmlNode *ptr_tag = 0;
-                struct _xmlAttr *attr;
-                for (attr = ptr->properties; attr; attr = attr->next)
-                    if (!strcmp((const char *)attr->name, "tag"))
-                        ptr_tag = attr->children;
-                    else
-                    {
-                        yaz_marc_cprintf(
-                            mt, "Bad attribute '%.80s' for 'controlfield'",
-                            attr->name);
-                        return -1;
-                    }
-                if (!ptr_tag)
-                {
-                    yaz_marc_cprintf(
-                        mt, "Missing attribute 'tag' for 'controlfield'" );
-                    return -1;
-                }
-                yaz_marc_add_controlfield_xml(mt, ptr_tag, ptr->children);
-            }
-            else if (!strcmp((const char *) ptr->name, "datafield"))
-            {
-                char indstr[11]; /* 0(unused), 1,....9, + zero term */
-                const xmlNode *ptr_tag = 0;
-                struct _xmlAttr *attr;
-                int i;
-                for (i = 0; i<11; i++)
-                    indstr[i] = '\0';
-                for (attr = ptr->properties; attr; attr = attr->next)
-                    if (!strcmp((const char *)attr->name, "tag"))
-                        ptr_tag = attr->children;
-                    else if (strlen((const char *)attr->name) == 4 &&
-                             !memcmp(attr->name, "ind", 3))
-                    {
-                        int no = atoi((const char *)attr->name+3);
-                        if (attr->children
-                            && attr->children->type == XML_TEXT_NODE)
-                            indstr[no] = attr->children->content[0];
-                    }
-                    else
-                    {
-                        yaz_marc_cprintf(
-                            mt, "Bad attribute '%.80s' for 'datafield'",
-                            attr->name);
-                        return -1;
-                    }
-                if (!ptr_tag)
-                {
-                    yaz_marc_cprintf(
-                        mt, "Missing attribute 'tag' for 'datafield'" );
-                    return -1;
-                }
-                /* note that indstr[0] is unused so we use indstr[1..] */
-                yaz_marc_add_datafield_xml(mt, ptr_tag,
-                                           indstr+1, strlen(indstr+1));
-                
-                if (yaz_marc_read_xml_subfields(mt, ptr->children))
-                    return -1;
-            }
-            else
-            {
-                yaz_marc_cprintf(mt,
-                                 "Expected element controlfield or datafield,"
-                                 " got %.80s", ptr->name);
-                return -1;
-            }
-        }
-    return 0;
-}
-#endif
-
-int yaz_marc_read_xml(yaz_marc_t mt, const xmlNode *ptr)
-{
-#if YAZ_HAVE_XML2
-    for(; ptr; ptr = ptr->next)
-        if (ptr->type == XML_ELEMENT_NODE)
-        {
-            if (!strcmp((const char *) ptr->name, "record"))
-                break;
-            else
-            {
-                yaz_marc_cprintf(
-                    mt, "Unknown element '%.80s' in MARC XML reader",
-                    ptr->name);
-                return -1;
-            }
-        }
-    if (!ptr)
-    {
-        yaz_marc_cprintf(mt, "Missing element 'record' in MARC XML record");
-        return -1;
-    }
-    /* ptr points to record node now */
-    ptr = ptr->children;
-    if (yaz_marc_read_xml_leader(mt, &ptr))
-        return -1;
-    return yaz_marc_read_xml_fields(mt, ptr->next);
-#else
-    return -1;
-#endif
-}
-
-int yaz_marc_read_iso2709(yaz_marc_t mt, const char *buf, int bsize)
-{
-    int entry_p;
-    int record_length;
-    int indicator_length;
-    int identifier_length;
-    int end_of_directory;
-    int base_address;
-    int length_data_entry;
-    int length_starting;
-    int length_implementation;
-
-    yaz_marc_reset(mt);
-
-    record_length = atoi_n (buf, 5);
-    if (record_length < 25)
-    {
-        yaz_marc_cprintf(mt, "Record length %d < 24", record_length);
-        return -1;
-    }
-    /* ballout if bsize is known and record_length is less than that */
-    if (bsize != -1 && record_length > bsize)
-    {
-        yaz_marc_cprintf(mt, "Record appears to be larger than buffer %d < %d",
-                         record_length, bsize);
-        return -1;
-    }
-    if (mt->debug)
-        yaz_marc_cprintf(mt, "Record length         %5d", record_length);
-
-    yaz_marc_read_leader(mt, buf,
-                         &indicator_length,
-                         &identifier_length,
-                         &base_address,
-                         &length_data_entry,
-                         &length_starting,
-                         &length_implementation);
-
-    /* First pass. determine length of directory & base of data */
-    for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
-    {
-        /* length of directory entry */
-        int l = 3 + length_data_entry + length_starting;
-        if (entry_p + l >= record_length)
-        {
-            yaz_marc_cprintf(mt, "Directory offset %d: end of record."
-                             " Missing FS char", entry_p);
-            return -1;
-        }
-        if (mt->debug)
-        {
-            yaz_marc_cprintf(mt, "Directory offset %d: Tag %.3s",
-                             entry_p, buf+entry_p);
-        }
-        /* Check for digits in length info */
-        while (--l >= 3)
-            if (!isdigit(*(const unsigned char *) (buf + entry_p+l)))
-                break;
-        if (l >= 3)
-        {
-            /* Not all digits, so stop directory scan */
-            yaz_marc_cprintf(mt, "Directory offset %d: Bad value for data"
-                             " length and/or length starting", entry_p);
-            break;
-        }
-        entry_p += 3 + length_data_entry + length_starting;
-    }
-    end_of_directory = entry_p;
-    if (base_address != entry_p+1)
-    {
-        yaz_marc_cprintf(mt, "Base address not at end of directory,"
-                         " base %d, end %d", base_address, entry_p+1);
-    }
-
-    /* Second pass. parse control - and datafields */
-    for (entry_p = 24; entry_p != end_of_directory; )
-    {
-        int data_length;
-        int data_offset;
-        int end_offset;
-        int i;
-        char tag[4];
-        int identifier_flag = 0;
-        int entry_p0 = entry_p;
-
-        memcpy (tag, buf+entry_p, 3);
-        entry_p += 3;
-        tag[3] = '\0';
-        data_length = atoi_n(buf+entry_p, length_data_entry);
-        entry_p += length_data_entry;
-        data_offset = atoi_n(buf+entry_p, length_starting);
-        entry_p += length_starting;
-        i = data_offset + base_address;
-        end_offset = i+data_length-1;
-
-        if (data_length <= 0 || data_offset < 0)
-            break;
-        
-        if (mt->debug)
-        {
-            yaz_marc_cprintf(mt, "Tag: %s. Directory offset %d: data-length %d,"
-                             " data-offset %d",
-                             tag, entry_p0, data_length, data_offset);
-        }
-        if (end_offset >= record_length)
-        {
-            yaz_marc_cprintf(mt, "Directory offset %d: Data out of bounds %d >= %d",
-                             entry_p0, end_offset, record_length);
-            break;
-        }
-        
-        if (memcmp (tag, "00", 2))
-            identifier_flag = 1;  /* if not 00X assume subfields */
-        else if (indicator_length < 4 && indicator_length > 0)
-        {
-            /* Danmarc 00X have subfields */
-            if (buf[i + indicator_length] == ISO2709_IDFS)
-                identifier_flag = 1;
-            else if (buf[i + indicator_length + 1] == ISO2709_IDFS)
-                identifier_flag = 2;
-        }
-
-        if (identifier_flag)
-        {
-            /* datafield */
-            i += identifier_flag-1;
-            yaz_marc_add_datafield(mt, tag, buf+i, indicator_length);
-            i += indicator_length;
-
-            while (i < end_offset &&
-                    buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
-            {
-                int code_offset = i+1;
-
-                i ++;
-                while (i < end_offset &&
-                        buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
-                       buf[i] != ISO2709_FS)
-                    i++;
-                yaz_marc_add_subfield(mt, buf+code_offset, i - code_offset);
-            }
-        }
-        else
-        {
-            /* controlfield */
-            int i0 = i;
-            while (i < end_offset && 
-                buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
-                i++;
-            yaz_marc_add_controlfield(mt, tag, buf+i0, i-i0);
-        }
-        if (i < end_offset)
-        {
-            yaz_marc_cprintf(mt, "Separator but not at end of field length=%d",
-                    data_length);
-        }
-        if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
-        {
-            yaz_marc_cprintf(mt, "No separator at end of field length=%d",
-                    data_length);
-        }
-    }
-    return record_length;
-}
 
 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
 {
@@ -1313,7 +927,6 @@ static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
     return 0;
 }
 
-
 /*
  * Local variables:
  * c-basic-offset: 4
index b5fabec..2fb9914 100644 (file)
@@ -1,6 +1,6 @@
 # Copyright (C) 1994-2006, Index Data ApS
 # All rights reserved.
-# $Id: makefile,v 1.121 2006-12-13 10:36:00 adam Exp $
+# $Id: makefile,v 1.122 2006-12-15 12:37:18 adam Exp $
 #
 # Programmed by
 #  Heikki Levanto & Adam Dickmeiss
@@ -368,6 +368,9 @@ MISC_OBJS= \
    $(OBJDIR)\atoin.obj \
    $(OBJDIR)\log.obj \
    $(OBJDIR)\marcdisp.obj \
+   $(OBJDIR)\marc_read_xml.obj \
+   $(OBJDIR)\marc_read_iso2709.obj \
+   $(OBJDIR)\marc_read_line.obj \
    $(OBJDIR)\nmem.obj \
    $(OBJDIR)\nmemsdup.obj \
    $(OBJDIR)\oid.obj \