-/*
- * Copyright (c) 1995-2004, Index Data
+/* This file is part of the YAZ toolkit.
+ * Copyright (C) Index Data
* See the file LICENSE for details.
- *
- * $Id: marcdisp.c,v 1.6 2004-08-07 08:07:00 adam Exp $
+ */
+
+/**
+ * \file marcdisp.c
+ * \brief Implements MARC conversion utilities
*/
#if HAVE_CONFIG_H
#include <config.h>
#endif
+#ifdef WIN32
+#include <windows.h>
+#endif
+
+#include <stdarg.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
-#include <ctype.h>
#include <yaz/marcdisp.h>
#include <yaz/wrbuf.h>
#include <yaz/yaz-util.h>
+#include <yaz/nmem_xml.h>
+#include <yaz/snprintf.h>
+
+#if YAZ_HAVE_XML2
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#endif
+
+/** \brief state of the optional collection wrapper around XML records */
+enum yaz_collection_state {
+ no_collection, /* records are written without a collection wrapper */
+ collection_first, /* wrapper enabled; opening tag not yet written */
+ collection_second /* opening tag written; the trailer closes it */
+};
+
+/** \brief node types for yaz_marc_node; selects the active union member */
+enum YAZ_MARC_NODE_TYPE
+{
+ YAZ_MARC_DATAFIELD, /* u.datafield is valid */
+ YAZ_MARC_CONTROLFIELD, /* u.controlfield is valid */
+ YAZ_MARC_COMMENT, /* u.comment is valid */
+ YAZ_MARC_LEADER /* u.leader is valid */
+};
+
+/** \brief represents a data field: tag, indicators and a list of subfields */
+struct yaz_marc_datafield {
+ char *tag; /* field tag string */
+ char *indicator; /* indicator characters; writers emit one attribute per character */
+ struct yaz_marc_subfield *subfields; /* head of the subfield list, 0 when empty */
+};
+/** \brief represents a control field: a tag and its data, no subfields */
+struct yaz_marc_controlfield {
+ char *tag; /* field tag string */
+ char *data; /* field content as a NUL-terminated string */
+};
+
+/** \brief a comment node
+ NOTE(review): yaz_marc_node keeps comments directly as u.comment (char *);
+ this struct does not appear to be referenced in the visible part of the file.
+ */
+struct yaz_marc_comment {
+ char *comment;
+};
+
+/** \brief MARC node: one element of the singly linked record list */
+struct yaz_marc_node {
+ enum YAZ_MARC_NODE_TYPE which; /* tells which member of u is in use */
+ union {
+ struct yaz_marc_datafield datafield;
+ struct yaz_marc_controlfield controlfield;
+ char *comment; /* comment text */
+ char *leader; /* leader string */
+ } u;
+ struct yaz_marc_node *next; /* next node in the record, or 0 for the last */
+};
+
+/** \brief represents a subfield */
+struct yaz_marc_subfield {
+ char *code_data; /* subfield code immediately followed by the subfield data */
+ struct yaz_marc_subfield *next; /* next subfield of the same field, or 0 */
+};
+
+/** \brief the internals of a yaz_marc_t handle */
struct yaz_marc_t_ {
WRBUF m_wr;
- int xml;
+ NMEM nmem; /* memory for nodes, subfields and copied strings; cleared by yaz_marc_reset */
+ int output_format; /* YAZ_MARC_* value that yaz_marc_write_mode dispatches on */
int debug;
+ int write_using_libxml2; /* non-zero: XML output is built with libxml2 and then dumped */
+ enum yaz_collection_state enable_collection; /* collection wrapper state */
yaz_iconv_t iconv_cd;
+ char subfield_str[8]; /* text written before each subfield in line format (initially " $") */
+ char endline_str[8]; /* text written after each field in line format (initially "\n") */
+ char *leader_spec; /* spec handed to marc_exec_leader; released with xfree on destroy */
+ struct yaz_marc_node *nodes; /* head of the node list of the current record */
+ struct yaz_marc_node **nodes_pp; /* slot that receives the next appended node */
+ struct yaz_marc_subfield **subfield_pp; /* slot that receives the next subfield; 0 when no data field is open */
};
+/** \brief creates a MARC handle
+ The handle starts with line output format, debug off, no character set
+ conversion, no leader spec, no collection wrapper and an empty node list.
+ */
yaz_marc_t yaz_marc_create(void)
{
yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
- mt->xml = YAZ_MARC_LINE;
+ mt->output_format = YAZ_MARC_LINE;
mt->debug = 0;
+ mt->write_using_libxml2 = 0;
+ mt->enable_collection = no_collection;
mt->m_wr = wrbuf_alloc();
mt->iconv_cd = 0;
+ mt->leader_spec = 0;
+ strcpy(mt->subfield_str, " $"); /* default subfield prefix for line format */
+ strcpy(mt->endline_str, "\n"); /* default field terminator for line format */
+
+ mt->nmem = nmem_create();
+ yaz_marc_reset(mt); /* initializes nodes, nodes_pp and subfield_pp */
return mt;
}
{
if (!mt)
return ;
- wrbuf_free (mt->m_wr, 1);
- xfree (mt);
+ nmem_destroy(mt->nmem);
+ wrbuf_destroy(mt->m_wr);
+ xfree(mt->leader_spec);
+ xfree(mt);
}
-static void marc_cdata (yaz_marc_t mt, const char *buf, size_t len, WRBUF wr)
+/** \brief returns the NMEM handle stored in the MARC handle */
+NMEM yaz_marc_get_nmem(yaz_marc_t mt)
{
- if (mt->xml == YAZ_MARC_ISO2709)
- wrbuf_iconv_write(wr, mt->iconv_cd, buf, len);
- else if (mt->xml == YAZ_MARC_LINE)
- wrbuf_iconv_write(wr, mt->iconv_cd, buf, len);
+ return mt->nmem;
+}
+
+/** \brief calls wrbuf_iconv_reset on wr with the handle's iconv descriptor;
+ the writers call it after the converted data of a field or subfield */
+static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
+{
+ wrbuf_iconv_reset(wr, mt->iconv_cd);
+}
+
+static int marc_exec_leader(const char *leader_spec, char *leader,
+ size_t size);
+#if YAZ_HAVE_XML2
+static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
+ const char *ns,
+ const char *format,
+ const char *type);
+#endif
+
+/** \brief allocates a node in the handle's NMEM and appends it to the record
+ \param mt handle
+ \returns the new node; only its next pointer is set, so the caller must
+ fill in which and the matching union member
+ */
+static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
+{
+    struct yaz_marc_node **tail = mt->nodes_pp;
+    struct yaz_marc_node *node;
+
+    node = (struct yaz_marc_node *) nmem_malloc(mt->nmem, sizeof(*node));
+    node->next = 0;
+    *tail = node;               /* hook the node in at the end of the list */
+    mt->nodes_pp = &node->next; /* the next append goes after this node */
+    return node;
+}
+
+#if YAZ_HAVE_XML2
+/** \brief appends a control field whose tag and data come from XML nodes
+ \param mt handle
+ \param ptr_tag XML node holding the tag text
+ \param ptr_data XML node holding the field data
+ Both strings are produced by nmem_text_node_cdata using the handle's NMEM.
+ */
+void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
+                                   const xmlNode *ptr_data)
+{
+    struct yaz_marc_node *node = yaz_marc_add_node(mt);
+    struct yaz_marc_controlfield *cf = &node->u.controlfield;
+
+    node->which = YAZ_MARC_CONTROLFIELD;
+    cf->tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
+    cf->data = nmem_text_node_cdata(ptr_data, mt->nmem);
+}
+
+/** \brief appends a control field with a ready-made tag and XML data
+ \param mt handle
+ \param tag tag string; the pointer is stored as is, not copied
+ \param ptr_data XML node holding the field data
+ */
+void yaz_marc_add_controlfield_xml2(yaz_marc_t mt, char *tag,
+                                    const xmlNode *ptr_data)
+{
+    struct yaz_marc_node *node = yaz_marc_add_node(mt);
+    struct yaz_marc_controlfield *cf = &node->u.controlfield;
+
+    node->which = YAZ_MARC_CONTROLFIELD;
+    cf->tag = tag;
+    cf->data = nmem_text_node_cdata(ptr_data, mt->nmem);
+}
+
+#endif
+
+
+/** \brief appends a comment node holding a copy of the given text
+ \param mt handle
+ \param comment NUL-terminated comment text; duplicated into the handle's NMEM
+ */
+void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
+{
+    struct yaz_marc_node *node = yaz_marc_add_node(mt);
+    char *copy;
+
+    node->which = YAZ_MARC_COMMENT;
+    copy = nmem_strdup(mt->nmem, comment);
+    node->u.comment = copy;
+}
+
+/** \brief formats a message printf-style and appends it as a comment node
+ \param mt handle
+ \param fmt printf-style format string followed by its arguments
+ The message is formatted into a 200 byte local buffer before it is copied.
+ */
+void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
+{
+    char text[200];
+    va_list args;
+
+    va_start(args, fmt);
+    yaz_vsnprintf(text, sizeof(text)-1, fmt, args);
+    va_end(args);
+
+    yaz_marc_add_comment(mt, text);
+}
+
+/** \brief returns the debug flag stored in the handle */
+int yaz_marc_get_debug(yaz_marc_t mt)
+{
+ return mt->debug;
+}
+
+/** \brief appends a leader node
+ \param mt handle
+ \param leader leader bytes
+ \param leader_len number of bytes taken from leader
+ The leader is copied into the handle's NMEM and the copy is then passed
+ to marc_exec_leader together with the handle's leader spec.
+ */
+void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
+{
+    struct yaz_marc_node *node = yaz_marc_add_node(mt);
+    char *copy = nmem_strdupn(mt->nmem, leader, leader_len);
+
+    node->which = YAZ_MARC_LEADER;
+    node->u.leader = copy;
+    marc_exec_leader(mt->leader_spec, copy, leader_len);
+}
+
+/** \brief appends a control field
+ \param mt handle
+ \param tag NUL-terminated tag; copied
+ \param data field data; data_len bytes are copied
+ \param data_len length of data
+ With debug enabled, a comment node with a hex dump of up to the first
+ 16 data bytes is appended as well (" .." marks a longer field).
+ */
+void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
+                               const char *data, size_t data_len)
+{
+    struct yaz_marc_node *node = yaz_marc_add_node(mt);
+
+    node->which = YAZ_MARC_CONTROLFIELD;
+    node->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
+    node->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
+
+    if (mt->debug)
+    {
+        char msg[80];
+        char *out = msg; /* write position inside msg */
+        size_t pos = 0;
+
+        out += sprintf(out, "controlfield:");
+        while (pos < 16 && pos < data_len)
+        {
+            out += sprintf(out, " %02X", data[pos] & 0xff);
+            pos++;
+        }
+        if (pos < data_len)
+            sprintf(out, " ..");
+        yaz_marc_add_comment(mt, msg);
+    }
+}
+
+/** \brief appends a data field and makes it the target for new subfields
+ \param mt handle
+ \param tag NUL-terminated tag; copied
+ \param indicator indicator characters; indicator_len bytes are copied
+ \param indicator_len number of indicator characters
+ */
+void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
+                            const char *indicator, size_t indicator_len)
+{
+    struct yaz_marc_node *node = yaz_marc_add_node(mt);
+    struct yaz_marc_datafield *df = &node->u.datafield;
+
+    node->which = YAZ_MARC_DATAFIELD;
+    df->tag = nmem_strdup(mt->nmem, tag);
+    df->indicator = nmem_strdupn(mt->nmem, indicator, indicator_len);
+    df->subfields = 0;
+
+    /* following yaz_marc_add_subfield calls append to this field */
+    mt->subfield_pp = &df->subfields;
+}
+
+/** \brief appends a code value to an element name if it is plain alphanumeric
+
+ If the value holds only characters in 0-9, a-z, A-Z it is written as is.
+ Otherwise, if attribute_name is not null, the value is written as an XML
+ attribute (space, name, equals sign, quoted value). If attribute_name is
+ null in that case, nothing is written and a non-zero value is returned,
+ meaning the value could not be handled.
+*/
+static int element_name_append_attribute_value(
+ yaz_marc_t mt, WRBUF buffer,
+ const char *attribute_name, char *code_data, size_t code_len)
+{
+ /* TODO Map special codes to something possible for XML ELEMENT names */
+
+ int encode = 0; /* set when some character is not alphanumeric */
+ size_t index = 0;
+ int success = 0;
+ for (index = 0; index < code_len; index++)
+ {
+ if (!((code_data[index] >= '0' && code_data[index] <= '9') ||
+ (code_data[index] >= 'a' && code_data[index] <= 'z') ||
+ (code_data[index] >= 'A' && code_data[index] <= 'Z')))
+ encode = 1;
+ }
+ /* Add as attribute */
+ if (encode && attribute_name)
+ wrbuf_printf(buffer, " %s=\"", attribute_name);
+
+ if (!encode || attribute_name)
+ wrbuf_iconv_write_cdata(buffer, mt->iconv_cd, code_data, code_len);
else
- wrbuf_iconv_write_cdata(wr, mt->iconv_cd, buf, len);
+ success = -1; /* report that the value could not be written */
+
+ if (encode && attribute_name)
+ wrbuf_printf(buffer, "\""); /* closes the attribute value */
+ return success;
}
-int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
+#if YAZ_HAVE_XML2
+/** \brief appends a data field whose tag comes from an XML node
+ \param mt handle
+ \param ptr_tag XML node holding the tag text
+ \param indicator indicator characters; indicator_len bytes are copied
+ \param indicator_len number of indicator characters
+ */
+void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
+ const char *indicator, size_t indicator_len)
{
- int entry_p;
- int record_length;
- int indicator_length;
- int identifier_length;
- int base_address;
- int length_data_entry;
- int length_starting;
- int length_implementation;
+ struct yaz_marc_node *n = yaz_marc_add_node(mt);
+ n->which = YAZ_MARC_DATAFIELD;
+ n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
+ n->u.datafield.indicator =
+ nmem_strdupn(mt->nmem, indicator, indicator_len);
+ n->u.datafield.subfields = 0;
+
+ /* make subfield_pp the current (last one) */
+ mt->subfield_pp = &n->u.datafield.subfields;
+}
+
+/** \brief appends a data field from ready-made tag and indicator strings
+ \param mt handle
+ \param tag_value tag string; the pointer is stored as is, not copied
+ \param indicators indicator string; the pointer is stored as is, not copied
+ */
+void yaz_marc_add_datafield_xml2(yaz_marc_t mt, char *tag_value, char *indicators)
+{
+    struct yaz_marc_node *node = yaz_marc_add_node(mt);
+    struct yaz_marc_datafield *df = &node->u.datafield;
+
+    node->which = YAZ_MARC_DATAFIELD;
+    df->tag = tag_value;
+    df->indicator = indicators;
+    df->subfields = 0;
+
+    /* following yaz_marc_add_subfield calls append to this field */
+    mt->subfield_pp = &df->subfields;
+}
+
+/** \brief replaces the indicator string of a data field node
+ The pointer is stored as is; the node is assumed to be a data field
+ (n->which is not checked here).
+ */
+void yaz_marc_datafield_set_indicators(struct yaz_marc_node *n, char *indicator)
+{
+ n->u.datafield.indicator = indicator;
+}
+
+#endif
- wrbuf_rewind(wr);
+/** \brief appends a subfield to the most recently added data field
+ \param mt handle
+ \param code_data subfield code followed by subfield data
+ \param code_data_len number of bytes in code_data
+ With debug enabled, a comment with a hex dump of up to 16 bytes is added
+ first. Nothing is appended when no data field is open (subfield_pp is 0).
+ */
+void yaz_marc_add_subfield(yaz_marc_t mt,
+ const char *code_data, size_t code_data_len)
+{
+ if (mt->debug)
+ {
+ size_t i;
+ char msg[80];
- record_length = atoi_n (buf, 5);
- if (record_length < 25)
+ sprintf(msg, "subfield:");
+ for (i = 0; i < 16 && i < code_data_len; i++)
+ sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
+ if (i < code_data_len)
+ sprintf(msg + strlen(msg), " ..");
+ yaz_marc_add_comment(mt, msg);
+ }
+
+ if (mt->subfield_pp)
{
- if (mt->debug)
- {
- char str[40];
-
- sprintf (str, "Record length %d - aborting\n", record_length);
- wrbuf_puts (wr, str);
- }
- return -1;
+ struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
+ nmem_malloc(mt->nmem, sizeof(*n));
+ n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
+ n->next = 0;
+ /* mark subfield_pp to point to this one, so we append here next */
+ *mt->subfield_pp = n;
+ mt->subfield_pp = &n->next;
+ }
+}
+
+/** \brief replaces a leader character outside the range ' '..127
+ \param mt handle; receives a comment node when a replacement is made
+ \param leader leader buffer, modified in place
+ \param offset position in leader to check
+ \param ch_default replacement character
+ */
+static void check_ascii(yaz_marc_t mt, char *leader, int offset,
+                        int ch_default)
+{
+    const char ch = leader[offset];
+
+    if (ch >= ' ' && ch <= 127)
+        return; /* acceptable as is */
+
+    yaz_marc_cprintf(mt,
+                     "Leader character at offset %d is non-ASCII. "
+                     "Setting value to '%c'", offset, ch_default);
+    leader[offset] = ch_default;
+}
+
+/** \brief validates a 24 byte leader, extracts its length fields and adds it
+ \param mt handle
+ \param leader_c leader; exactly 24 bytes are read
+ \param indicator_length receives the digit at offset 10 (2 if invalid or 0)
+ \param identifier_length receives the digit at offset 11 (2 if invalid or 0)
+ \param base_address receives the number at offsets 12..16 (0 if invalid)
+ \param length_data_entry receives the digit at offset 20 (4 if invalid or < 3)
+ \param length_starting receives the digit at offset 21 (5 if invalid or < 4)
+ \param length_implementation receives the digit at offset 22 (0 if invalid)
+
+ The checks run on a local copy. Each correction adds a comment node, and
+ the corrected copy is finally added as the leader node.
+ */
+void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
+ int *indicator_length,
+ int *identifier_length,
+ int *base_address,
+ int *length_data_entry,
+ int *length_starting,
+ int *length_implementation)
+{
+ char leader[24];
+
+ memcpy(leader, leader_c, 24);
+
+ check_ascii(mt, leader, 5, 'a');
+ check_ascii(mt, leader, 6, 'a');
+ check_ascii(mt, leader, 7, 'a');
+ check_ascii(mt, leader, 8, '#');
+ check_ascii(mt, leader, 9, '#');
+ if (!atoi_n_check(leader+10, 1, indicator_length) || *indicator_length == 0)
+ {
+ yaz_marc_cprintf(mt, "Indicator length at offset 10 should"
+ " hold a number 1-9. Assuming 2");
+ leader[10] = '2';
+ *indicator_length = 2;
+ }
+ if (!atoi_n_check(leader+11, 1, identifier_length) || *identifier_length == 0)
+ {
+ yaz_marc_cprintf(mt, "Identifier length at offset 11 should "
+ " hold a number 1-9. Assuming 2");
+ leader[11] = '2';
+ *identifier_length = 2;
+ }
+ if (!atoi_n_check(leader+12, 5, base_address))
+ {
+ yaz_marc_cprintf(mt, "Base address at offsets 12..16 should"
+ " hold a number. Assuming 0");
+ *base_address = 0; /* only the output value changes; leader bytes 12..16 are left as they are */
+ }
+ check_ascii(mt, leader, 17, '#');
+ check_ascii(mt, leader, 18, '#');
+ check_ascii(mt, leader, 19, '#');
+ if (!atoi_n_check(leader+20, 1, length_data_entry) ||
+ *length_data_entry < 3)
+ {
+ yaz_marc_cprintf(mt, "Length data entry at offset 20 should"
+ " hold a number 3-9. Assuming 4");
+ *length_data_entry = 4;
+ leader[20] = '4';
+ }
+ if (!atoi_n_check(leader+21, 1, length_starting) || *length_starting < 4)
+ {
+ yaz_marc_cprintf(mt, "Length starting at offset 21 should"
+ " hold a number 4-9. Assuming 5");
+ *length_starting = 5;
+ leader[21] = '5';
+ }
+ if (!atoi_n_check(leader+22, 1, length_implementation))
+ {
+ yaz_marc_cprintf(mt, "Length implementation at offset 22 should"
+ " hold a number. Assuming 0");
+ *length_implementation = 0;
+ leader[22] = '0';
+ }
+ check_ascii(mt, leader, 23, '0');
+
+ if (mt->debug)
+ {
+ yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
+ yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
+ yaz_marc_cprintf(mt, "Base address %5d", *base_address);
+ yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
+ yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
+ yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
+ }
+ yaz_marc_add_leader(mt, leader, 24);
+}
+
+/** \brief sets the text written before each subfield in line format
+ \param mt handle
+ \param s new prefix; truncated to fit the 8 byte buffer including NUL
+ */
+void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
+{
+    const size_t last = sizeof(mt->subfield_str) - 1;
+
+    strncpy(mt->subfield_str, s, last);
+    mt->subfield_str[last] = '\0'; /* strncpy does not terminate on truncation */
+}
+
+/** \brief sets the text written after each field in line format
+ \param mt handle
+ \param s new terminator; truncated to fit the 8 byte buffer including NUL
+ */
+void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
+{
+    const size_t last = sizeof(mt->endline_str) - 1;
+
+    strncpy(mt->endline_str, s, last);
+    mt->endline_str[last] = '\0'; /* strncpy does not terminate on truncation */
+}
+
+/* try to guess how many bytes the identifier really is!
+ With a conversion descriptor: feed 1, 2, 3 and 4 leading bytes of buf to
+ yaz_iconv and return the first length that converts without error.
+ Without one: read a single UTF-8 character and return its byte length.
+ Returns 1 whenever no length could be determined. */
+static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
+{
+ if (mt->iconv_cd)
+ {
+ size_t i;
+ for (i = 1; i<5; i++)
+ {
+ char outbuf[12];
+ size_t outbytesleft = sizeof(outbuf);
+ char *outp = outbuf;
+ const char *inp = buf;
+
+ size_t inbytesleft = i;
+ size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
+ &outp, &outbytesleft);
+ yaz_iconv(mt->iconv_cd, 0, 0, &outp, &outbytesleft); /* call without input after every attempt; presumably resets converter state -- confirm against yaz_iconv docs */
+ if (r != (size_t) (-1))
+ return i; /* got a complete sequence */
+ }
+ return 1; /* giving up */
}
- /* ballout if bsize is known and record_length is less than that */
- if (bsize != -1 && record_length > bsize)
- return -1;
- if (isdigit(buf[10]))
- indicator_length = atoi_n (buf+10, 1);
else
- indicator_length = 2;
- if (isdigit(buf[11]))
- identifier_length = atoi_n (buf+11, 1);
+ {
+ int error = 0;
+ size_t no_read = 0;
+ (void) yaz_read_UTF8_char((const unsigned char *) buf, strlen(buf),
+ &no_read, &error);
+ if (error == 0 && no_read > 0)
+ return no_read;
+ }
+ return 1; /* we don't know */
+}
+
+/** \brief discards the current record
+ \param mt handle
+ Resets the handle's NMEM and leaves an empty node list with no open
+ data field.
+ */
+void yaz_marc_reset(yaz_marc_t mt)
+{
+    mt->subfield_pp = 0;        /* no data field to append subfields to */
+    mt->nodes = 0;
+    mt->nodes_pp = &mt->nodes;  /* next node becomes the list head */
+    nmem_reset(mt->nmem);
+}
+
+/** \brief writes only the comment nodes of the record, one per line
+ \param mt handle
+ \param wr WRBUF output
+ \retval 0 OK
+ \retval -1 no leader node, or leader offset 11 is not a number
+ */
+int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
+{
+    struct yaz_marc_node *node = mt->nodes;
+    const char *leader = 0;
+    int identifier_length;
+
+    /* locate the first leader node */
+    while (node && node->which != YAZ_MARC_LEADER)
+        node = node->next;
+    if (node)
+        leader = node->u.leader;
+
+    if (!leader || !atoi_n_check(leader+11, 1, &identifier_length))
+        return -1;
+
+    for (node = mt->nodes; node; node = node->next)
+    {
+        if (node->which != YAZ_MARC_COMMENT)
+            continue;
+        wrbuf_iconv_write(wr, mt->iconv_cd,
+                          node->u.comment, strlen(node->u.comment));
+        wrbuf_puts(wr, "\n");
+    }
+    return 0;
+}
+
+/** \brief returns the number of leading bytes of data that form the subfield code
+ \param mt handle
+ \param data subfield code followed by subfield data
+ \param identifier_length identifier length taken from the leader
+ */
+static size_t get_subfield_len(yaz_marc_t mt, const char *data,
+ int identifier_length)
+{
+ /* if identifier length is 2 (most MARCs) or less (probably an error),
+ the code is a single character .. However we've
+ seen multibyte codes, so see how big it really is */
+ if (identifier_length > 2)
+ return identifier_length - 1;
else
- identifier_length = 2;
- base_address = atoi_n (buf+12, 5);
+ return cdata_one_character(mt, data);
+}
- length_data_entry = atoi_n (buf+20, 1);
- length_starting = atoi_n (buf+21, 1);
- length_implementation = atoi_n (buf+22, 1);
+/** \brief writes the record in line format
+ \param mt handle
+ \param wr WRBUF output
+ \retval 0 OK
+ \retval -1 no leader node, or leader offset 11 is not a number
+
+ Data fields are written as tag, indicators and subfields; control fields
+ as tag and data; comments in parentheses; the leader on its own line.
+ A blank line ends the record.
+ */
+int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
+{
+ struct yaz_marc_node *n;
+ int identifier_length;
+ const char *leader = 0;
+
+ for (n = mt->nodes; n; n = n->next)
+ if (n->which == YAZ_MARC_LEADER)
+ {
+ leader = n->u.leader;
+ break;
+ }
+
+ if (!leader)
+ return -1;
+ if (!atoi_n_check(leader+11, 1, &identifier_length))
+ return -1;
- if (mt->xml != YAZ_MARC_LINE)
+ for (n = mt->nodes; n; n = n->next)
{
- char str[80];
- int i;
- switch(mt->xml)
+ struct yaz_marc_subfield *s;
+ switch(n->which)
{
- case YAZ_MARC_ISO2709:
- break;
- case YAZ_MARC_SIMPLEXML:
- wrbuf_puts (wr, "<iso2709\n");
- sprintf (str, " RecordStatus=\"%c\"\n", buf[5]);
- wrbuf_puts (wr, str);
- sprintf (str, " TypeOfRecord=\"%c\"\n", buf[6]);
- wrbuf_puts (wr, str);
- for (i = 1; i<=19; i++)
+ case YAZ_MARC_DATAFIELD:
+ wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
+ n->u.datafield.indicator);
+ for (s = n->u.datafield.subfields; s; s = s->next)
{
- sprintf (str, " ImplDefined%d=\"%c\"\n", i, buf[6+i]);
- wrbuf_puts (wr, str);
+ size_t using_code_len = get_subfield_len(mt, s->code_data,
+ identifier_length);
+
+ wrbuf_puts (wr, mt->subfield_str);
+ wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
+ using_code_len); /* the subfield code */
+ wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
+ wrbuf_iconv_puts(wr, mt->iconv_cd,
+ s->code_data + using_code_len); /* the subfield data */
+ marc_iconv_reset(mt, wr);
}
- wrbuf_puts (wr, ">\n");
- break;
- case YAZ_MARC_OAIMARC:
- wrbuf_puts(
- wr,
- "<oai_marc xmlns=\"http://www.openarchives.org/OIA/oai_marc\""
- "\n"
- " xmlns:xsi=\"http://www.w3.org/2000/10/XMLSchema-instance\""
- "\n"
- " xsi:schemaLocation=\"http://www.openarchives.org/OAI/oai_marc.xsd\""
- "\n"
- );
-
- sprintf (str, " status=\"%c\" type=\"%c\" catForm=\"%c\">\n",
- buf[5], buf[6], buf[7]);
- wrbuf_puts (wr, str);
+ wrbuf_puts (wr, mt->endline_str);
break;
+ case YAZ_MARC_CONTROLFIELD:
+ wrbuf_printf(wr, "%s", n->u.controlfield.tag);
+ wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
+ wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
+ marc_iconv_reset(mt, wr);
+ wrbuf_puts (wr, mt->endline_str);
+ break;
+ case YAZ_MARC_COMMENT:
+ wrbuf_puts(wr, "(");
+ wrbuf_iconv_write(wr, mt->iconv_cd,
+ n->u.comment, strlen(n->u.comment));
+ marc_iconv_reset(mt, wr);
+ wrbuf_puts(wr, ")\n");
+ break;
+ case YAZ_MARC_LEADER:
+ wrbuf_printf(wr, "%s\n", n->u.leader);
+ }
+ }
+ wrbuf_puts(wr, "\n");
+ return 0;
+}
+
+/** \brief closes the collection element if one was opened
+ \param mt handle
+ \param wr WRBUF output
+ \retval 0 always
+
+ The closing tag is written only when the state is collection_second and
+ the output format is MARCXML, TurboMARC or MarcXchange.
+ */
+int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
+{
+ if (mt->enable_collection == collection_second)
+ {
+ switch(mt->output_format)
+ {
case YAZ_MARC_MARCXML:
- wrbuf_printf(
- wr,
- "<record xmlns=\"http://www.loc.gov/MARC21/slim\">\n"
- " <leader>");
-#if 1
- marc_cdata(mt, buf, 9, wr);
- marc_cdata(mt, "a", 1, wr); /* set leader to signal unicode */
- marc_cdata(mt, buf+10, 14, wr);
-#else
- marc_cdata(mt, buf, 24, wr); /* leave header as is .. */
-#endif
- wrbuf_printf(wr, "</leader>\n");
+ case YAZ_MARC_TURBOMARC:
+ wrbuf_printf(wr, "</collection>\n");
+ break;
+ case YAZ_MARC_XCHANGE:
+ wrbuf_printf(wr, "</collection>\n");
break;
}
}
- if (mt->debug)
+ return 0;
+}
+
+/** \brief requests a collection wrapper around the XML records
+ Sets the state to collection_first, so the next XML record written is
+ preceded by the opening collection tag.
+ */
+void yaz_marc_enable_collection(yaz_marc_t mt)
+{
+ mt->enable_collection = collection_first;
+}
+
+/** \brief writes the record using the writer for the handle's output format
+ \param mt handle
+ \param wr WRBUF output
+ \returns the selected writer's result, or -1 for an unknown format
+ */
+int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
+{
+ switch(mt->output_format)
{
- char str[40];
-
- if (mt->xml)
- wrbuf_puts (wr, "<!--\n");
- sprintf (str, "Record length %5d\n", record_length);
- wrbuf_puts (wr, str);
- sprintf (str, "Indicator length %5d\n", indicator_length);
- wrbuf_puts (wr, str);
- sprintf (str, "Identifier length %5d\n", identifier_length);
- wrbuf_puts (wr, str);
- sprintf (str, "Base address %5d\n", base_address);
- wrbuf_puts (wr, str);
- sprintf (str, "Length data entry %5d\n", length_data_entry);
- wrbuf_puts (wr, str);
- sprintf (str, "Length starting %5d\n", length_starting);
- wrbuf_puts (wr, str);
- sprintf (str, "Length implementation %5d\n", length_implementation);
- wrbuf_puts (wr, str);
- if (mt->xml)
- wrbuf_puts (wr, "-->\n");
+ case YAZ_MARC_LINE:
+ return yaz_marc_write_line(mt, wr);
+ case YAZ_MARC_MARCXML:
+ return yaz_marc_write_marcxml(mt, wr);
+ case YAZ_MARC_TURBOMARC:
+ return yaz_marc_write_turbomarc(mt, wr);
+ case YAZ_MARC_XCHANGE:
+ return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
+ case YAZ_MARC_ISO2709:
+ return yaz_marc_write_iso2709(mt, wr);
+ case YAZ_MARC_CHECK:
+ return yaz_marc_write_check(mt, wr);
+ case YAZ_MARC_JSON:
+ return yaz_marc_write_json(mt, wr);
}
+ return -1;
+}
- /* first pass. determine length of directory & base of data */
- for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
+/* XML element and attribute names, indexed by the turbo flag:
+ [0] is the MARCXML/MarcXchange name, [1] the TurboMARC short form */
+static const char *record_name[2] = { "record", "r"};
+static const char *leader_name[2] = { "leader", "l"};
+static const char *controlfield_name[2] = { "controlfield", "c"};
+static const char *datafield_name[2] = { "datafield", "d"};
+static const char *indicator_name[2] = { "ind", "i"};
+static const char *subfield_name[2] = { "subfield", "s"};
+
+/** \brief common MARC XML/Xchange/turbomarc writer
+ \param mt handle
+ \param wr WRBUF output
+ \param ns XMLNS for the elements
+ \param format record format (e.g. "MARC21")
+ \param type record type (e.g. "Bibliographic")
+ \param turbo =1 for turbomarc
+ \retval 0 OK
+ \retval -1 failure
+
+ Fails when the record has no leader node or leader offset 11 is not a
+ number. In turbo mode the tag (and an alphanumeric subfield code) is
+ appended to the element name instead of being written as an attribute.
+*/
+static int yaz_marc_write_marcxml_wrbuf(yaz_marc_t mt, WRBUF wr,
+ const char *ns,
+ const char *format,
+ const char *type,
+ int turbo)
+{
+ struct yaz_marc_node *n;
+ int identifier_length;
+ const char *leader = 0;
+
+ for (n = mt->nodes; n; n = n->next)
+ if (n->which == YAZ_MARC_LEADER)
+ {
+ leader = n->u.leader;
+ break;
+ }
+
+ if (!leader)
+ return -1;
+ if (!atoi_n_check(leader+11, 1, &identifier_length))
+ return -1;
+
+ if (mt->enable_collection != no_collection)
{
- entry_p += 3+length_data_entry+length_starting;
- if (entry_p >= record_length)
- return -1;
+ if (mt->enable_collection == collection_first)
+ {
+ wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
+ mt->enable_collection = collection_second; /* open the wrapper only once */
+ }
+ wrbuf_printf(wr, "<%s", record_name[turbo]); /* namespace is declared on the collection element */
+ }
+ else
+ {
+ wrbuf_printf(wr, "<%s xmlns=\"%s\"", record_name[turbo], ns);
+ }
+ if (format)
+ wrbuf_printf(wr, " format=\"%.80s\"", format);
+ if (type)
+ wrbuf_printf(wr, " type=\"%.80s\"", type);
+ wrbuf_printf(wr, ">\n");
+ for (n = mt->nodes; n; n = n->next)
+ {
+ struct yaz_marc_subfield *s;
+
+ switch(n->which)
+ {
+ case YAZ_MARC_DATAFIELD:
+
+ wrbuf_printf(wr, " <%s", datafield_name[turbo]);
+ if (!turbo)
+ wrbuf_printf(wr, " tag=\"");
+ wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
+ strlen(n->u.datafield.tag));
+ if (!turbo)
+ wrbuf_printf(wr, "\"");
+ if (n->u.datafield.indicator)
+ {
+ int i;
+ for (i = 0; n->u.datafield.indicator[i]; i++)
+ {
+ wrbuf_printf(wr, " %s%d=\"", indicator_name[turbo], i+1);
+ wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
+ n->u.datafield.indicator+i, 1);
+ wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
+ }
+ }
+ wrbuf_printf(wr, ">\n");
+ for (s = n->u.datafield.subfields; s; s = s->next)
+ {
+ size_t using_code_len = get_subfield_len(mt, s->code_data,
+ identifier_length);
+ wrbuf_printf(wr, " <%s", subfield_name[turbo]);
+ if (!turbo)
+ {
+ wrbuf_printf(wr, " code=\"");
+ wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
+ s->code_data, using_code_len);
+ wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
+ }
+ else
+ {
+ element_name_append_attribute_value(mt, wr, "code", s->code_data, using_code_len);
+ wrbuf_puts(wr, ">");
+ }
+ wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
+ s->code_data + using_code_len,
+ strlen(s->code_data + using_code_len));
+ marc_iconv_reset(mt, wr);
+ wrbuf_printf(wr, "</%s", subfield_name[turbo]);
+ if (turbo)
+ element_name_append_attribute_value(mt, wr, 0, s->code_data, using_code_len); /* null attribute name: a non-alphanumeric code adds nothing to the closing tag */
+ wrbuf_puts(wr, ">\n");
+ }
+ wrbuf_printf(wr, " </%s", datafield_name[turbo]);
+ /* TODO Not CDATA */
+ if (turbo)
+ wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
+ strlen(n->u.datafield.tag));
+ wrbuf_printf(wr, ">\n");
+ break;
+ case YAZ_MARC_CONTROLFIELD:
+ wrbuf_printf(wr, " <%s", controlfield_name[turbo]);
+ if (!turbo)
+ {
+ wrbuf_printf(wr, " tag=\"");
+ wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
+ strlen(n->u.controlfield.tag));
+ wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
+ }
+ else
+ {
+ /* TODO convert special */
+ wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
+ strlen(n->u.controlfield.tag));
+ wrbuf_iconv_puts(wr, mt->iconv_cd, ">");
+ }
+ wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
+ n->u.controlfield.data,
+ strlen(n->u.controlfield.data));
+ marc_iconv_reset(mt, wr);
+ wrbuf_printf(wr, "</%s", controlfield_name[turbo]);
+ /* TODO convert special */
+ if (turbo)
+ wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
+ strlen(n->u.controlfield.tag));
+ wrbuf_puts(wr, ">\n");
+ break;
+ case YAZ_MARC_COMMENT:
+ wrbuf_printf(wr, "<!-- ");
+ wrbuf_puts(wr, n->u.comment); /* written as is, without escaping or conversion */
+ wrbuf_printf(wr, " -->\n");
+ break;
+ case YAZ_MARC_LEADER:
+ wrbuf_printf(wr, " <%s>", leader_name[turbo]);
+ wrbuf_iconv_write_cdata(wr,
+ 0 , /* no charset conversion for leader */
+ n->u.leader, strlen(n->u.leader));
+ wrbuf_printf(wr, "</%s>\n", leader_name[turbo]);
+ }
}
- if (mt->debug && base_address != entry_p+1)
+ wrbuf_printf(wr, "</%s>\n", record_name[turbo]);
+ return 0;
+}
+
+/** \brief writes the record as XML, via libxml2 or directly to the WRBUF
+ \param mt handle
+ \param wr WRBUF output
+ \param ns XMLNS for the elements
+ \param format record format, or 0
+ \param type record type, or 0
+ \param turbo =1 for turbomarc
+ \retval 0 OK
+ \retval -1 failure, including libxml2 output requested in a build
+ without YAZ_HAVE_XML2
+ */
+static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
+ const char *ns,
+ const char *format,
+ const char *type,
+ int turbo)
+{
+ if (mt->write_using_libxml2)
{
- wrbuf_printf (wr," <!-- base address not at end of directory "
- "base=%d end=%d -->\n", base_address, entry_p+1);
+#if YAZ_HAVE_XML2
+ int ret;
+ xmlNode *root_ptr;
+
+ if (!turbo)
+ ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
+ else
+ ret = yaz_marc_write_xml_turbo_xml(mt, &root_ptr, ns, format, type);
+ if (ret == 0)
+ {
+ xmlChar *buf_out;
+ xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
+ int len_out;
+
+ xmlDocSetRootElement(doc, root_ptr); /* doc now owns the tree; xmlFreeDoc releases it */
+ xmlDocDumpMemory(doc, &buf_out, &len_out);
+
+ wrbuf_write(wr, (const char *) buf_out, len_out);
+ wrbuf_puts(wr, "");
+ xmlFree(buf_out);
+ xmlFreeDoc(doc);
+ }
+ return ret;
+#else
+ return -1;
+#endif
}
- base_address = entry_p+1;
+ else
+ return yaz_marc_write_marcxml_wrbuf(mt, wr, ns, format, type, turbo);
+}
+
+/** \brief writes the record as MARCXML
+ \param mt handle
+ \param wr WRBUF output
+ \retval 0 OK
+ \retval -1 failure
+
+ Unless a leader spec is set, leader position 9 is set to 'a' (UNICODE);
+ see http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea
+ */
+int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
+{
+    static const char ns[] = "http://www.loc.gov/MARC21/slim";
+
+    if (mt->leader_spec == 0)
+        yaz_marc_modify_leader(mt, 9, "a");
+
+    return yaz_marc_write_marcxml_ns(mt, wr, ns, 0, 0, 0);
+}
+
+/** \brief writes the record as TurboMARC
+ \param mt handle
+ \param wr WRBUF output
+ \retval 0 OK
+ \retval -1 failure
+
+ Unless a leader spec is set, leader position 9 is set to 'a' (UNICODE);
+ see http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea
+ */
+int yaz_marc_write_turbomarc(yaz_marc_t mt, WRBUF wr)
+{
+    static const char ns[] = "http://www.indexdata.com/turbomarc";
+
+    if (mt->leader_spec == 0)
+        yaz_marc_modify_leader(mt, 9, "a");
+
+    return yaz_marc_write_marcxml_ns(mt, wr, ns, 0, 0, 1);
+}
+
+/** \brief writes the record as MarcXchange XML
+ \param mt handle
+ \param wr WRBUF output
+ \param format record format (e.g. "MARC21"); 0 omits the attribute
+ \param type record type (e.g. "Bibliographic"); 0 omits the attribute
+ \retval 0 OK
+ \retval -1 failure
+
+ format and type are handed on to the common XML writer, which emits them
+ as attributes on the record element when they are not null.
+ */
+int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
+                               const char *format,
+                               const char *type)
+{
+    return yaz_marc_write_marcxml_ns(mt, wr,
+                                     "info:lc/xmlns/marcxchange-v1",
+                                     format, type, 0);
+}
- if (mt->xml == YAZ_MARC_ISO2709)
+#if YAZ_HAVE_XML2
+
+/** \brief adds one data field as a TurboMARC element to a libxml2 record
+ \param mt handle
+ \param n data field node
+ \param record_ptr record element that receives the new child
+ \param ns_record namespace used for the new elements
+ \param wr_cdata scratch WRBUF; rewound and overwritten here
+ \param identifier_length identifier length taken from the leader
+
+ The element is named "d" plus up to three tag characters. Indicators
+ become attributes i1, i2, ... Each subfield becomes a child named "s" plus
+ its code, or plain "s" with a code attribute when the code is not
+ alphanumeric.
+ */
+void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n,
+ xmlNode *record_ptr,
+ xmlNsPtr ns_record, WRBUF wr_cdata,
+ int identifier_length)
+{
+ xmlNode *ptr;
+ struct yaz_marc_subfield *s;
+ WRBUF subfield_name = wrbuf_alloc();
+
+ /* TODO consider if safe */
+ char field[10];
+ field[0] = 'd';
+ strncpy(field + 1, n->u.datafield.tag, 3);
+ field[4] = '\0';
+ ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST field, 0);
+
+ if (n->u.datafield.indicator)
{
- WRBUF wr_head = wrbuf_alloc();
- WRBUF wr_dir = wrbuf_alloc();
- WRBUF wr_tmp = wrbuf_alloc();
-
- int data_p = 0;
- /* second pass. create directory for ISO2709 output */
- for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
- {
- int data_length, data_offset, end_offset;
- int i, sz1, sz2;
-
- wrbuf_write(wr_dir, buf+entry_p, 3);
- entry_p += 3;
-
- data_length = atoi_n (buf+entry_p, length_data_entry);
- entry_p += length_data_entry;
- data_offset = atoi_n (buf+entry_p, length_starting);
- entry_p += length_starting;
- i = data_offset + base_address;
- end_offset = i+data_length-1;
-
- while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS &&
- i < end_offset)
- i++;
- sz1 = 1+i - (data_offset + base_address);
- if (mt->iconv_cd)
- {
- sz2 = wrbuf_iconv_write(wr_tmp, mt->iconv_cd,
- buf + data_offset+base_address, sz1);
- wrbuf_rewind(wr_tmp);
- }
- else
- sz2 = sz1;
- wrbuf_printf(wr_dir, "%0*d", length_data_entry, sz2);
- wrbuf_printf(wr_dir, "%0*d", length_starting, data_p);
- data_p += sz2;
- }
- wrbuf_putc(wr_dir, ISO2709_FS);
- wrbuf_printf(wr_head, "%05d", data_p+1 + base_address);
- wrbuf_write(wr_head, buf+5, 7);
- wrbuf_printf(wr_head, "%05d", base_address);
- wrbuf_write(wr_head, buf+17, 7);
-
- wrbuf_write(wr, wrbuf_buf(wr_head), 24);
- wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
- wrbuf_free(wr_head, 1);
- wrbuf_free(wr_dir, 1);
- wrbuf_free(wr_tmp, 1);
+ int i;
+ for (i = 0; n->u.datafield.indicator[i]; i++)
+ {
+ char ind_str[6];
+ char ind_val[2];
+
+ ind_val[0] = n->u.datafield.indicator[i];
+ ind_val[1] = '\0';
+ sprintf(ind_str, "%s%d", indicator_name[1], i+1);
+ xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
+ }
}
- /* third pass. create data output */
- for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
+ for (s = n->u.datafield.subfields; s; s = s->next)
{
- int data_length;
- int data_offset;
- int end_offset;
- int i, j;
- char tag[4];
- int identifier_flag = 1;
-
- memcpy (tag, buf+entry_p, 3);
- entry_p += 3;
- tag[3] = '\0';
- data_length = atoi_n (buf+entry_p, length_data_entry);
- entry_p += length_data_entry;
- data_offset = atoi_n (buf+entry_p, length_starting);
- entry_p += length_starting;
- i = data_offset + base_address;
- end_offset = i+data_length-1;
-
- if (indicator_length < 4 && indicator_length > 0)
+ int not_written;
+ xmlNode *ptr_subfield;
+ size_t using_code_len = get_subfield_len(mt, s->code_data,
+ identifier_length);
+ wrbuf_rewind(wr_cdata);
+ wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, s->code_data + using_code_len);
+ marc_iconv_reset(mt, wr_cdata);
+
+ wrbuf_rewind(subfield_name);
+ wrbuf_puts(subfield_name, "s");
+ not_written = element_name_append_attribute_value(mt, subfield_name, 0, s->code_data, using_code_len) != 0;
+ ptr_subfield = xmlNewTextChild(ptr, ns_record,
+ BAD_CAST wrbuf_cstr(subfield_name),
+ BAD_CAST wrbuf_cstr(wr_cdata));
+ if (not_written)
{
- if (buf[i + indicator_length] != ISO2709_IDFS)
- identifier_flag = 0;
+ /* Generate code attribute value and add */
+ wrbuf_rewind(wr_cdata);
+ wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len);
+ xmlNewProp(ptr_subfield, BAD_CAST "code", BAD_CAST wrbuf_cstr(wr_cdata));
}
- else if (!memcmp (tag, "00", 2))
- identifier_flag = 0;
-
- switch(mt->xml)
+ }
+ wrbuf_destroy(subfield_name);
+}
+
+/** \brief builds a libxml2 TurboMARC tree for the record
+ \param mt handle
+ \param root_ptr receives the new "r" element; set only on success
+ \param ns XMLNS for the elements
+ \param format record format, or 0
+ \param type record type, or 0
+ \retval 0 OK
+ \retval -1 no leader node, or leader offset 11 is not a number
+ */
+static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
+ const char *ns,
+ const char *format,
+ const char *type)
+{
+ struct yaz_marc_node *n;
+ int identifier_length;
+ const char *leader = 0;
+ xmlNode *record_ptr;
+ xmlNsPtr ns_record;
+ WRBUF wr_cdata = 0;
+
+ for (n = mt->nodes; n; n = n->next)
+ if (n->which == YAZ_MARC_LEADER)
{
- case YAZ_MARC_LINE:
- if (mt->debug)
- wrbuf_puts (wr, "Tag: ");
- wrbuf_puts (wr, tag);
- wrbuf_puts (wr, " ");
- break;
- case YAZ_MARC_SIMPLEXML:
- wrbuf_printf (wr, "<field tag=\"%s\"", tag);
- break;
- case YAZ_MARC_OAIMARC:
- if (identifier_flag)
- wrbuf_printf (wr, " <varfield id=\"%s\"", tag);
- else
- wrbuf_printf (wr, " <fixfield id=\"%s\"", tag);
+ leader = n->u.leader;
+ break;
+ }
+
+ if (!leader)
+ return -1;
+ if (!atoi_n_check(leader+11, 1, &identifier_length))
+ return -1;
+
+ wr_cdata = wrbuf_alloc();
+
+ record_ptr = xmlNewNode(0, BAD_CAST "r");
+ *root_ptr = record_ptr;
+
+ ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
+ xmlSetNs(record_ptr, ns_record);
+
+ if (format)
+ xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
+ if (type)
+ xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
+ for (n = mt->nodes; n; n = n->next)
+ {
+ xmlNode *ptr;
+
+ char field[10];
+ field[0] = 'c'; /* control field element name: "c" plus up to three tag characters */
+ field[4] = '\0';
+
+ switch(n->which)
+ {
+ case YAZ_MARC_DATAFIELD:
+ add_marc_datafield_turbo_xml(mt, n, record_ptr, ns_record, wr_cdata, identifier_length);
+ break;
+ case YAZ_MARC_CONTROLFIELD:
+ wrbuf_rewind(wr_cdata);
+ wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
+ marc_iconv_reset(mt, wr_cdata);
+
+ strncpy(field + 1, n->u.controlfield.tag, 3);
+ ptr = xmlNewTextChild(record_ptr, ns_record,
+ BAD_CAST field,
+ BAD_CAST wrbuf_cstr(wr_cdata));
+ break;
+ case YAZ_MARC_COMMENT:
+ ptr = xmlNewComment(BAD_CAST n->u.comment);
+ xmlAddChild(record_ptr, ptr);
+ break;
+ case YAZ_MARC_LEADER:
+ xmlNewTextChild(record_ptr, ns_record, BAD_CAST "l",
+ BAD_CAST n->u.leader);
+ break;
+ }
+ }
+ wrbuf_destroy(wr_cdata);
+ return 0;
+}
+
+/** \brief Builds a libxml2 tree for the record held in mt.
+ *
+ * Element names used: "record", "leader", "controlfield", "datafield"
+ * and "subfield". Field and subfield text passes through mt->iconv_cd
+ * before it is handed to libxml2.
+ *
+ * \param mt handle whose node list (mt->nodes) is converted
+ * \param root_ptr receives the newly created "record" element
+ * \param ns namespace URI declared on the record element
+ * \param format value for the "format" attribute; skipped when NULL
+ * \param type value for the "type" attribute; skipped when NULL
+ * \retval 0 tree built
+ * \retval -1 no leader node, or leader[11] rejected by atoi_n_check;
+ * *root_ptr is not assigned in that case
+ *
+ * NOTE(review): the results of xmlNewNode/xmlNewNs/xmlNewChild are used
+ * without a NULL check.
+ */
+int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
+ const char *ns,
+ const char *format,
+ const char *type)
+{
+ struct yaz_marc_node *n;
+ int identifier_length;
+ const char *leader = 0;
+ xmlNode *record_ptr;
+ xmlNsPtr ns_record;
+ WRBUF wr_cdata = 0;
+ /* use the first leader node found in the list */
+ for (n = mt->nodes; n; n = n->next)
+ if (n->which == YAZ_MARC_LEADER)
+ {
+ leader = n->u.leader;
break;
- case YAZ_MARC_MARCXML:
- if (identifier_flag)
- wrbuf_printf (wr, " <datafield tag=\"%s\"", tag);
- else
- wrbuf_printf (wr, " <controlfield tag=\"%s\"", tag);
}
-
- if (identifier_flag)
- {
- for (j = 0; j<indicator_length; j++, i++)
+
+ if (!leader)
+ return -1;
+ if (!atoi_n_check(leader+11, 1, &identifier_length))
+ return -1;
+
+ wr_cdata = wrbuf_alloc(); /* scratch buffer, rewound before each converted value */
+
+ record_ptr = xmlNewNode(0, BAD_CAST "record");
+ *root_ptr = record_ptr;
+
+ ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
+ xmlSetNs(record_ptr, ns_record);
+
+ if (format)
+ xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
+ if (type)
+ xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
+ for (n = mt->nodes; n; n = n->next)
+ {
+ struct yaz_marc_subfield *s;
+ xmlNode *ptr;
+
+ switch(n->which)
+ {
+ case YAZ_MARC_DATAFIELD:
+ ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
+ xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
+ if (n->u.datafield.indicator)
{
- switch(mt->xml)
+ int i;
+ for (i = 0; n->u.datafield.indicator[i]; i++)
{
- case YAZ_MARC_ISO2709:
- wrbuf_putc(wr, buf[i]);
- break;
- case YAZ_MARC_LINE:
- if (mt->debug)
- wrbuf_puts (wr, " Ind: ");
- wrbuf_putc(wr, buf[i]);
- break;
- case YAZ_MARC_SIMPLEXML:
- wrbuf_printf(wr, " Indicator%d=\"%c\"", j+1, buf[i]);
- break;
- case YAZ_MARC_OAIMARC:
- wrbuf_printf(wr, " i%d=\"%c\"", j+1, buf[i]);
- break;
- case YAZ_MARC_MARCXML:
- wrbuf_printf(wr, " ind%d=\"%c\"", j+1, buf[i]);
+ char ind_str[6]; /* "ind" + digits + NUL; NOTE(review): sprintf below fits only while i+1 < 100 - confirm indicator length is bounded */
+ char ind_val[2];
+ /* one attribute per indicator character: ind1, ind2, ... */
+ sprintf(ind_str, "ind%d", i+1);
+ ind_val[0] = n->u.datafield.indicator[i];
+ ind_val[1] = '\0';
+ xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
}
}
- }
- if (mt->xml == YAZ_MARC_SIMPLEXML || mt->xml == YAZ_MARC_MARCXML
- || mt->xml == YAZ_MARC_OAIMARC)
+ for (s = n->u.datafield.subfields; s; s = s->next)
+ {
+ xmlNode *ptr_subfield;
+ size_t using_code_len = get_subfield_len(mt, s->code_data,
+ identifier_length);
+ wrbuf_rewind(wr_cdata); /* element text: the bytes after the subfield code */
+ wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
+ s->code_data + using_code_len);
+ marc_iconv_reset(mt, wr_cdata);
+ ptr_subfield = xmlNewTextChild(
+ ptr, ns_record,
+ BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
+ /* "code" attribute: the first using_code_len bytes of code_data */
+ wrbuf_rewind(wr_cdata);
+ wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
+ s->code_data, using_code_len);
+ xmlNewProp(ptr_subfield, BAD_CAST "code",
+ BAD_CAST wrbuf_cstr(wr_cdata));
+ }
+ break;
+ case YAZ_MARC_CONTROLFIELD:
+ wrbuf_rewind(wr_cdata);
+ wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
+ marc_iconv_reset(mt, wr_cdata);
+
+ ptr = xmlNewTextChild(record_ptr, ns_record,
+ BAD_CAST "controlfield",
+ BAD_CAST wrbuf_cstr(wr_cdata));
+
+ xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
+ break;
+ case YAZ_MARC_COMMENT:
+ ptr = xmlNewComment(BAD_CAST n->u.comment);
+ xmlAddChild(record_ptr, ptr);
+ break;
+ case YAZ_MARC_LEADER:
+ xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
+ BAD_CAST n->u.leader); /* leader text is passed through without iconv conversion */
+ break;
+ }
+ }
+ wrbuf_destroy(wr_cdata);
+ return 0;
+}
+
+#endif
+/** \brief Writes the record held in mt to wr in ISO2709 form.
+ *
+ * Output order: a 24-byte leader, the directory, the field data and a
+ * final ISO2709_RS. Record length and base address in the leader are
+ * recomputed; leader bytes 5-11 and 17-23 are copied from the stored
+ * leader.
+ *
+ * \param mt handle whose node list (mt->nodes) is written
+ * \param wr buffer the record is appended to
+ * \retval 0 record written
+ * \retval -1 no leader node, or one of leader[10], [11], [20], [21],
+ * [22] is rejected by atoi_n_check; nothing is written to wr
+ *
+ * identifier_length and length_implementation are only validated here;
+ * this function does not use their values afterwards.
+ */
+int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
+{
+ struct yaz_marc_node *n;
+ int indicator_length;
+ int identifier_length;
+ int length_data_entry;
+ int length_starting;
+ int length_implementation;
+ int data_offset = 0;
+ const char *leader = 0;
+ WRBUF wr_dir, wr_head, wr_data_tmp;
+ int base_address;
+ /* no break in this scan: the last leader node in the list is used */
+ for (n = mt->nodes; n; n = n->next)
+ if (n->which == YAZ_MARC_LEADER)
+ leader = n->u.leader;
+
+ if (!leader)
+ return -1;
+ if (!atoi_n_check(leader+10, 1, &indicator_length))
+ return -1;
+ if (!atoi_n_check(leader+11, 1, &identifier_length))
+ return -1;
+ if (!atoi_n_check(leader+20, 1, &length_data_entry))
+ return -1;
+ if (!atoi_n_check(leader+21, 1, &length_starting))
+ return -1;
+ if (!atoi_n_check(leader+22, 1, &length_implementation))
+ return -1;
+ /* pass 1: build the directory; each field is converted into wr_data_tmp only to measure its length */
+ wr_data_tmp = wrbuf_alloc();
+ wr_dir = wrbuf_alloc();
+ for (n = mt->nodes; n; n = n->next)
+ {
+ int data_length = 0;
+ struct yaz_marc_subfield *s;
+
+ switch(n->which)
{
- wrbuf_puts (wr, ">");
- if (identifier_flag)
- wrbuf_puts (wr, "\n");
+ case YAZ_MARC_DATAFIELD:
+ wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
+ data_length += indicator_length;
+ wrbuf_rewind(wr_data_tmp);
+ for (s = n->u.datafield.subfields; s; s = s->next)
+ {
+ /* write dummy IDFS + content */
+ wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
+ wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
+ marc_iconv_reset(mt, wr_data_tmp);
+ }
+ /* write dummy FS (makes MARC-8 to become ASCII) */
+ wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
+ marc_iconv_reset(mt, wr_data_tmp);
+ data_length += wrbuf_len(wr_data_tmp);
+ break;
+ case YAZ_MARC_CONTROLFIELD:
+ wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
+
+ wrbuf_rewind(wr_data_tmp);
+ wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
+ n->u.controlfield.data);
+ marc_iconv_reset(mt, wr_data_tmp);
+ wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
+ marc_iconv_reset(mt, wr_data_tmp);
+ data_length += wrbuf_len(wr_data_tmp);
+ break;
+ case YAZ_MARC_COMMENT:
+ break;
+ case YAZ_MARC_LEADER:
+ break;
}
- if (mt->xml == YAZ_MARC_LINE)
+ if (data_length) /* comment and leader nodes leave this 0 and get no directory entry */
{
- if (mt->debug)
- wrbuf_puts (wr, " Fields: ");
+ wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
+ wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
+ data_offset += data_length;
}
- if (identifier_flag)
+ }
+ /* mark end of directory */
+ wrbuf_putc(wr_dir, ISO2709_FS);
+
+ /* base address of data (comes after leader+directory) */
+ base_address = 24 + wrbuf_len(wr_dir);
+
+ wr_head = wrbuf_alloc();
+
+ /* write record length */
+ wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
+ /* from "original" leader */
+ wrbuf_write(wr_head, leader+5, 7);
+ /* base address of data */
+ wrbuf_printf(wr_head, "%05d", base_address);
+ /* from "original" leader */
+ wrbuf_write(wr_head, leader+17, 7);
+
+ wrbuf_write(wr, wrbuf_buf(wr_head), 24);
+ wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
+ wrbuf_destroy(wr_head);
+ wrbuf_destroy(wr_dir);
+ wrbuf_destroy(wr_data_tmp);
+ /* pass 2: append the field data itself, walking the nodes in the same order */
+ for (n = mt->nodes; n; n = n->next)
+ {
+ struct yaz_marc_subfield *s;
+
+ switch(n->which)
{
- while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS && i < end_offset)
+ case YAZ_MARC_DATAFIELD:
+ wrbuf_write(wr, n->u.datafield.indicator, indicator_length); /* NOTE(review): indicator is read for indicator_length bytes with no NULL or length check */
+ for (s = n->u.datafield.subfields; s; s = s->next)
{
- int i0;
- i++;
- switch(mt->xml)
- {
- case YAZ_MARC_ISO2709:
- --i;
- wrbuf_iconv_write(wr, mt->iconv_cd,
- buf+i, identifier_length);
- i += identifier_length;
- break;
- case YAZ_MARC_LINE:
- wrbuf_puts (wr, " $");
- for (j = 1; j<identifier_length; j++, i++)
- wrbuf_putc (wr, buf[i]);
- wrbuf_putc (wr, ' ');
- break;
- case YAZ_MARC_SIMPLEXML:
- wrbuf_puts (wr, " <subfield code=\"");
- for (j = 1; j<identifier_length; j++, i++)
- wrbuf_putc (wr, buf[i]);
- wrbuf_puts (wr, "\">");
- break;
- case YAZ_MARC_OAIMARC:
- wrbuf_puts (wr, " <subfield label=\"");
- for (j = 1; j<identifier_length; j++, i++)
- wrbuf_putc (wr, buf[i]);
- wrbuf_puts (wr, "\">");
- break;
- case YAZ_MARC_MARCXML:
- wrbuf_puts (wr, " <subfield code=\"");
- for (j = 1; j<identifier_length; j++, i++)
- wrbuf_putc (wr, buf[i]);
- wrbuf_puts (wr, "\">");
- break;
- }
- i0 = i;
- while (buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
- buf[i] != ISO2709_FS && i < end_offset)
- i++;
- marc_cdata(mt, buf + i0, i - i0, wr);
-
- if (mt->xml == YAZ_MARC_ISO2709 && buf[i] != ISO2709_IDFS)
- marc_cdata(mt, buf + i, 1, wr);
-
- if (mt->xml == YAZ_MARC_SIMPLEXML ||
- mt->xml == YAZ_MARC_MARCXML ||
- mt->xml == YAZ_MARC_OAIMARC)
- wrbuf_puts (wr, "</subfield>\n");
+ wrbuf_putc(wr, ISO2709_IDFS);
+ wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
+ marc_iconv_reset(mt, wr);
}
+ wrbuf_putc(wr, ISO2709_FS);
+ break;
+ case YAZ_MARC_CONTROLFIELD:
+ wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
+ marc_iconv_reset(mt, wr);
+ wrbuf_putc(wr, ISO2709_FS);
+ break;
+ case YAZ_MARC_COMMENT:
+ break;
+ case YAZ_MARC_LEADER:
+ break;
}
- else
- {
- int i0 = i;
- while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS && i < end_offset)
- i++;
- marc_cdata(mt, buf + i0, i - i0, wr);
- if (mt->xml == YAZ_MARC_ISO2709)
- marc_cdata(mt, buf + i, 1, wr);
- }
- if (mt->xml == YAZ_MARC_LINE)
- wrbuf_putc (wr, '\n');
- if (i < end_offset)
- wrbuf_puts (wr, " <!-- separator but not at end of field -->\n");
- if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
- wrbuf_puts (wr, " <!-- no separator at end of field -->\n");
- switch(mt->xml)
+ }
+ wrbuf_printf(wr, "%c", ISO2709_RS);
+ return 0;
+}
+
+/**
+ * \brief Writes the record held in mt to w as a JSON object.
+ *
+ * The object has a "leader" string and a "fields" array holding one
+ * object per control field or data field, in node order. Comment nodes
+ * produce no output. Tags, subfield codes and values go through the
+ * JSON-escaping wrbuf helpers.
+ *
+ * \param mt handle whose node list (mt->nodes) is written
+ * \param w buffer the JSON text is appended to
+ * \retval 0 record written
+ * \retval -1 no leader node, or leader[11] rejected by atoi_n_check;
+ * nothing is appended to w in that case
+ */
+int yaz_marc_write_json(yaz_marc_t mt, WRBUF w)
+{
+    int identifier_length;
+    struct yaz_marc_node *n;
+    const char *leader = 0;
+    int first = 1;
+
+    /* no break: the last leader node in the list is used */
+    for (n = mt->nodes; n; n = n->next)
+        if (n->which == YAZ_MARC_LEADER)
+            leader = n->u.leader;
+
+    if (!leader)
+        return -1;
+
+    if (!atoi_n_check(leader+11, 1, &identifier_length))
+        return -1;
+
+    /* emit only after validation so a failure leaves w untouched */
+    wrbuf_puts(w, "{\n");
+    wrbuf_puts(w, "\t\"leader\":\"");
+    wrbuf_json_puts(w, leader);
+    wrbuf_puts(w, "\",\n");
+    wrbuf_puts(w, "\t\"fields\":\n\t[\n");
+
+    for (n = mt->nodes; n; n = n->next)
+    {
+        struct yaz_marc_subfield *s;
+        const char *sep = "";
+        switch (n->which)
{
- case YAZ_MARC_SIMPLEXML:
- wrbuf_puts (wr, "</field>\n");
+        case YAZ_MARC_LEADER:
+        case YAZ_MARC_COMMENT:
break;
- case YAZ_MARC_OAIMARC:
- if (identifier_flag)
- wrbuf_puts (wr, " </varfield>\n");
+        case YAZ_MARC_CONTROLFIELD:
+            /* "first" suppresses the separator before the first field */
+            if (first)
+                first = 0;
else
- wrbuf_puts (wr, " </fixfield>\n");
+                wrbuf_puts(w, ",\n");
+            wrbuf_puts(w, "\t\t{\n\t\t\t\"");
+            wrbuf_iconv_json_puts(w, mt->iconv_cd, n->u.controlfield.tag);
+            wrbuf_puts(w, "\":\"");
+            wrbuf_iconv_json_puts(w, mt->iconv_cd, n->u.controlfield.data);
+            wrbuf_puts(w, "\"\n\t\t}");
break;
- case YAZ_MARC_MARCXML:
- if (identifier_flag)
- wrbuf_puts (wr, " </datafield>\n");
+        case YAZ_MARC_DATAFIELD:
+            if (first)
+                first = 0;
else
- wrbuf_puts (wr, " </controlfield>\n");
+                wrbuf_puts(w, ",\n");
+
+            wrbuf_puts(w, "\t\t{\n\t\t\t\"");
+            wrbuf_json_puts(w, n->u.datafield.tag);
+            wrbuf_puts(w, "\":\n\t\t\t{\n\t\t\t\t\"subfields\":\n\t\t\t\t[\n");
+            for (s = n->u.datafield.subfields; s; s = s->next)
+            {
+                /* code_data holds the subfield code followed by its value */
+                size_t using_code_len = get_subfield_len(mt, s->code_data,
+                                                         identifier_length);
+                wrbuf_puts(w, sep);
+                sep = ",\n";
+                wrbuf_puts(w, "\t\t\t\t\t{\n\t\t\t\t\t\t\"");
+                wrbuf_iconv_json_write(w, mt->iconv_cd,
+                                       s->code_data, using_code_len);
+                wrbuf_puts(w, "\":\"");
+                wrbuf_iconv_json_puts(w, mt->iconv_cd,
+                                      s->code_data + using_code_len);
+                wrbuf_puts(w, "\"\n\t\t\t\t\t}");
+            }
+            wrbuf_puts(w, "\n\t\t\t\t]");
+            /* indicator may be NULL; yaz_marc_write_xml guards it the same
+               way. An empty string simply yields zero iterations. */
+            if (n->u.datafield.indicator)
+            {
+                int i;
+                for (i = 0; n->u.datafield.indicator[i]; i++)
+                {
+                    wrbuf_printf(w, ",\n\t\t\t\t\"ind%d\":\"%c\"", i + 1,
+                                 n->u.datafield.indicator[i]);
+                }
+            }
+            wrbuf_puts(w, "\n\t\t\t}\n");
+            wrbuf_puts(w, "\n\t\t}");
break;
}
}
- switch (mt->xml)
- {
- case YAZ_MARC_LINE:
- wrbuf_puts (wr, "");
- break;
- case YAZ_MARC_SIMPLEXML:
- wrbuf_puts (wr, "</iso2709>\n");
- break;
- case YAZ_MARC_OAIMARC:
- wrbuf_puts (wr, "</oai_marc>\n");
- break;
- case YAZ_MARC_MARCXML:
- wrbuf_puts (wr, "</record>\n");
- break;
- case YAZ_MARC_ISO2709:
- wrbuf_putc (wr, ISO2709_RS);
- break;
- }
- return record_length;
+    wrbuf_puts(w, "\n\t]\n");
+    wrbuf_puts(w, "}\n");
+    return 0;
+}
+
+/**
+ * \brief Reads one ISO2709 record from buf and writes it to wr.
+ * \param mt MARC handle
+ * \param buf input buffer
+ * \param bsize size of buf, passed on to yaz_marc_read_iso2709
+ * \param wr output buffer, passed on to yaz_marc_write_mode
+ * \return the reader's result when it is <= 0; -1 when the writer
+ * reports non-zero; otherwise the reader's positive result
+ */
+int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
+{
+    int consumed = yaz_marc_read_iso2709(mt, buf, bsize);
+
+    if (consumed <= 0)
+        return consumed;
+    /* the writer yields 0 for OK; anything else is reported as -1 */
+    return yaz_marc_write_mode(mt, wr) == 0 ? consumed : -1;
}
+/**
+ * \brief Decodes buf into the handle's own buffer (mt->m_wr).
+ * \param result when non-NULL, receives wrbuf_cstr(mt->m_wr); the
+ * pointer refers to storage owned by mt->m_wr
+ * \param rsize when non-NULL, receives wrbuf_len(mt->m_wr)
+ * \return whatever yaz_marc_decode_wrbuf returns; result and rsize are
+ * assigned regardless of that value
+ */
int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
- char **result, int *rsize)
+                         const char **result, size_t *rsize)
{
- int r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
- if (r > 0)
- {
- if (result)
- *result = wrbuf_buf(mt->m_wr);
- if (rsize)
- *rsize = wrbuf_len(mt->m_wr);
- }
+    WRBUF out = mt->m_wr;
+    int r;
+
+    /* start from an empty buffer on every call */
+    wrbuf_rewind(out);
+    r = yaz_marc_decode_wrbuf(mt, buf, bsize, out);
+    if (result)
+        *result = wrbuf_cstr(out);
+    if (rsize)
+        *rsize = wrbuf_len(out);
return r;
}
void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
{
- if (mt)
- mt->xml = xmlmode;
+ mt->output_format = xmlmode; /* stores xmlmode in mt->output_format.
+ * NOTE(review): the removed lines guarded against mt == NULL and this
+ * line does not - confirm no caller passes NULL. */
}
void yaz_marc_debug(yaz_marc_t mt, int level)
mt->iconv_cd = cd;
}
-/* depricated */
-int yaz_marc_decode(const char *buf, WRBUF wr, int debug, int bsize, int xml)
+yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt) /* getter: returns the iconv handle stored in mt->iconv_cd */
{
- yaz_marc_t mt = yaz_marc_create();
- int r;
+ return mt->iconv_cd;
+}
- mt->debug = debug;
- mt->xml = xml;
- r = yaz_marc_decode_wrbuf(mt, buf, bsize, wr);
- yaz_marc_destroy(mt);
- return r;
+/**
+ * \brief Overwrites part of the first leader node of the record.
+ *
+ * Copies the characters of str (without its terminating NUL) into the
+ * leader starting at offset off. The leader is handled as 24 bytes
+ * elsewhere in this file, so the copy is clipped to that size instead
+ * of writing past it.
+ *
+ * \param mt handle whose node list is searched for a leader
+ * \param off zero-based offset into the leader; offsets >= 24 are ignored
+ * \param str replacement text; a NULL pointer is ignored
+ *
+ * Does nothing when the record has no leader node.
+ */
+void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
+{
+    const size_t leader_size = 24;
+    struct yaz_marc_node *n;
+
+    if (!str || off >= leader_size)
+        return;
+    for (n = mt->nodes; n; n = n->next)
+        if (n->which == YAZ_MARC_LEADER)
+        {
+            size_t len = strlen(str);
+
+            /* clip so that off + len never exceeds the leader */
+            if (len > leader_size - off)
+                len = leader_size - off;
+            memcpy(n->u.leader + off, str, len);
+            break;
+        }
}
-/* depricated */
-int marc_display_wrbuf (const char *buf, WRBUF wr, int debug, int bsize)
+/**
+ * \brief Replaces the leader specification stored in the handle.
+ *
+ * Any previous specification is freed first. A non-NULL leader_spec is
+ * validated by running marc_exec_leader against a 24-byte scratch
+ * leader, and a copy is stored only when that succeeds.
+ *
+ * \param mt MARC handle
+ * \param leader_spec new specification, or NULL to clear it
+ * \retval 0 specification stored (or cleared)
+ * \retval -1 specification rejected; mt->leader_spec is left cleared
+ */
+int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
{
- return yaz_marc_decode(buf, wr, debug, bsize, 0);
+    char scratch[24];
+
+    xfree(mt->leader_spec);
+    mt->leader_spec = 0;
+    if (!leader_spec)
+        return 0;
+    /* dry run: only the return code matters, scratch is discarded */
+    if (marc_exec_leader(leader_spec, scratch, 24))
+        return -1;
+    mt->leader_spec = xstrdup(leader_spec);
+    return 0;
}
-/* depricated */
-int marc_display_exl (const char *buf, FILE *outf, int debug, int bsize)
+/**
+ * \brief Applies a leader specification to a leader buffer.
+ *
+ * The specification is a comma-separated list of "pos=value" items.
+ * A value starting with a single quote must contain a closing quote;
+ * the text between the quotes is copied to leader[pos...]. A value
+ * starting with a digit is converted with atoi and stored as the single
+ * byte leader[pos]. A value is read up to the next comma and is capped
+ * at 20 characters.
+ *
+ * \param leader_spec specification; NULL is accepted and does nothing
+ * \param leader buffer that is modified in place
+ * \param size number of bytes available in leader
+ * \retval 0 all items applied
+ * \retval -1 malformed item, pos outside [0, size), or quoted text
+ * that would run past size; items before the failing one
+ * have already been applied
+ */
+static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
{
- yaz_marc_t mt = yaz_marc_create();
- int r;
- mt->debug = debug;
- r = yaz_marc_decode_wrbuf (mt, buf, bsize, mt->m_wr);
- if (!outf)
- outf = stdout;
- if (r > 0)
- fwrite (wrbuf_buf(mt->m_wr), 1, wrbuf_len(mt->m_wr), outf);
- yaz_marc_destroy(mt);
- return r;
+    const char *spec = leader_spec;
+
+    if (!spec)
+        return 0;
+    for (;;)
+    {
+        char value[21];
+        int offset;
+        int consumed = 0;
+        int matched = sscanf(spec, "%d=%20[^,]%n", &offset, value, &consumed);
+
+        /* need both conversions and at least "d=v" consumed */
+        if (matched < 2 || consumed < 3)
+            return -1;
+        if (offset < 0 || (size_t) offset >= size)
+            return -1;
+
+        if (value[0] == '\'')
+        {
+            const char *closing = strchr(value + 1, '\'');
+            size_t len;
+
+            if (!closing)
+                return -1;
+            len = closing - (value + 1);
+            if (len + offset > size)
+                return -1;
+            memcpy(leader + offset, value + 1, len);
+        }
+        else if (value[0] >= '0' && value[0] <= '9')
+            leader[offset] = atoi(value);
+        else
+            return -1;
+
+        spec += consumed;
+        if (*spec != ',')
+            break;
+        spec++; /* step over the separator to the next item */
+    }
+    return 0;
}
-/* depricated */
-int marc_display_ex (const char *buf, FILE *outf, int debug)
+/**
+ * \brief Maps a format name to its YAZ_MARC_* output mode.
+ * \param arg one of "marc", "marcxml", "turbomarc", "marcxchange",
+ * "line" or "json" (exact, case-sensitive match)
+ * \return the matching YAZ_MARC_* constant, or -1 for any other name
+ */
+int yaz_marc_decode_formatstr(const char *arg)
{
- return marc_display_exl (buf, outf, debug, -1);
+    /* the names are distinct, so the first match is the only match */
+    if (strcmp(arg, "marc") == 0)
+        return YAZ_MARC_ISO2709;
+    if (strcmp(arg, "marcxml") == 0)
+        return YAZ_MARC_MARCXML;
+    if (strcmp(arg, "turbomarc") == 0)
+        return YAZ_MARC_TURBOMARC;
+    if (strcmp(arg, "marcxchange") == 0)
+        return YAZ_MARC_XCHANGE;
+    if (strcmp(arg, "line") == 0)
+        return YAZ_MARC_LINE;
+    if (strcmp(arg, "json") == 0)
+        return YAZ_MARC_JSON;
+    return -1;
}
-/* depricated */
-int marc_display (const char *buf, FILE *outf)
+void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable) /* setter: stores enable in mt->write_using_libxml2 */
{
- return marc_display_ex (buf, outf, 0);
+ mt->write_using_libxml2 = enable;
}
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+