X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Fmarcdisp.c;h=8a409399a83c5ba47fdc88eda0d65917cb213799;hp=f56f280f35dd9a2d39f040ea41fb202b9df0bb93;hb=a83a2555fe95266031b6f3043e344eb1c1abd193;hpb=e26a0bc5ba4e66ca15bc2863b545e7d6b7d9752e diff --git a/src/marcdisp.c b/src/marcdisp.c index f56f280..8a40939 100644 --- a/src/marcdisp.c +++ b/src/marcdisp.c @@ -1,8 +1,6 @@ -/* - * Copyright (C) 1995-2006, Index Data ApS +/* This file is part of the YAZ toolkit. + * Copyright (C) 1995-2010 Index Data * See the file LICENSE for details. - * - * $Id: marcdisp.c,v 1.28 2006-04-20 20:35:02 adam Exp $ */ /** @@ -20,19 +18,26 @@ #include #include +#include #include #include #include #include #include +#include +#include -#if HAVE_XML2 +#if YAZ_HAVE_XML2 #include #include #endif -static void yaz_marc_reset(yaz_marc_t mt); - +enum yaz_collection_state { + no_collection, + collection_first, + collection_second +}; + /** \brief node types for yaz_marc_node */ enum YAZ_MARC_NODE_TYPE { @@ -82,11 +87,15 @@ struct yaz_marc_subfield { struct yaz_marc_t_ { WRBUF m_wr; NMEM nmem; - int xml; + int input_format; + int output_format; int debug; + int write_using_libxml2; + enum yaz_collection_state enable_collection; yaz_iconv_t iconv_cd; char subfield_str[8]; char endline_str[8]; + char *leader_spec; struct yaz_marc_node *nodes; struct yaz_marc_node **nodes_pp; struct yaz_marc_subfield **subfield_pp; @@ -95,10 +104,13 @@ struct yaz_marc_t_ { yaz_marc_t yaz_marc_create(void) { yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt)); - mt->xml = YAZ_MARC_LINE; + mt->output_format = YAZ_MARC_LINE; mt->debug = 0; + mt->write_using_libxml2 = 0; + mt->enable_collection = no_collection; mt->m_wr = wrbuf_alloc(); mt->iconv_cd = 0; + mt->leader_spec = 0; strcpy(mt->subfield_str, " $"); strcpy(mt->endline_str, "\n"); @@ -112,71 +124,86 @@ void yaz_marc_destroy(yaz_marc_t mt) if (!mt) return ; nmem_destroy(mt->nmem); - wrbuf_free (mt->m_wr, 1); - xfree (mt); + wrbuf_destroy(mt->m_wr); + xfree(mt->leader_spec); + xfree(mt); +} + +NMEM yaz_marc_get_nmem(yaz_marc_t mt) +{ + return mt->nmem; +} + +static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr) +{ + wrbuf_iconv_reset(wr, mt->iconv_cd); } -struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt) +static int marc_exec_leader(const char *leader_spec, char *leader, + size_t size); + + +static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt) { - struct yaz_marc_node *n = nmem_malloc(mt->nmem, sizeof(*n)); + struct yaz_marc_node *n = (struct yaz_marc_node *) + nmem_malloc(mt->nmem, sizeof(*n)); n->next = 0; *mt->nodes_pp = n; mt->nodes_pp = &n->next; return n; } -void yaz_marc_add_comment(yaz_marc_t mt, char *comment) +#if YAZ_HAVE_XML2 +void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag, + const xmlNode *ptr_data) { struct yaz_marc_node *n = yaz_marc_add_node(mt); - n->which = YAZ_MARC_COMMENT; - n->u.comment = nmem_strdup(mt->nmem, comment); + n->which = YAZ_MARC_CONTROLFIELD; + n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem); + n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem); } -#if HAVE_XML2 -static char *yaz_marc_get_xml_text(const xmlNode *ptr_cdata, NMEM nmem) +void yaz_marc_add_controlfield_turbo_xml(yaz_marc_t mt, char *tag, + const xmlNode *ptr_data) { - char *cdata; - int len = 0; - const xmlNode *ptr; - - for (ptr = ptr_cdata; ptr; ptr = ptr->next) - if (ptr->type == XML_TEXT_NODE) - len += xmlStrlen(ptr->content); - cdata = (char *) nmem_malloc(nmem, len+1); - *cdata = '\0'; - for (ptr = ptr_cdata; ptr; ptr = ptr->next) - if (ptr->type == XML_TEXT_NODE) - strcat(cdata, (const char *) ptr->content); - return cdata; + struct yaz_marc_node *n = yaz_marc_add_node(mt); + n->which = YAZ_MARC_CONTROLFIELD; + n->u.controlfield.tag = tag; + n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem); } + #endif + +void yaz_marc_add_comment(yaz_marc_t mt, char *comment) +{ + struct yaz_marc_node *n = yaz_marc_add_node(mt); + n->which = YAZ_MARC_COMMENT; + n->u.comment = nmem_strdup(mt->nmem, comment); +} + void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...) { va_list ap; char buf[200]; - va_start(ap, fmt); -#ifdef WIN32 - _vsnprintf(buf, sizeof(buf)-1, fmt, ap); -#else -/* !WIN32 */ -#if HAVE_VSNPRINTF - vsnprintf(buf, sizeof(buf), fmt, ap); -#else - vsprintf(buf, fmt, ap); -#endif -#endif -/* WIN32 */ + va_start(ap, fmt); + yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap); yaz_marc_add_comment(mt, buf); va_end (ap); } +int yaz_marc_get_debug(yaz_marc_t mt) +{ + return mt->debug; +} + void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len) { struct yaz_marc_node *n = yaz_marc_add_node(mt); n->which = YAZ_MARC_LEADER; n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len); + marc_exec_leader(mt->leader_spec, n->u.leader, leader_len); } void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag, @@ -200,17 +227,6 @@ void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag, } } -#if HAVE_XML2 -void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag, - const xmlNode *ptr_data) -{ - struct yaz_marc_node *n = yaz_marc_add_node(mt); - n->which = YAZ_MARC_CONTROLFIELD; - n->u.controlfield.tag = yaz_marc_get_xml_text(ptr_tag, mt->nmem); - n->u.controlfield.data = yaz_marc_get_xml_text(ptr_data, mt->nmem); -} -#endif - void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag, const char *indicator, size_t indicator_len) { @@ -225,13 +241,13 @@ void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag, mt->subfield_pp = &n->u.datafield.subfields; } -#if HAVE_XML2 +#if YAZ_HAVE_XML2 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag, const char *indicator, size_t indicator_len) { struct yaz_marc_node *n = yaz_marc_add_node(mt); n->which = YAZ_MARC_DATAFIELD; - n->u.datafield.tag = yaz_marc_get_xml_text(ptr_tag, mt->nmem); + n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem); n->u.datafield.indicator = nmem_strdupn(mt->nmem, indicator, indicator_len); n->u.datafield.subfields = 0; @@ -239,6 +255,25 @@ void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag, /* make subfield_pp the current (last one) */ mt->subfield_pp = &n->u.datafield.subfields; } + +struct yaz_marc_node* yaz_marc_add_datafield_turbo_xml(yaz_marc_t mt, char *tag_value) +{ + struct yaz_marc_node *n = yaz_marc_add_node(mt); + n->which = YAZ_MARC_DATAFIELD; + n->u.datafield.tag = tag_value; + n->u.datafield.indicator = 0; + n->u.datafield.subfields = 0; + + /* make subfield_pp the current (last one) */ + mt->subfield_pp = &n->u.datafield.subfields; + return n; +} + +void yaz_marc_datafield_set_indicators(struct yaz_marc_node *n, char *indicator) +{ + n->u.datafield.indicator = indicator; +} + #endif void yaz_marc_add_subfield(yaz_marc_t mt, @@ -259,7 +294,8 @@ void yaz_marc_add_subfield(yaz_marc_t mt, if (mt->subfield_pp) { - struct yaz_marc_subfield *n = nmem_malloc(mt->nmem, sizeof(*n)); + struct yaz_marc_subfield *n = (struct yaz_marc_subfield *) + nmem_malloc(mt->nmem, sizeof(*n)); n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len); n->next = 0; /* mark subfield_pp to point to this one, so we append here next */ @@ -268,31 +304,13 @@ void yaz_marc_add_subfield(yaz_marc_t mt, } } -static int atoi_n_check(const char *buf, int size, int *val) -{ - if (!isdigit(*(const unsigned char *) buf)) - return 0; - *val = atoi_n(buf, size); - return 1; -} - -/** \brief reads the MARC 24 bytes leader and checks content - \param mt handle - \param leader of the 24 byte leader - \param indicator_length indicator length - \param identifier_length identifier length - \param base_address base address - \param length_data_entry length of data entry - \param length_starting length of starting - \param length_implementation length of implementation defined data -*/ -static void yaz_marc_read_leader(yaz_marc_t mt, const char *leader_c, - int *indicator_length, - int *identifier_length, - int *base_address, - int *length_data_entry, - int *length_starting, - int *length_implementation) +void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c, + int *indicator_length, + int *identifier_length, + int *base_address, + int *length_data_entry, + int *length_starting, + int *length_implementation) { char leader[24]; @@ -394,7 +412,7 @@ static size_t cdata_one_character(yaz_marc_t mt, const char *buf) return 1; /* we don't know */ } -static void yaz_marc_reset(yaz_marc_t mt) +void yaz_marc_reset(yaz_marc_t mt) { nmem_reset(mt->nmem); mt->nodes = 0; @@ -402,6 +420,52 @@ static void yaz_marc_reset(yaz_marc_t mt) mt->subfield_pp = 0; } +int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr) +{ + struct yaz_marc_node *n; + int identifier_length; + const char *leader = 0; + + for (n = mt->nodes; n; n = n->next) + if (n->which == YAZ_MARC_LEADER) + { + leader = n->u.leader; + break; + } + + if (!leader) + return -1; + if (!atoi_n_check(leader+11, 1, &identifier_length)) + return -1; + + for (n = mt->nodes; n; n = n->next) + { + switch(n->which) + { + case YAZ_MARC_COMMENT: + wrbuf_iconv_write(wr, mt->iconv_cd, + n->u.comment, strlen(n->u.comment)); + wrbuf_puts(wr, "\n"); + break; + default: + break; + } + } + return 0; +} + +static size_t get_subfield_len(yaz_marc_t mt, const char *data, + int identifier_length) +{ + /* if identifier length is 2 (most MARCs) or less (probably an error), + the code is a single character .. However we've + seen multibyte codes, so see how big it really is */ + if (identifier_length > 2) + return identifier_length - 1; + else + return cdata_one_character(mt, data); +} + int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr) { struct yaz_marc_node *n; @@ -430,57 +494,91 @@ int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr) n->u.datafield.indicator); for (s = n->u.datafield.subfields; s; s = s->next) { - /* if identifier length is 2 (most MARCs), - the code is a single character .. However we've - seen multibyte codes, so see how big it really is */ - size_t using_code_len = - (identifier_length != 2) ? identifier_length - 1 - : - cdata_one_character(mt, s->code_data); + size_t using_code_len = get_subfield_len(mt, s->code_data, + identifier_length); wrbuf_puts (wr, mt->subfield_str); wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data, using_code_len); - wrbuf_printf(wr, " "); + wrbuf_iconv_puts(wr, mt->iconv_cd, " "); wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data + using_code_len); + marc_iconv_reset(mt, wr); } wrbuf_puts (wr, mt->endline_str); break; case YAZ_MARC_CONTROLFIELD: - wrbuf_printf(wr, "%s ", n->u.controlfield.tag); + wrbuf_printf(wr, "%s", n->u.controlfield.tag); + wrbuf_iconv_puts(wr, mt->iconv_cd, " "); wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data); + marc_iconv_reset(mt, wr); wrbuf_puts (wr, mt->endline_str); break; case YAZ_MARC_COMMENT: wrbuf_puts(wr, "("); wrbuf_iconv_write(wr, mt->iconv_cd, n->u.comment, strlen(n->u.comment)); + marc_iconv_reset(mt, wr); wrbuf_puts(wr, ")\n"); break; case YAZ_MARC_LEADER: wrbuf_printf(wr, "%s\n", n->u.leader); } } + wrbuf_puts(wr, "\n"); + return 0; +} + +int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr) +{ + if (mt->enable_collection == collection_second) + { + switch(mt->output_format) + { + case YAZ_MARC_MARCXML: + case YAZ_MARC_TMARCXML: + wrbuf_printf(wr, "\n"); + break; + case YAZ_MARC_XCHANGE: + wrbuf_printf(wr, "\n"); + break; + } + } return 0; } +void yaz_marc_enable_collection(yaz_marc_t mt) +{ + mt->enable_collection = collection_first; +} + int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr) { - switch(mt->xml) + switch(mt->output_format) { case YAZ_MARC_LINE: return yaz_marc_write_line(mt, wr); case YAZ_MARC_MARCXML: + case YAZ_MARC_TMARCXML: return yaz_marc_write_marcxml(mt, wr); case YAZ_MARC_XCHANGE: return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */ case YAZ_MARC_ISO2709: return yaz_marc_write_iso2709(mt, wr); + case YAZ_MARC_CHECK: + return yaz_marc_write_check(mt, wr); } return -1; } +const char *collection_name[2] = { "collection", "collection"}; +const char *record_name[2] = { "record", "r"}; +const char *leader_name[2] = { "leader", "l"}; +const char *controlfield_name[2]= { "controlfield", "c"}; +const char *datafield_name[2] = { "datafield", "d"}; +const char *subfield_name[2] = { "subfield", "s"}; + + /** \brief common MARC XML/Xchange writer \param mt handle \param wr WRBUF output @@ -488,15 +586,17 @@ int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr) \param format record format (e.g. "MARC21") \param type record type (e.g. "Bibliographic") */ -static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr, - const char *ns, - const char *format, - const char *type) +static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr, + const char *ns, + const char *format, + const char *type) { struct yaz_marc_node *n; int identifier_length; const char *leader = 0; + int turbo = yaz_marc_get_write_format(mt) == YAZ_MARC_TMARCXML; + for (n = mt->nodes; n; n = n->next) if (n->which == YAZ_MARC_LEADER) { @@ -508,8 +608,172 @@ static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr, return -1; if (!atoi_n_check(leader+11, 1, &identifier_length)) return -1; + + if (mt->enable_collection != no_collection) + { + if (mt->enable_collection == collection_first) + wrbuf_printf(wr, "<%s xmlns=\"%s\">\n", collection_name[turbo], ns); + mt->enable_collection = collection_second; + wrbuf_printf(wr, "<%s", record_name[turbo]); + } + else + { + wrbuf_printf(wr, "<%s xmlns=\"%s\"", record_name[turbo], ns); + } + if (format) + wrbuf_printf(wr, " format=\"%.80s\"", format); + if (type) + wrbuf_printf(wr, " type=\"%.80s\"", type); + wrbuf_printf(wr, ">\n"); + for (n = mt->nodes; n; n = n->next) + { + struct yaz_marc_subfield *s; + + switch(n->which) + { + case YAZ_MARC_DATAFIELD: + + wrbuf_printf(wr, " <%s", datafield_name[turbo]); + if (!turbo) { + wrbuf_printf(wr, " tag=\""); + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag, + strlen(n->u.datafield.tag)); + wrbuf_printf(wr, "\""); + if (n->u.datafield.indicator) + { + int i; + for (i = 0; n->u.datafield.indicator[i]; i++) + { + wrbuf_printf(wr, " ind%d=\"", i+1); + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, + n->u.datafield.indicator+i, 1); + wrbuf_iconv_puts(wr, mt->iconv_cd, "\""); + } + } + wrbuf_printf(wr, ">\n"); + } else { + // TODO Not CDATA. + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag, + strlen(n->u.datafield.tag)); + // Write tag + wrbuf_printf(wr, ">\n"); + if (n->u.datafield.indicator) + { + int i; + for (i = 0; n->u.datafield.indicator[i]; i++) + { + wrbuf_printf(wr, " ", i+1); + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, + n->u.datafield.indicator+i, 1); + wrbuf_printf(wr, "", i+1); + wrbuf_puts(wr, "\n"); + } + } + } + for (s = n->u.datafield.subfields; s; s = s->next) + { + size_t using_code_len = get_subfield_len(mt, s->code_data, + identifier_length); + wrbuf_printf(wr, " <%s", subfield_name[turbo]); + if (!turbo) { + wrbuf_printf(wr, " code=\""); + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, + s->code_data, using_code_len); + wrbuf_iconv_puts(wr, mt->iconv_cd, "\">"); + } else { + // TODO check this. encode special characters. + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, + s->code_data, using_code_len); + wrbuf_puts(wr, ">"); + } + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, + s->code_data + using_code_len, + strlen(s->code_data + using_code_len)); + marc_iconv_reset(mt, wr); + wrbuf_printf(wr, "iconv_cd, + s->code_data, using_code_len); + wrbuf_puts(wr, ">\n"); + } + wrbuf_printf(wr, " iconv_cd, n->u.datafield.tag, + strlen(n->u.datafield.tag)); + wrbuf_printf(wr, ">\n", datafield_name[turbo]); + break; + case YAZ_MARC_CONTROLFIELD: + wrbuf_printf(wr, " <%s", controlfield_name[turbo]); + if (!turbo) { + wrbuf_printf(wr, " tag=\""); + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag, + strlen(n->u.controlfield.tag)); + wrbuf_iconv_puts(wr, mt->iconv_cd, "\">"); + } + else { + //TODO convert special + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag, + strlen(n->u.controlfield.tag)); + wrbuf_iconv_puts(wr, mt->iconv_cd, ">"); + } + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, + n->u.controlfield.data, + strlen(n->u.controlfield.data)); + marc_iconv_reset(mt, wr); + wrbuf_printf(wr, "iconv_cd, n->u.controlfield.tag, + strlen(n->u.controlfield.tag)); + wrbuf_puts(wr, ">\n"); + break; + case YAZ_MARC_COMMENT: + wrbuf_printf(wr, "\n"); + break; + case YAZ_MARC_LEADER: + wrbuf_printf(wr, " <%s>", leader_name[turbo]); + wrbuf_iconv_write_cdata(wr, + 0 /* no charset conversion for leader */, + n->u.leader, strlen(n->u.leader)); + wrbuf_printf(wr, " ", leader_name[turbo]); + } + } + wrbuf_printf(wr, "", record_name[turbo]); + return 0; +} + +static int yaz_marc_write_marcxml_ns2(yaz_marc_t mt, WRBUF wr, + const char *ns, + const char *format, + const char *type) +{ + struct yaz_marc_node *n; + int identifier_length; + const char *leader = 0; - wrbuf_printf(wr, "nodes; n; n = n->next) + if (n->which == YAZ_MARC_LEADER) + { + leader = n->u.leader; + break; + } + + if (!leader) + return -1; + if (!atoi_n_check(leader+11, 1, &identifier_length)) + return -1; + + if (mt->enable_collection != no_collection) + { + if (mt->enable_collection == collection_first) + wrbuf_printf(wr, "\n", ns); + mt->enable_collection = collection_second; + wrbuf_printf(wr, "nodes; n; n = n->next) { struct yaz_marc_subfield *s; + switch(n->which) { case YAZ_MARC_DATAFIELD: @@ -533,28 +798,24 @@ static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr, wrbuf_printf(wr, " ind%d=\"", i+1); wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.indicator+i, 1); - wrbuf_printf(wr, "\""); + wrbuf_iconv_puts(wr, mt->iconv_cd, "\""); } } wrbuf_printf(wr, ">\n"); for (s = n->u.datafield.subfields; s; s = s->next) { - /* if identifier length is 2 (most MARCs), - the code is a single character .. However we've - seen multibyte codes, so see how big it really is */ - size_t using_code_len = - (identifier_length != 2) ? identifier_length - 1 - : - cdata_one_character(mt, s->code_data); - - wrbuf_puts(wr, " code_data, + identifier_length); + wrbuf_iconv_puts(wr, mt->iconv_cd, " iconv_cd, s->code_data, using_code_len); - wrbuf_puts(wr, "\">"); + wrbuf_iconv_puts(wr, mt->iconv_cd, "\">"); wrbuf_iconv_write_cdata(wr, mt->iconv_cd, s->code_data + using_code_len, strlen(s->code_data + using_code_len)); - wrbuf_puts(wr, "\n"); + marc_iconv_reset(mt, wr); + wrbuf_iconv_puts(wr, mt->iconv_cd, ""); + wrbuf_puts(wr, "\n"); } wrbuf_printf(wr, " \n"); break; @@ -562,12 +823,19 @@ static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr, wrbuf_printf(wr, " iconv_cd, n->u.controlfield.tag, strlen(n->u.controlfield.tag)); - wrbuf_printf(wr, "\">"); - wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data); - wrbuf_printf(wr, "\n"); + wrbuf_iconv_puts(wr, mt->iconv_cd, "\">"); + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, + n->u.controlfield.data, + strlen(n->u.controlfield.data)); + + marc_iconv_reset(mt, wr); + wrbuf_iconv_puts(wr, mt->iconv_cd, ""); + wrbuf_puts(wr, "\n"); break; case YAZ_MARC_COMMENT: - wrbuf_printf(wr, "\n", n->u.comment); + wrbuf_printf(wr, "\n"); break; case YAZ_MARC_LEADER: wrbuf_printf(wr, " "); @@ -581,10 +849,55 @@ static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr, return 0; } + +static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr, + const char *ns, + const char *format, + const char *type) +{ + if (mt->write_using_libxml2) + { +#if YAZ_HAVE_XML2 + int ret; + xmlNode *root_ptr; + + if (yaz_marc_get_write_format(mt) == YAZ_MARC_MARCXML) + ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type); + else // Check for Turbo XML + ret = yaz_marc_write_turbo_xml(mt, &root_ptr, ns, format, type); + if (ret == 0) + { + xmlChar *buf_out; + xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0"); + int len_out; + + xmlDocSetRootElement(doc, root_ptr); + xmlDocDumpMemory(doc, &buf_out, &len_out); + + wrbuf_write(wr, (const char *) buf_out, len_out); + wrbuf_puts(wr, ""); + xmlFree(buf_out); + xmlFreeDoc(doc); + } + return ret; +#else + return -1; +#endif + } + else + return yaz_marc_write_marcxml_ns1(mt, wr, ns, format, type); +} + int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr) { - yaz_marc_modify_leader(mt, 9, "a"); - return yaz_marc_write_marcxml_ns(mt, wr, "http://www.loc.gov/MARC21/slim", + /* set leader 09 to 'a' for UNICODE */ + /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */ + if (!mt->leader_spec) + yaz_marc_modify_leader(mt, 9, "a"); + char *name_space = "http://www.loc.gov/MARC21/slim"; + if (mt->output_format == YAZ_MARC_TMARCXML) + name_space = "http://www.indexdata.com/MARC21/turboxml"; + return yaz_marc_write_marcxml_ns(mt, wr, name_space, 0, 0); } @@ -593,506 +906,430 @@ int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr, const char *type) { return yaz_marc_write_marcxml_ns(mt, wr, - "http://www.bs.dk/standards/MarcXchange", + "info:lc/xmlns/marcxchange-v1", 0, 0); } -int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr) +#if YAZ_HAVE_XML2 + +void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n, xmlNode *record_ptr, xmlNsPtr ns_record, WRBUF wr_cdata, int identifier_length) +{ + xmlNode *ptr; + struct yaz_marc_subfield *s; + int turbo = mt->output_format == YAZ_MARC_TMARCXML; + if (!turbo) { + ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0); + xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag); + } + else { + //TODO consider if safe + char field[10]; + field[0] = 'd'; + strncpy(field + 1, n->u.datafield.tag, 3); + field[4] = '\0'; + ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST field, 0); + } + if (n->u.datafield.indicator) + { + int i; + for (i = 0; n->u.datafield.indicator[i]; i++) + { + char ind_str[6]; + char ind_val[2]; + + ind_val[0] = n->u.datafield.indicator[i]; + ind_val[1] = '\0'; + if (!turbo) { + sprintf(ind_str, "ind%d", i+1); + xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val); + } + else { + sprintf(ind_str, "i%d", i+1); + xmlNewTextChild(ptr, ns_record, BAD_CAST ind_str, BAD_CAST ind_val); + } + } + } + WRBUF subfield_name = wrbuf_alloc(); + for (s = n->u.datafield.subfields; s; s = s->next) + { + xmlNode *ptr_subfield; + size_t using_code_len = get_subfield_len(mt, s->code_data, + identifier_length); + wrbuf_rewind(wr_cdata); + wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, s->code_data + using_code_len); + marc_iconv_reset(mt, wr_cdata); + + if (!turbo) { + ptr_subfield = xmlNewTextChild( + ptr, ns_record, + BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata)); + wrbuf_rewind(wr_cdata); + wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len); + xmlNewProp(ptr_subfield, BAD_CAST "code", + BAD_CAST wrbuf_cstr(wr_cdata)); + } + else { // Turbo format + wrbuf_rewind(subfield_name); + wrbuf_puts(subfield_name, "s"); + // TODO Map special codes to something possible for XML ELEMENT names + if ((s->code_data[0] >= '0' && s->code_data[0] <= '9') || + (s->code_data[0] >= 'a' && s->code_data[0] <= 'z') || + (s->code_data[0] >= 'A' && s->code_data[0] <= 'Z')) + { + wrbuf_iconv_write(subfield_name, mt->iconv_cd,s->code_data, using_code_len); + } + else { + char buffer[2*using_code_len + 1]; + int index; + for (index = 0; index < using_code_len; index++) { + sprintf(buffer + 2*index, "%02X", (unsigned char) s->code_data[index] & 0xFF); + }; + buffer[2*(index+1)] = 0; + wrbuf_puts(subfield_name, "-"); + wrbuf_puts(subfield_name, buffer); + yaz_log(YLOG_WARN, "Using numeric value in element name: %s", buffer); + } + ptr_subfield = xmlNewTextChild(ptr, ns_record, + BAD_CAST wrbuf_cstr(subfield_name), + BAD_CAST wrbuf_cstr(wr_cdata)); + } + } + wrbuf_destroy(subfield_name); +} + +int yaz_marc_write_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr, + const char *ns, + const char *format, + const char *type) { struct yaz_marc_node *n; - int indicator_length; int identifier_length; - int length_data_entry; - int length_starting; - int length_implementation; - int data_offset = 0; const char *leader = 0; - WRBUF wr_dir, wr_head; - int base_address; - + xmlNode *record_ptr; + xmlNsPtr ns_record; + WRBUF wr_cdata = 0; + int turbo = mt->output_format == YAZ_MARC_TMARCXML; for (n = mt->nodes; n; n = n->next) if (n->which == YAZ_MARC_LEADER) + { leader = n->u.leader; + break; + } if (!leader) return -1; - if (!atoi_n_check(leader+10, 1, &indicator_length)) - return -1; if (!atoi_n_check(leader+11, 1, &identifier_length)) return -1; - if (!atoi_n_check(leader+20, 1, &length_data_entry)) - return -1; - if (!atoi_n_check(leader+21, 1, &length_starting)) - return -1; - if (!atoi_n_check(leader+22, 1, &length_implementation)) - return -1; - wr_dir = wrbuf_alloc(); + wr_cdata = wrbuf_alloc(); + + record_ptr = xmlNewNode(0, BAD_CAST "r"); + *root_ptr = record_ptr; + + ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0); + xmlSetNs(record_ptr, ns_record); + + if (format) + xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format); + if (type) + xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type); for (n = mt->nodes; n; n = n->next) { - int data_length = 0; struct yaz_marc_subfield *s; + xmlNode *ptr; + switch(n->which) { case YAZ_MARC_DATAFIELD: - wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag); - data_length += indicator_length; - for (s = n->u.datafield.subfields; s; s = s->next) - data_length += 1+strlen(s->code_data); - data_length++; + add_marc_datafield_turbo_xml(mt, n, record_ptr, ns_record, wr_cdata, identifier_length); break; case YAZ_MARC_CONTROLFIELD: - wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag); - data_length += strlen(n->u.controlfield.data); - data_length++; + wrbuf_rewind(wr_cdata); + wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data); + marc_iconv_reset(mt, wr_cdata); + + if (!turbo) { + ptr = xmlNewTextChild(record_ptr, ns_record, + BAD_CAST "controlfield", + BAD_CAST wrbuf_cstr(wr_cdata)); + xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag); + } + else { + // TODO required iconv? + char field[10]; + field[0] = 'c'; + strncpy(field + 1, n->u.controlfield.tag, 3); + field[4] = '\0'; + ptr = xmlNewTextChild(record_ptr, ns_record, + BAD_CAST field, + BAD_CAST wrbuf_cstr(wr_cdata)); + } + break; case YAZ_MARC_COMMENT: + ptr = xmlNewComment(BAD_CAST n->u.comment); + xmlAddChild(record_ptr, ptr); break; case YAZ_MARC_LEADER: + { + char *field = "leader"; + if (turbo) + field = "l"; + xmlNewTextChild(record_ptr, ns_record, BAD_CAST field, + BAD_CAST n->u.leader); + } break; } - if (data_length) - { - wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length); - wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset); - data_offset += data_length; - } } - /* mark end of directory */ - wrbuf_putc(wr_dir, ISO2709_FS); + wrbuf_destroy(wr_cdata); + return 0; +} - /* base address of data (comes after leader+directory) */ - base_address = 24 + wrbuf_len(wr_dir); - wr_head = wrbuf_alloc(); +int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr, + const char *ns, + const char *format, + const char *type) +{ + struct yaz_marc_node *n; + int identifier_length; + const char *leader = 0; + xmlNode *record_ptr; + xmlNsPtr ns_record; + WRBUF wr_cdata = 0; - /* write record length */ - wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1); - /* from "original" leader */ - wrbuf_write(wr_head, leader+5, 7); - /* base address of data */ - wrbuf_printf(wr_head, "%05d", base_address); - /* from "original" leader */ - wrbuf_write(wr_head, leader+17, 7); + for (n = mt->nodes; n; n = n->next) + if (n->which == YAZ_MARC_LEADER) + { + leader = n->u.leader; + break; + } - wrbuf_write(wr, wrbuf_buf(wr_head), 24); - wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir)); - wrbuf_free(wr_head, 1); - wrbuf_free(wr_dir, 1); + if (!leader) + return -1; + if (!atoi_n_check(leader+11, 1, &identifier_length)) + return -1; + + wr_cdata = wrbuf_alloc(); + + record_ptr = xmlNewNode(0, BAD_CAST "record"); + *root_ptr = record_ptr; + ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0); + xmlSetNs(record_ptr, ns_record); + + if (format) + xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format); + if (type) + xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type); for (n = mt->nodes; n; n = n->next) { struct yaz_marc_subfield *s; + xmlNode *ptr; + switch(n->which) { case YAZ_MARC_DATAFIELD: - wrbuf_printf(wr, "%.*s", indicator_length, - n->u.datafield.indicator); + ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0); + xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag); + if (n->u.datafield.indicator) + { + int i; + for (i = 0; n->u.datafield.indicator[i]; i++) + { + char ind_str[6]; + char ind_val[2]; + + sprintf(ind_str, "ind%d", i+1); + ind_val[0] = n->u.datafield.indicator[i]; + ind_val[1] = '\0'; + xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val); + } + } for (s = n->u.datafield.subfields; s; s = s->next) - wrbuf_printf(wr, "%c%s", ISO2709_IDFS, s->code_data); - wrbuf_printf(wr, "%c", ISO2709_FS); + { + xmlNode *ptr_subfield; + size_t using_code_len = get_subfield_len(mt, s->code_data, + identifier_length); + wrbuf_rewind(wr_cdata); + wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, + s->code_data + using_code_len); + marc_iconv_reset(mt, wr_cdata); + ptr_subfield = xmlNewTextChild( + ptr, ns_record, + BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata)); + + wrbuf_rewind(wr_cdata); + wrbuf_iconv_write(wr_cdata, mt->iconv_cd, + s->code_data, using_code_len); + xmlNewProp(ptr_subfield, BAD_CAST "code", + BAD_CAST wrbuf_cstr(wr_cdata)); + } break; case YAZ_MARC_CONTROLFIELD: - wrbuf_printf(wr, "%s%c", n->u.controlfield.data, ISO2709_FS); + wrbuf_rewind(wr_cdata); + wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data); + marc_iconv_reset(mt, wr_cdata); + + ptr = xmlNewTextChild(record_ptr, ns_record, + BAD_CAST "controlfield", + BAD_CAST wrbuf_cstr(wr_cdata)); + + xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag); break; case YAZ_MARC_COMMENT: + ptr = xmlNewComment(BAD_CAST n->u.comment); + xmlAddChild(record_ptr, ptr); break; case YAZ_MARC_LEADER: + xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader", + BAD_CAST n->u.leader); break; } } - wrbuf_printf(wr, "%c", ISO2709_RS); + wrbuf_destroy(wr_cdata); return 0; } -#if HAVE_XML2 -int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr) -{ - for (; ptr; ptr = ptr->next) - { - if (ptr->type == XML_ELEMENT_NODE) - { - if (!strcmp((const char *) ptr->name, "subfield")) - { - size_t ctrl_data_len = 0; - char *ctrl_data_buf = 0; - const xmlNode *p = 0, *ptr_code = 0; - struct _xmlAttr *attr; - for (attr = ptr->properties; attr; attr = attr->next) - if (!strcmp((const char *)attr->name, "code")) - ptr_code = attr->children; - else - { - yaz_marc_cprintf( - mt, "Bad attribute '%.80s' for 'subfield'", - attr->name); - return -1; - } - if (!ptr_code) - { - yaz_marc_cprintf( - mt, "Missing attribute 'code' for 'subfield'" ); - return -1; - } - if (ptr_code->type == XML_TEXT_NODE) - { - ctrl_data_len = - strlen((const char *)ptr_code->content); - } - else - { - yaz_marc_cprintf( - mt, "Missing value for 'code' in 'subfield'" ); - return -1; - } - for (p = ptr->children; p ; p = p->next) - if (p->type == XML_TEXT_NODE) - ctrl_data_len += strlen((const char *)p->content); - ctrl_data_buf = nmem_malloc(mt->nmem, ctrl_data_len+1); - strcpy(ctrl_data_buf, (const char *)ptr_code->content); - for (p = ptr->children; p ; p = p->next) - if (p->type == XML_TEXT_NODE) - strcat(ctrl_data_buf, (const char *)p->content); - yaz_marc_add_subfield(mt, ctrl_data_buf, ctrl_data_len); - } - else - { - yaz_marc_cprintf( - mt, "Expected element 'subfield', got '%.80s'", ptr->name); - return -1; - } - } - } - return 0; -} -static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p) + + +#endif + +int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr) { + struct yaz_marc_node *n; int indicator_length; int identifier_length; - int base_address; int length_data_entry; int length_starting; int length_implementation; + int data_offset = 0; const char *leader = 0; - const xmlNode *ptr = *ptr_p; - - for(; ptr; ptr = ptr->next) - if (ptr->type == XML_ELEMENT_NODE) - { - if (!strcmp((const char *) ptr->name, "leader")) - { - xmlNode *p = ptr->children; - for(; p; p = p->next) - if (p->type == XML_TEXT_NODE) - leader = (const char *) p->content; - break; - } - else - { - yaz_marc_cprintf( - mt, "Expected element 'leader', got '%.80s'", ptr->name); - return -1; - } - } + WRBUF wr_dir, wr_head, wr_data_tmp; + int base_address; + + for (n = mt->nodes; n; n = n->next) + if (n->which == YAZ_MARC_LEADER) + leader = n->u.leader; + if (!leader) - { - yaz_marc_cprintf(mt, "Missing element 'leader'"); return -1; - } - if (strlen(leader) != 24) - { - yaz_marc_cprintf(mt, "Bad length %d of leader data." - " Must have length of 24 characters", strlen(leader)); + if (!atoi_n_check(leader+10, 1, &indicator_length)) return -1; - } - yaz_marc_read_leader(mt, leader, - &indicator_length, - &identifier_length, - &base_address, - &length_data_entry, - &length_starting, - &length_implementation); - *ptr_p = ptr; - return 0; -} - -static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr) -{ - for(; ptr; ptr = ptr->next) - if (ptr->type == XML_ELEMENT_NODE) - { - if (!strcmp((const char *) ptr->name, "controlfield")) - { - const xmlNode *ptr_tag = 0; - struct _xmlAttr *attr; - for (attr = ptr->properties; attr; attr = attr->next) - if (!strcmp((const char *)attr->name, "tag")) - ptr_tag = attr->children; - else - { - yaz_marc_cprintf( - mt, "Bad attribute '%.80s' for 'controlfield'", - attr->name); - return -1; - } - if (!ptr_tag) - { - yaz_marc_cprintf( - mt, "Missing attribute 'tag' for 'controlfield'" ); - return -1; - } - yaz_marc_add_controlfield_xml(mt, ptr_tag, ptr->children); - } - else if (!strcmp((const char *) ptr->name, "datafield")) - { - char indstr[11]; /* 0(unused), 1,....9, + zero term */ - const xmlNode *ptr_tag = 0; - struct _xmlAttr *attr; - int i; - for (i = 0; i<11; i++) - indstr[i] = '\0'; - for (attr = ptr->properties; attr; attr = attr->next) - if (!strcmp((const char *)attr->name, "tag")) - ptr_tag = attr->children; - else if (strlen((const char *)attr->name) == 4 && - !memcmp(attr->name, "ind", 3)) - { - int no = atoi((const char *)attr->name+3); - if (attr->children - && attr->children->type == XML_TEXT_NODE) - indstr[no] = attr->children->content[0]; - } - else - { - yaz_marc_cprintf( - mt, "Bad attribute '%.80s' for 'datafield'", - attr->name); - return -1; - } - if (!ptr_tag) - { - yaz_marc_cprintf( - mt, "Missing attribute 'tag' for 'datafield'" ); - return -1; - } - /* note that indstr[0] is unused so we use indstr[1..] */ - yaz_marc_add_datafield_xml(mt, ptr_tag, - indstr+1, strlen(indstr+1)); - - if (yaz_marc_read_xml_subfields(mt, ptr->children)) - return -1; - } - else - { - yaz_marc_cprintf(mt, - "Expected element controlfield or datafield," - " got %.80s", ptr->name); - return -1; - } - } - return 0; -} - -int yaz_marc_read_xml(yaz_marc_t mt, const void *xmlnode) -{ - const xmlNode *ptr = xmlnode; - for(; ptr; ptr = ptr->next) - if (ptr->type == XML_ELEMENT_NODE) - { - if (!strcmp((const char *) ptr->name, "record")) - break; - else - { - yaz_marc_cprintf( - mt, "Unknown element '%.80s' in MARC XML reader", - ptr->name); - return -1; - } - } - if (!ptr) - { - yaz_marc_cprintf(mt, "Missing element 'record' in MARC XML record"); + if (!atoi_n_check(leader+11, 1, &identifier_length)) return -1; - } - /* ptr points to record node now */ - ptr = ptr->children; - if (yaz_marc_read_xml_leader(mt, &ptr)) + if (!atoi_n_check(leader+20, 1, &length_data_entry)) return -1; - return yaz_marc_read_xml_fields(mt, ptr->next); -} -#else -int yaz_marc_read_xml(yaz_marc_t mt, const void *xmlnode) -{ - return -1; -} -#endif - -int yaz_marc_read_iso2709(yaz_marc_t mt, const char *buf, int bsize) -{ - int entry_p; - int record_length; - int indicator_length; - int identifier_length; - int end_of_directory; - int base_address; - int length_data_entry; - int length_starting; - int length_implementation; - - yaz_marc_reset(mt); - - record_length = atoi_n (buf, 5); - if (record_length < 25) - { - yaz_marc_cprintf(mt, "Record length %d < 24", record_length); + if (!atoi_n_check(leader+21, 1, &length_starting)) return -1; - } - /* ballout if bsize is known and record_length is less than that */ - if (bsize != -1 && record_length > bsize) - { - yaz_marc_cprintf(mt, "Record appears to be larger than buffer %d < %d", - record_length, bsize); + if (!atoi_n_check(leader+22, 1, &length_implementation)) return -1; - } - if (mt->debug) - yaz_marc_cprintf(mt, "Record length %5d", record_length); - - yaz_marc_read_leader(mt, buf, - &indicator_length, - &identifier_length, - &base_address, - &length_data_entry, - &length_starting, - &length_implementation); - - /* First pass. determine length of directory & base of data */ - for (entry_p = 24; buf[entry_p] != ISO2709_FS; ) + + wr_data_tmp = wrbuf_alloc(); + wr_dir = wrbuf_alloc(); + for (n = mt->nodes; n; n = n->next) { - /* length of directory entry */ - int l = 3 + length_data_entry + length_starting; - if (entry_p + l >= record_length) - { - yaz_marc_cprintf(mt, "Directory offset %d: end of record." - " Missing FS char", entry_p); - return -1; - } - if (mt->debug) - { - yaz_marc_cprintf(mt, "Directory offset %d: Tag %.3s", - entry_p, buf+entry_p); - } - /* Check for digits in length info */ - while (--l >= 3) - if (!isdigit(*(const unsigned char *) (buf + entry_p+l))) - break; - if (l >= 3) + int data_length = 0; + struct yaz_marc_subfield *s; + + switch(n->which) { - /* Not all digits, so stop directory scan */ - yaz_marc_cprintf(mt, "Directory offset %d: Bad value for data" - " length and/or length starting", entry_p); + case YAZ_MARC_DATAFIELD: + wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag); + data_length += indicator_length; + wrbuf_rewind(wr_data_tmp); + for (s = n->u.datafield.subfields; s; s = s->next) + { + /* write dummy IDFS + content */ + wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' '); + wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data); + marc_iconv_reset(mt, wr_data_tmp); + } + /* write dummy FS (makes MARC-8 to become ASCII) */ + wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' '); + marc_iconv_reset(mt, wr_data_tmp); + data_length += wrbuf_len(wr_data_tmp); break; - } - entry_p += 3 + length_data_entry + length_starting; - } - end_of_directory = entry_p; - if (base_address != entry_p+1) - { - yaz_marc_cprintf(mt, "Base address not at end of directory," - " base %d, end %d", base_address, entry_p+1); - } + case YAZ_MARC_CONTROLFIELD: + wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag); - /* Second pass. parse control - and datafields */ - for (entry_p = 24; entry_p != end_of_directory; ) - { - int data_length; - int data_offset; - int end_offset; - int i; - char tag[4]; - int identifier_flag = 0; - int entry_p0 = entry_p; - - memcpy (tag, buf+entry_p, 3); - entry_p += 3; - tag[3] = '\0'; - data_length = atoi_n(buf+entry_p, length_data_entry); - entry_p += length_data_entry; - data_offset = atoi_n(buf+entry_p, length_starting); - entry_p += length_starting; - i = data_offset + base_address; - end_offset = i+data_length-1; - - if (data_length <= 0 || data_offset < 0) + wrbuf_rewind(wr_data_tmp); + wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, + n->u.controlfield.data); + marc_iconv_reset(mt, wr_data_tmp); + wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */ + marc_iconv_reset(mt, wr_data_tmp); + data_length += wrbuf_len(wr_data_tmp); break; - - if (mt->debug) - { - yaz_marc_cprintf(mt, "Tag: %s. Directory offset %d: data-length %d," - " data-offset %d", - tag, entry_p0, data_length, data_offset); - } - if (end_offset >= record_length) - { - yaz_marc_cprintf(mt, "Directory offset %d: Data out of bounds %d >= %d", - entry_p0, end_offset, record_length); + case YAZ_MARC_COMMENT: + break; + case YAZ_MARC_LEADER: break; } - - if (memcmp (tag, "00", 2)) - identifier_flag = 1; /* if not 00X assume subfields */ - else if (indicator_length < 4 && indicator_length > 0) + if (data_length) { - /* Danmarc 00X have subfields */ - if (buf[i + indicator_length] == ISO2709_IDFS) - identifier_flag = 1; - else if (buf[i + indicator_length + 1] == ISO2709_IDFS) - identifier_flag = 2; + wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length); + wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset); + data_offset += data_length; } + } + /* mark end of directory */ + wrbuf_putc(wr_dir, ISO2709_FS); - if (identifier_flag) - { - /* datafield */ - i += identifier_flag-1; - yaz_marc_add_datafield(mt, tag, buf+i, indicator_length); - i += indicator_length; + /* base address of data (comes after leader+directory) */ + base_address = 24 + wrbuf_len(wr_dir); - while (i < end_offset && - buf[i] != ISO2709_RS && buf[i] != ISO2709_FS) + wr_head = wrbuf_alloc(); + + /* write record length */ + wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1); + /* from "original" leader */ + wrbuf_write(wr_head, leader+5, 7); + /* base address of data */ + wrbuf_printf(wr_head, "%05d", base_address); + /* from "original" leader */ + wrbuf_write(wr_head, leader+17, 7); + + wrbuf_write(wr, wrbuf_buf(wr_head), 24); + wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir)); + wrbuf_destroy(wr_head); + wrbuf_destroy(wr_dir); + wrbuf_destroy(wr_data_tmp); + + for (n = mt->nodes; n; n = n->next) + { + struct yaz_marc_subfield *s; + + switch(n->which) + { + case YAZ_MARC_DATAFIELD: + wrbuf_printf(wr, "%.*s", indicator_length, + n->u.datafield.indicator); + for (s = n->u.datafield.subfields; s; s = s->next) { - int code_offset = i+1; - - i ++; - while (i < end_offset && - buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS && - buf[i] != ISO2709_FS) - i++; - yaz_marc_add_subfield(mt, buf+code_offset, i - code_offset); + wrbuf_putc(wr, ISO2709_IDFS); + wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data); + marc_iconv_reset(mt, wr); } - } - else - { - /* controlfield */ - int i0 = i; - while (i < end_offset && - buf[i] != ISO2709_RS && buf[i] != ISO2709_FS) - i++; - yaz_marc_add_controlfield(mt, tag, buf+i0, i-i0); - } - if (i < end_offset) - { - yaz_marc_cprintf(mt, "Separator but not at end of field length=%d", - data_length); - } - if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS) - { - yaz_marc_cprintf(mt, "No separator at end of field length=%d", - data_length); + wrbuf_putc(wr, ISO2709_FS); + break; + case YAZ_MARC_CONTROLFIELD: + wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data); + marc_iconv_reset(mt, wr); + wrbuf_putc(wr, ISO2709_FS); + break; + case YAZ_MARC_COMMENT: + break; + case YAZ_MARC_LEADER: + break; } } - return record_length; + wrbuf_printf(wr, "%c", ISO2709_RS); + return 0; } + int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) { int s, r = yaz_marc_read_iso2709(mt, buf, bsize); @@ -1105,25 +1342,63 @@ int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) } int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize, - char **result, int *rsize) + const char **result, size_t *rsize) { int r; wrbuf_rewind(mt->m_wr); r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr); if (result) - *result = wrbuf_buf(mt->m_wr); + *result = wrbuf_cstr(mt->m_wr); if (rsize) *rsize = wrbuf_len(mt->m_wr); return r; } -void yaz_marc_xml(yaz_marc_t mt, int xmlmode) +void yaz_marc_set_read_format(yaz_marc_t mt, int format) +{ + if (mt) + mt->input_format = format; +} + +int yaz_marc_get_read_format(yaz_marc_t mt) { if (mt) - mt->xml = xmlmode; + return mt->input_format; + return -1; } + +void yaz_marc_set_write_format(yaz_marc_t mt, int format) +{ + if (mt) { + mt->output_format = format; +/* + // Force using libxml2 + if (mt->output_format == YAZ_MARC_TMARCXML) + mt->write_using_libxml2 = 1; +*/ + } +} + +int yaz_marc_get_write_format(yaz_marc_t mt) +{ + if (mt) + return mt->output_format; + return -1; +} + + +/** + * Deprecated, use yaz_marc_set_write_format + */ +void yaz_marc_xml(yaz_marc_t mt, int xmlmode) +{ + yaz_marc_set_write_format(mt, xmlmode); +} + + + void yaz_marc_debug(yaz_marc_t mt, int level) { if (mt) @@ -1135,6 +1410,11 @@ void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd) mt->iconv_cd = cd; } +yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt) +{ + return mt->iconv_cd; +} + void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str) { struct yaz_marc_node *n; @@ -1148,56 +1428,94 @@ void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str) } } -/* deprecated */ -int yaz_marc_decode(const char *buf, WRBUF wr, int debug, int bsize, int xml) +int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec) { - yaz_marc_t mt = yaz_marc_create(); - int r; - - mt->debug = debug; - mt->xml = xml; - r = yaz_marc_decode_wrbuf(mt, buf, bsize, wr); - yaz_marc_destroy(mt); - return r; + xfree(mt->leader_spec); + mt->leader_spec = 0; + if (leader_spec) + { + char dummy_leader[24]; + if (marc_exec_leader(leader_spec, dummy_leader, 24)) + return -1; + mt->leader_spec = xstrdup(leader_spec); + } + return 0; } -/* deprecated */ -int marc_display_wrbuf (const char *buf, WRBUF wr, int debug, int bsize) +static int marc_exec_leader(const char *leader_spec, char *leader, size_t size) { - return yaz_marc_decode(buf, wr, debug, bsize, 0); + const char *cp = leader_spec; + while (cp) + { + char val[21]; + int pos; + int no_read = 0, no = 0; + + no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read); + if (no < 2 || no_read < 3) + return -1; + if (pos < 0 || (size_t) pos >= size) + return -1; + + if (*val == '\'') + { + const char *vp = strchr(val+1, '\''); + size_t len; + + if (!vp) + return -1; + len = vp-val-1; + if (len + pos > size) + return -1; + memcpy(leader + pos, val+1, len); + } + else if (*val >= '0' && *val <= '9') + { + int ch = atoi(val); + leader[pos] = ch; + } + else + return -1; + cp += no_read; + if (*cp != ',') + break; + + cp++; + } + return 0; } -/* deprecated */ -int marc_display_exl (const char *buf, FILE *outf, int debug, int bsize) +int yaz_marc_decode_formatstr(const char *arg) { - yaz_marc_t mt = yaz_marc_create(); - int r; - - mt->debug = debug; - r = yaz_marc_decode_wrbuf (mt, buf, bsize, mt->m_wr); - if (!outf) - outf = stdout; - if (r > 0) - fwrite (wrbuf_buf(mt->m_wr), 1, wrbuf_len(mt->m_wr), outf); - yaz_marc_destroy(mt); - return r; + int mode = -1; + if (!strcmp(arg, "marc")) + mode = YAZ_MARC_ISO2709; + if (!strcmp(arg, "marcxml")) + mode = YAZ_MARC_MARCXML; + if (!strcmp(arg, "tmarcxml")) + mode = YAZ_MARC_TMARCXML; + if (!strcmp(arg, "marcxchange")) + mode = YAZ_MARC_XCHANGE; + if (!strcmp(arg, "line")) + mode = YAZ_MARC_LINE; + return mode; } -/* deprecated */ -int marc_display_ex (const char *buf, FILE *outf, int debug) +void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable) { - return marc_display_exl (buf, outf, debug, -1); + mt->write_using_libxml2 = enable; } -/* deprecated */ -int marc_display (const char *buf, FILE *outf) +int yaz_marc_is_turbo_format(yaz_marc_t mt) { - return marc_display_ex (buf, outf, 0); + return mt->output_format == YAZ_MARC_TMARCXML; } + /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab