X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Fmarcdisp.c;h=85acb07139e2470971e3ed442c22b58c9af8db45;hp=4e4230bb62805801f0ad1106295a6317a9b75ebd;hb=8ceaeefe2e491935cba91f56007308be6e4996e6;hpb=cd08b51966f34ed2b871f87bc07dd51a0d3fd6a4 diff --git a/src/marcdisp.c b/src/marcdisp.c index 4e4230b..85acb07 100644 --- a/src/marcdisp.c +++ b/src/marcdisp.c @@ -1,569 +1,1351 @@ -/* - * Copyright (C) 1995-2005, Index Data ApS +/* This file is part of the YAZ toolkit. + * Copyright (C) Index Data * See the file LICENSE for details. - * - * $Id: marcdisp.c,v 1.21 2005-04-20 13:17:51 adam Exp $ */ /** * \file marcdisp.c - * \brief Implements MARC display - and conversion utilities + * \brief Implements MARC conversion utilities */ #if HAVE_CONFIG_H #include #endif +#ifdef WIN32 +#include +#endif + +#include #include +#include #include -#include #include #include #include +#include +#include + +#if YAZ_HAVE_XML2 +#include +#include +#endif + +enum yaz_collection_state { + no_collection, + collection_first, + collection_second +}; + +/** \brief node types for yaz_marc_node */ +enum YAZ_MARC_NODE_TYPE +{ + YAZ_MARC_DATAFIELD, + YAZ_MARC_CONTROLFIELD, + YAZ_MARC_COMMENT, + YAZ_MARC_LEADER +}; + +/** \brief represets a data field */ +struct yaz_marc_datafield { + char *tag; + char *indicator; + struct yaz_marc_subfield *subfields; +}; + +/** \brief represents a control field */ +struct yaz_marc_controlfield { + char *tag; + char *data; +}; +/** \brief a comment node */ +struct yaz_marc_comment { + char *comment; +}; + +/** \brief MARC node */ +struct yaz_marc_node { + enum YAZ_MARC_NODE_TYPE which; + union { + struct yaz_marc_datafield datafield; + struct yaz_marc_controlfield controlfield; + char *comment; + char *leader; + } u; + struct yaz_marc_node *next; +}; + +/** \brief represents a subfield */ +struct yaz_marc_subfield { + char *code_data; + struct yaz_marc_subfield *next; +}; + +/** \brief the internals of a yaz_marc_t handle */ struct yaz_marc_t_ { WRBUF m_wr; - int xml; + NMEM nmem; + int output_format; int debug; + int write_using_libxml2; + enum yaz_collection_state enable_collection; yaz_iconv_t iconv_cd; char subfield_str[8]; char endline_str[8]; + char *leader_spec; + struct yaz_marc_node *nodes; + struct yaz_marc_node **nodes_pp; + struct yaz_marc_subfield **subfield_pp; }; yaz_marc_t yaz_marc_create(void) { yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt)); - mt->xml = YAZ_MARC_LINE; + mt->output_format = YAZ_MARC_LINE; mt->debug = 0; + mt->write_using_libxml2 = 0; + mt->enable_collection = no_collection; mt->m_wr = wrbuf_alloc(); mt->iconv_cd = 0; + mt->leader_spec = 0; strcpy(mt->subfield_str, " $"); strcpy(mt->endline_str, "\n"); + + mt->nmem = nmem_create(); + yaz_marc_reset(mt); return mt; } -void yaz_marc_subfield_str(yaz_marc_t mt, const char *s) +void yaz_marc_destroy(yaz_marc_t mt) { - strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1); - mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0'; + if (!mt) + return ; + nmem_destroy(mt->nmem); + wrbuf_destroy(mt->m_wr); + xfree(mt->leader_spec); + xfree(mt); } -void yaz_marc_endline_str(yaz_marc_t mt, const char *s) +NMEM yaz_marc_get_nmem(yaz_marc_t mt) { - strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1); - mt->endline_str[sizeof(mt->endline_str)-1] = '\0'; + return mt->nmem; } -void yaz_marc_destroy(yaz_marc_t mt) +static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr) { - if (!mt) - return ; - wrbuf_free (mt->m_wr, 1); - xfree (mt); + wrbuf_iconv_reset(wr, mt->iconv_cd); +} + +static int marc_exec_leader(const char *leader_spec, char *leader, + size_t size); +#if YAZ_HAVE_XML2 +static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr, + const char *ns, + const char *format, + const char *type); +#endif + +static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt) +{ + struct yaz_marc_node *n = (struct yaz_marc_node *) + nmem_malloc(mt->nmem, sizeof(*n)); + n->next = 0; + *mt->nodes_pp = n; + mt->nodes_pp = &n->next; + return n; +} + +#if YAZ_HAVE_XML2 +void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag, + const xmlNode *ptr_data) +{ + struct yaz_marc_node *n = yaz_marc_add_node(mt); + n->which = YAZ_MARC_CONTROLFIELD; + n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem); + n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem); +} + +void yaz_marc_add_controlfield_xml2(yaz_marc_t mt, char *tag, + const xmlNode *ptr_data) +{ + struct yaz_marc_node *n = yaz_marc_add_node(mt); + n->which = YAZ_MARC_CONTROLFIELD; + n->u.controlfield.tag = tag; + n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem); +} + +#endif + + +void yaz_marc_add_comment(yaz_marc_t mt, char *comment) +{ + struct yaz_marc_node *n = yaz_marc_add_node(mt); + n->which = YAZ_MARC_COMMENT; + n->u.comment = nmem_strdup(mt->nmem, comment); +} + +void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...) +{ + va_list ap; + char buf[200]; + + va_start(ap, fmt); + yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap); + yaz_marc_add_comment(mt, buf); + va_end (ap); +} + +int yaz_marc_get_debug(yaz_marc_t mt) +{ + return mt->debug; +} + +void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len) +{ + struct yaz_marc_node *n = yaz_marc_add_node(mt); + n->which = YAZ_MARC_LEADER; + n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len); + marc_exec_leader(mt->leader_spec, n->u.leader, leader_len); +} + +void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag, + const char *data, size_t data_len) +{ + struct yaz_marc_node *n = yaz_marc_add_node(mt); + n->which = YAZ_MARC_CONTROLFIELD; + n->u.controlfield.tag = nmem_strdup(mt->nmem, tag); + n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len); + if (mt->debug) + { + size_t i; + char msg[80]; + + sprintf(msg, "controlfield:"); + for (i = 0; i < 16 && i < data_len; i++) + sprintf(msg + strlen(msg), " %02X", data[i] & 0xff); + if (i < data_len) + sprintf(msg + strlen(msg), " .."); + yaz_marc_add_comment(mt, msg); + } +} + +void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag, + const char *indicator, size_t indicator_len) +{ + struct yaz_marc_node *n = yaz_marc_add_node(mt); + n->which = YAZ_MARC_DATAFIELD; + n->u.datafield.tag = nmem_strdup(mt->nmem, tag); + n->u.datafield.indicator = + nmem_strdupn(mt->nmem, indicator, indicator_len); + n->u.datafield.subfields = 0; + + /* make subfield_pp the current (last one) */ + mt->subfield_pp = &n->u.datafield.subfields; } -static void marc_cdata (yaz_marc_t mt, const char *buf, size_t len, WRBUF wr) +/** \brief adds a attribute value to the element name if it is plain chars + + If not, and if the attribute name is not null, it will append a + attribute element with the value if attribute name is null it will + return a non-zero value meaning it couldnt handle the value. +*/ +static int element_name_append_attribute_value( + yaz_marc_t mt, WRBUF buffer, + const char *attribute_name, char *code_data, size_t code_len) { - if (mt->xml == YAZ_MARC_ISO2709) - wrbuf_iconv_write(wr, mt->iconv_cd, buf, len); - else if (mt->xml == YAZ_MARC_LINE) - wrbuf_iconv_write(wr, mt->iconv_cd, buf, len); + /* TODO Map special codes to something possible for XML ELEMENT names */ + + int encode = 0; + size_t index = 0; + int success = 0; + for (index = 0; index < code_len; index++) + { + if (!((code_data[index] >= '0' && code_data[index] <= '9') || + (code_data[index] >= 'a' && code_data[index] <= 'z') || + (code_data[index] >= 'A' && code_data[index] <= 'Z'))) + encode = 1; + } + /* Add as attribute */ + if (encode && attribute_name) + wrbuf_printf(buffer, " %s=\"", attribute_name); + + if (!encode || attribute_name) + wrbuf_iconv_write_cdata(buffer, mt->iconv_cd, code_data, code_len); else - wrbuf_iconv_write_cdata(wr, mt->iconv_cd, buf, len); + success = -1; + + if (encode && attribute_name) + wrbuf_printf(buffer, "\""); /* return error if we couldn't handle it.*/ + return success; } -static int atoi_n_check(const char *buf, int size, int *val) +#if YAZ_HAVE_XML2 +void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag, + const char *indicator, size_t indicator_len) { - if (!isdigit(*(const unsigned char *) buf)) - return 0; - *val = atoi_n(buf, size); - return 1; + struct yaz_marc_node *n = yaz_marc_add_node(mt); + n->which = YAZ_MARC_DATAFIELD; + n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem); + n->u.datafield.indicator = + nmem_strdupn(mt->nmem, indicator, indicator_len); + n->u.datafield.subfields = 0; + + /* make subfield_pp the current (last one) */ + mt->subfield_pp = &n->u.datafield.subfields; } -int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) +void yaz_marc_add_datafield_xml2(yaz_marc_t mt, char *tag_value, char *indicators) { - int entry_p; - int record_length; - int indicator_length; - int identifier_length; - int end_of_directory; - int base_address; - int length_data_entry; - int length_starting; - int length_implementation; - char lead[24]; - int produce_warnings = 0; + struct yaz_marc_node *n = yaz_marc_add_node(mt); + n->which = YAZ_MARC_DATAFIELD; + n->u.datafield.tag = tag_value; + n->u.datafield.indicator = indicators; + n->u.datafield.subfields = 0; + + /* make subfield_pp the current (last one) */ + mt->subfield_pp = &n->u.datafield.subfields; +} + +void yaz_marc_datafield_set_indicators(struct yaz_marc_node *n, char *indicator) +{ + n->u.datafield.indicator = indicator; +} + +#endif +void yaz_marc_add_subfield(yaz_marc_t mt, + const char *code_data, size_t code_data_len) +{ if (mt->debug) - produce_warnings = 1; - if (mt->xml == YAZ_MARC_SIMPLEXML || mt->xml == YAZ_MARC_OAIMARC - || mt->xml == YAZ_MARC_MARCXML || mt->xml == YAZ_MARC_XCHANGE) - produce_warnings = 1; + { + size_t i; + char msg[80]; - record_length = atoi_n (buf, 5); - if (record_length < 25) + sprintf(msg, "subfield:"); + for (i = 0; i < 16 && i < code_data_len; i++) + sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff); + if (i < code_data_len) + sprintf(msg + strlen(msg), " .."); + yaz_marc_add_comment(mt, msg); + } + + if (mt->subfield_pp) { - if (mt->debug) - wrbuf_printf(wr, "\n", - record_length); - return -1; + struct yaz_marc_subfield *n = (struct yaz_marc_subfield *) + nmem_malloc(mt->nmem, sizeof(*n)); + n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len); + n->next = 0; + /* mark subfield_pp to point to this one, so we append here next */ + *mt->subfield_pp = n; + mt->subfield_pp = &n->next; } - memcpy(lead, buf, 24); /* se can modify the header for output */ +} - /* ballout if bsize is known and record_length is less than that */ - if (bsize != -1 && record_length > bsize) - return -1; - if (!atoi_n_check(buf+10, 1, &indicator_length)) +static void check_ascii(yaz_marc_t mt, char *leader, int offset, + int ch_default) +{ + if (leader[offset] < ' ' || leader[offset] > 127) { - if (produce_warnings) - wrbuf_printf(wr, "\n"); - lead[10] = '2'; - indicator_length = 2; + yaz_marc_cprintf(mt, + "Leader character at offset %d is non-ASCII. " + "Setting value to '%c'", offset, ch_default); + leader[offset] = ch_default; } - if (!atoi_n_check(buf+11, 1, &identifier_length)) +} + +void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c, + int *indicator_length, + int *identifier_length, + int *base_address, + int *length_data_entry, + int *length_starting, + int *length_implementation) +{ + char leader[24]; + + memcpy(leader, leader_c, 24); + + check_ascii(mt, leader, 5, 'a'); + check_ascii(mt, leader, 6, 'a'); + check_ascii(mt, leader, 7, 'a'); + check_ascii(mt, leader, 8, '#'); + check_ascii(mt, leader, 9, '#'); + if (!atoi_n_check(leader+10, 1, indicator_length) || *indicator_length == 0) { - if (produce_warnings) - wrbuf_printf(wr, "\n"); - lead[11] = '2'; - identifier_length = 2; + yaz_marc_cprintf(mt, "Indicator length at offset 10 should" + " hold a number 1-9. Assuming 2"); + leader[10] = '2'; + *indicator_length = 2; } - if (!atoi_n_check(buf+12, 5, &base_address)) + if (!atoi_n_check(leader+11, 1, identifier_length) || *identifier_length == 0) { - if (produce_warnings) - wrbuf_printf(wr, "\n"); - base_address = 0; + yaz_marc_cprintf(mt, "Identifier length at offset 11 should " + " hold a number 1-9. Assuming 2"); + leader[11] = '2'; + *identifier_length = 2; } - if (!atoi_n_check(buf+20, 1, &length_data_entry)) + if (!atoi_n_check(leader+12, 5, base_address)) { - if (produce_warnings) - wrbuf_printf(wr, "\n"); - length_data_entry = 4; - lead[20] = '4'; + yaz_marc_cprintf(mt, "Base address at offsets 12..16 should" + " hold a number. Assuming 0"); + *base_address = 0; } - if (!atoi_n_check(buf+21, 1, &length_starting)) + check_ascii(mt, leader, 17, '#'); + check_ascii(mt, leader, 18, '#'); + check_ascii(mt, leader, 19, '#'); + if (!atoi_n_check(leader+20, 1, length_data_entry) || + *length_data_entry < 3) { - if (produce_warnings) - wrbuf_printf(wr, "\n"); - length_starting = 5; - lead[21] = '5'; + yaz_marc_cprintf(mt, "Length data entry at offset 20 should" + " hold a number 3-9. Assuming 4"); + *length_data_entry = 4; + leader[20] = '4'; } - if (!atoi_n_check(buf+22, 1, &length_implementation)) + if (!atoi_n_check(leader+21, 1, length_starting) || *length_starting < 4) { - if (produce_warnings) - wrbuf_printf(wr, "\n"); - length_implementation = 0; - lead[22] = '0'; + yaz_marc_cprintf(mt, "Length starting at offset 21 should" + " hold a number 4-9. Assuming 5"); + *length_starting = 5; + leader[21] = '5'; } + if (!atoi_n_check(leader+22, 1, length_implementation)) + { + yaz_marc_cprintf(mt, "Length implementation at offset 22 should" + " hold a number. Assuming 0"); + *length_implementation = 0; + leader[22] = '0'; + } + check_ascii(mt, leader, 23, '0'); - if (mt->xml != YAZ_MARC_LINE) + if (mt->debug) { - char str[80]; - int i; - switch(mt->xml) + yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length); + yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length); + yaz_marc_cprintf(mt, "Base address %5d", *base_address); + yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry); + yaz_marc_cprintf(mt, "Length starting %5d", *length_starting); + yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation); + } + yaz_marc_add_leader(mt, leader, 24); +} + +void yaz_marc_subfield_str(yaz_marc_t mt, const char *s) +{ + strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1); + mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0'; +} + +void yaz_marc_endline_str(yaz_marc_t mt, const char *s) +{ + strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1); + mt->endline_str[sizeof(mt->endline_str)-1] = '\0'; +} + +/* try to guess how many bytes the identifier really is! */ +static size_t cdata_one_character(yaz_marc_t mt, const char *buf) +{ + if (mt->iconv_cd) + { + size_t i; + for (i = 1; i<5; i++) { - case YAZ_MARC_ISO2709: - break; - case YAZ_MARC_SIMPLEXML: - wrbuf_puts (wr, "iconv_cd, (char**) &inp, &inbytesleft, + &outp, &outbytesleft); + yaz_iconv(mt->iconv_cd, 0, 0, &outp, &outbytesleft); + if (r != (size_t) (-1)) + return i; /* got a complete sequence */ + } + return 1; /* giving up */ + } + else + { + int error = 0; + size_t no_read = 0; + (void) yaz_read_UTF8_char((const unsigned char *) buf, strlen(buf), + &no_read, &error); + if (error == 0 && no_read > 0) + return no_read; + } + return 1; /* we don't know */ +} + +void yaz_marc_reset(yaz_marc_t mt) +{ + nmem_reset(mt->nmem); + mt->nodes = 0; + mt->nodes_pp = &mt->nodes; + mt->subfield_pp = 0; +} + +int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr) +{ + struct yaz_marc_node *n; + int identifier_length; + const char *leader = 0; + + for (n = mt->nodes; n; n = n->next) + if (n->which == YAZ_MARC_LEADER) + { + leader = n->u.leader; + break; + } + + if (!leader) + return -1; + if (!atoi_n_check(leader+11, 1, &identifier_length)) + return -1; + + for (n = mt->nodes; n; n = n->next) + { + switch(n->which) + { + case YAZ_MARC_COMMENT: + wrbuf_iconv_write(wr, mt->iconv_cd, + n->u.comment, strlen(n->u.comment)); + wrbuf_puts(wr, "\n"); + break; + default: + break; + } + } + return 0; +} + +static size_t get_subfield_len(yaz_marc_t mt, const char *data, + int identifier_length) +{ + /* if identifier length is 2 (most MARCs) or less (probably an error), + the code is a single character .. However we've + seen multibyte codes, so see how big it really is */ + if (identifier_length > 2) + return identifier_length - 1; + else + return cdata_one_character(mt, data); +} + +int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr) +{ + struct yaz_marc_node *n; + int identifier_length; + const char *leader = 0; + + for (n = mt->nodes; n; n = n->next) + if (n->which == YAZ_MARC_LEADER) + { + leader = n->u.leader; + break; + } + + if (!leader) + return -1; + if (!atoi_n_check(leader+11, 1, &identifier_length)) + return -1; + + for (n = mt->nodes; n; n = n->next) + { + struct yaz_marc_subfield *s; + switch(n->which) + { + case YAZ_MARC_DATAFIELD: + wrbuf_printf(wr, "%s %s", n->u.datafield.tag, + n->u.datafield.indicator); + for (s = n->u.datafield.subfields; s; s = s->next) { - sprintf (str, " ImplDefined%d=\"%c\"\n", i, buf[6+i]); - wrbuf_puts (wr, str); + size_t using_code_len = get_subfield_len(mt, s->code_data, + identifier_length); + + wrbuf_puts (wr, mt->subfield_str); + wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data, + using_code_len); + wrbuf_iconv_puts(wr, mt->iconv_cd, " "); + wrbuf_iconv_puts(wr, mt->iconv_cd, + s->code_data + using_code_len); + marc_iconv_reset(mt, wr); } - wrbuf_puts (wr, ">\n"); - break; - case YAZ_MARC_OAIMARC: - wrbuf_puts( - wr, - "\n", - buf[5], buf[6], buf[7]); - wrbuf_puts (wr, str); + wrbuf_puts (wr, mt->endline_str); + break; + case YAZ_MARC_CONTROLFIELD: + wrbuf_printf(wr, "%s", n->u.controlfield.tag); + wrbuf_iconv_puts(wr, mt->iconv_cd, " "); + wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data); + marc_iconv_reset(mt, wr); + wrbuf_puts (wr, mt->endline_str); + break; + case YAZ_MARC_COMMENT: + wrbuf_puts(wr, "("); + wrbuf_iconv_write(wr, mt->iconv_cd, + n->u.comment, strlen(n->u.comment)); + marc_iconv_reset(mt, wr); + wrbuf_puts(wr, ")\n"); break; + case YAZ_MARC_LEADER: + wrbuf_printf(wr, "%s\n", n->u.leader); + } + } + wrbuf_puts(wr, "\n"); + return 0; +} + +int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr) +{ + if (mt->enable_collection == collection_second) + { + switch(mt->output_format) + { case YAZ_MARC_MARCXML: - wrbuf_printf( - wr, - "\n" - " "); - lead[9] = 'a'; /* set leader to signal unicode */ - marc_cdata(mt, lead, 24, wr); - wrbuf_printf(wr, "\n"); - break; - case YAZ_MARC_XCHANGE: - wrbuf_printf( - wr, - "\n" - " "); - marc_cdata(mt, lead, 24, wr); - wrbuf_printf(wr, "\n"); + case YAZ_MARC_TURBOMARC: + wrbuf_printf(wr, "\n"); + break; + case YAZ_MARC_XCHANGE: + wrbuf_printf(wr, "\n"); break; } } - if (mt->debug) + return 0; +} + +void yaz_marc_enable_collection(yaz_marc_t mt) +{ + mt->enable_collection = collection_first; +} + +int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr) +{ + switch(mt->output_format) { - char str[40]; - - wrbuf_puts (wr, "\n"); + case YAZ_MARC_LINE: + return yaz_marc_write_line(mt, wr); + case YAZ_MARC_MARCXML: + return yaz_marc_write_marcxml(mt, wr); + case YAZ_MARC_TURBOMARC: + return yaz_marc_write_turbomarc(mt, wr); + case YAZ_MARC_XCHANGE: + return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */ + case YAZ_MARC_ISO2709: + return yaz_marc_write_iso2709(mt, wr); + case YAZ_MARC_CHECK: + return yaz_marc_write_check(mt, wr); + case YAZ_MARC_JSON: + return yaz_marc_write_json(mt, wr); } + return -1; +} + +static const char *record_name[2] = { "record", "r"}; +static const char *leader_name[2] = { "leader", "l"}; +static const char *controlfield_name[2] = { "controlfield", "c"}; +static const char *datafield_name[2] = { "datafield", "d"}; +static const char *indicator_name[2] = { "ind", "i"}; +static const char *subfield_name[2] = { "subfield", "s"}; + +/** \brief common MARC XML/Xchange/turbomarc writer + \param mt handle + \param wr WRBUF output + \param ns XMLNS for the elements + \param format record format (e.g. "MARC21") + \param type record type (e.g. "Bibliographic") + \param turbo =1 for turbomarc + \retval 0 OK + \retval -1 failure +*/ +static int yaz_marc_write_marcxml_wrbuf(yaz_marc_t mt, WRBUF wr, + const char *ns, + const char *format, + const char *type, + int turbo) +{ + struct yaz_marc_node *n; + int identifier_length; + const char *leader = 0; + + for (n = mt->nodes; n; n = n->next) + if (n->which == YAZ_MARC_LEADER) + { + leader = n->u.leader; + break; + } + + if (!leader) + return -1; + if (!atoi_n_check(leader+11, 1, &identifier_length)) + return -1; - /* first pass. determine length of directory & base of data */ - for (entry_p = 24; buf[entry_p] != ISO2709_FS; ) + if (mt->enable_collection != no_collection) { - /* length of directory entry */ - int l = 3 + length_data_entry + length_starting; - if (entry_p + l >= record_length) - { - wrbuf_printf (wr, "\n", entry_p); - return -1; - } - if (mt->debug) - wrbuf_printf (wr, "\n", - entry_p, buf+entry_p); - /* check for digits in length info */ - while (--l >= 3) - if (!isdigit(*(const unsigned char *) (buf + entry_p+l))) - break; - if (l >= 3) - { - /* not all digits, so stop directory scan */ - wrbuf_printf (wr, "\n", entry_p); - break; - } - entry_p += 3 + length_data_entry + length_starting; + if (mt->enable_collection == collection_first) + { + wrbuf_printf(wr, "\n", ns); + mt->enable_collection = collection_second; + } + wrbuf_printf(wr, "<%s", record_name[turbo]); + } + else + { + wrbuf_printf(wr, "<%s xmlns=\"%s\"", record_name[turbo], ns); + } + if (format) + wrbuf_printf(wr, " format=\"%.80s\"", format); + if (type) + wrbuf_printf(wr, " type=\"%.80s\"", type); + wrbuf_printf(wr, ">\n"); + for (n = mt->nodes; n; n = n->next) + { + struct yaz_marc_subfield *s; + + switch(n->which) + { + case YAZ_MARC_DATAFIELD: + + wrbuf_printf(wr, " <%s", datafield_name[turbo]); + if (!turbo) + wrbuf_printf(wr, " tag=\""); + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag, + strlen(n->u.datafield.tag)); + if (!turbo) + wrbuf_printf(wr, "\""); + if (n->u.datafield.indicator) + { + int i; + for (i = 0; n->u.datafield.indicator[i]; i++) + { + wrbuf_printf(wr, " %s%d=\"", indicator_name[turbo], i+1); + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, + n->u.datafield.indicator+i, 1); + wrbuf_iconv_puts(wr, mt->iconv_cd, "\""); + } + } + wrbuf_printf(wr, ">\n"); + for (s = n->u.datafield.subfields; s; s = s->next) + { + size_t using_code_len = get_subfield_len(mt, s->code_data, + identifier_length); + wrbuf_printf(wr, " <%s", subfield_name[turbo]); + if (!turbo) + { + wrbuf_printf(wr, " code=\""); + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, + s->code_data, using_code_len); + wrbuf_iconv_puts(wr, mt->iconv_cd, "\">"); + } + else + { + element_name_append_attribute_value(mt, wr, "code", s->code_data, using_code_len); + wrbuf_puts(wr, ">"); + } + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, + s->code_data + using_code_len, + strlen(s->code_data + using_code_len)); + marc_iconv_reset(mt, wr); + wrbuf_printf(wr, "code_data, using_code_len); + wrbuf_puts(wr, ">\n"); + } + wrbuf_printf(wr, " iconv_cd, n->u.datafield.tag, + strlen(n->u.datafield.tag)); + wrbuf_printf(wr, ">\n"); + break; + case YAZ_MARC_CONTROLFIELD: + wrbuf_printf(wr, " <%s", controlfield_name[turbo]); + if (!turbo) + { + wrbuf_printf(wr, " tag=\""); + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag, + strlen(n->u.controlfield.tag)); + wrbuf_iconv_puts(wr, mt->iconv_cd, "\">"); + } + else + { + /* TODO convert special */ + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag, + strlen(n->u.controlfield.tag)); + wrbuf_iconv_puts(wr, mt->iconv_cd, ">"); + } + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, + n->u.controlfield.data, + strlen(n->u.controlfield.data)); + marc_iconv_reset(mt, wr); + wrbuf_printf(wr, "iconv_cd, n->u.controlfield.tag, + strlen(n->u.controlfield.tag)); + wrbuf_puts(wr, ">\n"); + break; + case YAZ_MARC_COMMENT: + wrbuf_printf(wr, "\n"); + break; + case YAZ_MARC_LEADER: + wrbuf_printf(wr, " <%s>", leader_name[turbo]); + wrbuf_iconv_write_cdata(wr, + 0 , /* no charset conversion for leader */ + n->u.leader, strlen(n->u.leader)); + wrbuf_printf(wr, "\n", leader_name[turbo]); + } } - end_of_directory = entry_p; - if (base_address != entry_p+1) + wrbuf_printf(wr, "\n", record_name[turbo]); + return 0; +} + +static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr, + const char *ns, + const char *format, + const char *type, + int turbo) +{ + if (mt->write_using_libxml2) { - if (produce_warnings) - wrbuf_printf (wr,"\n", base_address, entry_p+1); +#if YAZ_HAVE_XML2 + int ret; + xmlNode *root_ptr; + + if (!turbo) + ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type); + else + ret = yaz_marc_write_xml_turbo_xml(mt, &root_ptr, ns, format, type); + if (ret == 0) + { + xmlChar *buf_out; + xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0"); + int len_out; + + xmlDocSetRootElement(doc, root_ptr); + xmlDocDumpMemory(doc, &buf_out, &len_out); + + wrbuf_write(wr, (const char *) buf_out, len_out); + wrbuf_puts(wr, ""); + xmlFree(buf_out); + xmlFreeDoc(doc); + } + return ret; +#else + return -1; +#endif } - if (mt->xml == YAZ_MARC_ISO2709) + else + return yaz_marc_write_marcxml_wrbuf(mt, wr, ns, format, type, turbo); +} + +int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr) +{ + /* set leader 09 to 'a' for UNICODE */ + /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */ + if (!mt->leader_spec) + yaz_marc_modify_leader(mt, 9, "a"); + return yaz_marc_write_marcxml_ns(mt, wr, + "http://www.loc.gov/MARC21/slim", + 0, 0, 0); +} + +int yaz_marc_write_turbomarc(yaz_marc_t mt, WRBUF wr) +{ + /* set leader 09 to 'a' for UNICODE */ + /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */ + if (!mt->leader_spec) + yaz_marc_modify_leader(mt, 9, "a"); + return yaz_marc_write_marcxml_ns(mt, wr, + "http://www.indexdata.com/turbomarc", 0, 0, 1); +} + +int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr, + const char *format, + const char *type) +{ + return yaz_marc_write_marcxml_ns(mt, wr, + "info:lc/xmlns/marcxchange-v1", + 0, 0, 0); +} + +#if YAZ_HAVE_XML2 + +void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n, + xmlNode *record_ptr, + xmlNsPtr ns_record, WRBUF wr_cdata, + int identifier_length) +{ + xmlNode *ptr; + struct yaz_marc_subfield *s; + WRBUF subfield_name = wrbuf_alloc(); + + /* TODO consider if safe */ + char field[10]; + field[0] = 'd'; + strncpy(field + 1, n->u.datafield.tag, 3); + field[4] = '\0'; + ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST field, 0); + + if (n->u.datafield.indicator) { - WRBUF wr_head = wrbuf_alloc(); - WRBUF wr_dir = wrbuf_alloc(); - WRBUF wr_tmp = wrbuf_alloc(); - - int data_p = 0; - /* second pass. create directory for ISO2709 output */ - for (entry_p = 24; entry_p != end_of_directory; ) - { - int data_length, data_offset, end_offset; - int i, sz1, sz2; - - wrbuf_write(wr_dir, buf+entry_p, 3); - entry_p += 3; - - data_length = atoi_n (buf+entry_p, length_data_entry); - entry_p += length_data_entry; - data_offset = atoi_n (buf+entry_p, length_starting); - entry_p += length_starting; - i = data_offset + base_address; - end_offset = i+data_length-1; - - if (data_length <= 0 || data_offset < 0 || end_offset >= record_length) - return -1; - - while (i < end_offset && - buf[i] != ISO2709_RS && buf[i] != ISO2709_FS) - i++; - sz1 = 1+i - (data_offset + base_address); - if (mt->iconv_cd) - { - sz2 = wrbuf_iconv_write(wr_tmp, mt->iconv_cd, - buf + data_offset+base_address, sz1); - wrbuf_rewind(wr_tmp); - } - else - sz2 = sz1; - wrbuf_printf(wr_dir, "%0*d", length_data_entry, sz2); - wrbuf_printf(wr_dir, "%0*d", length_starting, data_p); - data_p += sz2; - } - wrbuf_putc(wr_dir, ISO2709_FS); - wrbuf_printf(wr_head, "%05d", data_p+1 + base_address); - wrbuf_write(wr_head, lead+5, 7); - wrbuf_printf(wr_head, "%05d", base_address); - wrbuf_write(wr_head, lead+17, 7); - - wrbuf_write(wr, wrbuf_buf(wr_head), 24); - wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir)); - wrbuf_free(wr_head, 1); - wrbuf_free(wr_dir, 1); - wrbuf_free(wr_tmp, 1); + int i; + for (i = 0; n->u.datafield.indicator[i]; i++) + { + char ind_str[6]; + char ind_val[2]; + + ind_val[0] = n->u.datafield.indicator[i]; + ind_val[1] = '\0'; + sprintf(ind_str, "%s%d", indicator_name[1], i+1); + xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val); + } } - /* third pass. create data output */ - for (entry_p = 24; entry_p != end_of_directory; ) + for (s = n->u.datafield.subfields; s; s = s->next) { - int data_length; - int data_offset; - int end_offset; - int i, j; - char tag[4]; - int identifier_flag = 0; - int entry_p0 = entry_p; - - memcpy (tag, buf+entry_p, 3); - entry_p += 3; - tag[3] = '\0'; - data_length = atoi_n (buf+entry_p, length_data_entry); - entry_p += length_data_entry; - data_offset = atoi_n (buf+entry_p, length_starting); - entry_p += length_starting; - i = data_offset + base_address; - end_offset = i+data_length-1; - - if (data_length <= 0 || data_offset < 0) - break; - - if (mt->debug) - { - wrbuf_printf(wr, "\n", - entry_p0, data_length, data_offset); - } - if (end_offset >= record_length) - { - wrbuf_printf (wr,"\n", - entry_p0, end_offset, record_length); - break; - } - - if (memcmp (tag, "00", 2)) - identifier_flag = 1; /* if not 00X assume subfields */ - else if (indicator_length < 4 && indicator_length > 0) + int not_written; + xmlNode *ptr_subfield; + size_t using_code_len = get_subfield_len(mt, s->code_data, + identifier_length); + wrbuf_rewind(wr_cdata); + wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, s->code_data + using_code_len); + marc_iconv_reset(mt, wr_cdata); + + wrbuf_rewind(subfield_name); + wrbuf_puts(subfield_name, "s"); + not_written = element_name_append_attribute_value(mt, subfield_name, 0, s->code_data, using_code_len) != 0; + ptr_subfield = xmlNewTextChild(ptr, ns_record, + BAD_CAST wrbuf_cstr(subfield_name), + BAD_CAST wrbuf_cstr(wr_cdata)); + if (not_written) { - /* Danmarc 00X have subfields */ - if (buf[i + indicator_length] == ISO2709_IDFS) - identifier_flag = 1; - else if (buf[i + indicator_length + 1] == ISO2709_IDFS) - identifier_flag = 2; + /* Generate code attribute value and add */ + wrbuf_rewind(wr_cdata); + wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len); + xmlNewProp(ptr_subfield, BAD_CAST "code", BAD_CAST wrbuf_cstr(wr_cdata)); } + } + wrbuf_destroy(subfield_name); +} - if (mt->debug) - { - wrbuf_printf(wr, "\n", - identifier_flag); - } - - switch(mt->xml) +static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr, + const char *ns, + const char *format, + const char *type) +{ + struct yaz_marc_node *n; + int identifier_length; + const char *leader = 0; + xmlNode *record_ptr; + xmlNsPtr ns_record; + WRBUF wr_cdata = 0; + + for (n = mt->nodes; n; n = n->next) + if (n->which == YAZ_MARC_LEADER) { - case YAZ_MARC_LINE: - wrbuf_puts (wr, tag); - wrbuf_puts (wr, " "); - break; - case YAZ_MARC_SIMPLEXML: - wrbuf_printf (wr, "u.leader; + break; + } + + if (!leader) + return -1; + if (!atoi_n_check(leader+11, 1, &identifier_length)) + return -1; + + wr_cdata = wrbuf_alloc(); + + record_ptr = xmlNewNode(0, BAD_CAST "r"); + *root_ptr = record_ptr; + + ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0); + xmlSetNs(record_ptr, ns_record); + + if (format) + xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format); + if (type) + xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type); + for (n = mt->nodes; n; n = n->next) + { + xmlNode *ptr; + + char field[10]; + field[0] = 'c'; + field[4] = '\0'; + + switch(n->which) + { + case YAZ_MARC_DATAFIELD: + add_marc_datafield_turbo_xml(mt, n, record_ptr, ns_record, wr_cdata, identifier_length); + break; + case YAZ_MARC_CONTROLFIELD: + wrbuf_rewind(wr_cdata); + wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data); + marc_iconv_reset(mt, wr_cdata); + + strncpy(field + 1, n->u.controlfield.tag, 3); + ptr = xmlNewTextChild(record_ptr, ns_record, + BAD_CAST field, + BAD_CAST wrbuf_cstr(wr_cdata)); + break; + case YAZ_MARC_COMMENT: + ptr = xmlNewComment(BAD_CAST n->u.comment); + xmlAddChild(record_ptr, ptr); + break; + case YAZ_MARC_LEADER: + xmlNewTextChild(record_ptr, ns_record, BAD_CAST "l", + BAD_CAST n->u.leader); + break; + } + } + wrbuf_destroy(wr_cdata); + return 0; +} + + +int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr, + const char *ns, + const char *format, + const char *type) +{ + struct yaz_marc_node *n; + int identifier_length; + const char *leader = 0; + xmlNode *record_ptr; + xmlNsPtr ns_record; + WRBUF wr_cdata = 0; + + for (n = mt->nodes; n; n = n->next) + if (n->which == YAZ_MARC_LEADER) + { + leader = n->u.leader; break; - case YAZ_MARC_MARCXML: - case YAZ_MARC_XCHANGE: - if (identifier_flag) - wrbuf_printf (wr, " nodes; n; n = n->next) + { + struct yaz_marc_subfield *s; + xmlNode *ptr; + + switch(n->which) + { + case YAZ_MARC_DATAFIELD: + ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0); + xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag); + if (n->u.datafield.indicator) { - switch(mt->xml) + int i; + for (i = 0; n->u.datafield.indicator[i]; i++) { - case YAZ_MARC_ISO2709: - wrbuf_putc(wr, buf[i]); - break; - case YAZ_MARC_LINE: - wrbuf_putc(wr, buf[i]); - break; - case YAZ_MARC_SIMPLEXML: - wrbuf_printf(wr, " Indicator%d=\"", j+1); - marc_cdata(mt, buf+i, 1, wr); - wrbuf_printf(wr, "\""); - break; - case YAZ_MARC_OAIMARC: - wrbuf_printf(wr, " i%d=\"", j+1); - marc_cdata(mt, buf+i, 1, wr); - wrbuf_printf(wr, "\""); - break; - case YAZ_MARC_MARCXML: - case YAZ_MARC_XCHANGE: - wrbuf_printf(wr, " ind%d=\"", j+1); - marc_cdata(mt, buf+i, 1, wr); - wrbuf_printf(wr, "\""); + char ind_str[6]; + char ind_val[2]; + + sprintf(ind_str, "ind%d", i+1); + ind_val[0] = n->u.datafield.indicator[i]; + ind_val[1] = '\0'; + xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val); } } - } - if (mt->xml == YAZ_MARC_SIMPLEXML || mt->xml == YAZ_MARC_MARCXML - || mt->xml == YAZ_MARC_OAIMARC || mt->xml == YAZ_MARC_XCHANGE) - { - wrbuf_puts (wr, ">"); - if (identifier_flag) - wrbuf_puts (wr, "\n"); + for (s = n->u.datafield.subfields; s; s = s->next) + { + xmlNode *ptr_subfield; + size_t using_code_len = get_subfield_len(mt, s->code_data, + identifier_length); + wrbuf_rewind(wr_cdata); + wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, + s->code_data + using_code_len); + marc_iconv_reset(mt, wr_cdata); + ptr_subfield = xmlNewTextChild( + ptr, ns_record, + BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata)); + + wrbuf_rewind(wr_cdata); + wrbuf_iconv_write(wr_cdata, mt->iconv_cd, + s->code_data, using_code_len); + xmlNewProp(ptr_subfield, BAD_CAST "code", + BAD_CAST wrbuf_cstr(wr_cdata)); + } + break; + case YAZ_MARC_CONTROLFIELD: + wrbuf_rewind(wr_cdata); + wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data); + marc_iconv_reset(mt, wr_cdata); + + ptr = xmlNewTextChild(record_ptr, ns_record, + BAD_CAST "controlfield", + BAD_CAST wrbuf_cstr(wr_cdata)); + + xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag); + break; + case YAZ_MARC_COMMENT: + ptr = xmlNewComment(BAD_CAST n->u.comment); + xmlAddChild(record_ptr, ptr); + break; + case YAZ_MARC_LEADER: + xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader", + BAD_CAST n->u.leader); + break; } - if (identifier_flag) + } + wrbuf_destroy(wr_cdata); + return 0; +} + +#endif + +int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr) +{ + struct yaz_marc_node *n; + int indicator_length; + int identifier_length; + int length_data_entry; + int length_starting; + int length_implementation; + int data_offset = 0; + const char *leader = 0; + WRBUF wr_dir, wr_head, wr_data_tmp; + int base_address; + + for (n = mt->nodes; n; n = n->next) + if (n->which == YAZ_MARC_LEADER) + leader = n->u.leader; + + if (!leader) + return -1; + if (!atoi_n_check(leader+10, 1, &indicator_length)) + return -1; + if (!atoi_n_check(leader+11, 1, &identifier_length)) + return -1; + if (!atoi_n_check(leader+20, 1, &length_data_entry)) + return -1; + if (!atoi_n_check(leader+21, 1, &length_starting)) + return -1; + if (!atoi_n_check(leader+22, 1, &length_implementation)) + return -1; + + wr_data_tmp = wrbuf_alloc(); + wr_dir = wrbuf_alloc(); + for (n = mt->nodes; n; n = n->next) + { + int data_length = 0; + struct yaz_marc_subfield *s; + + switch(n->which) { - while (i < end_offset && - buf[i] != ISO2709_RS && buf[i] != ISO2709_FS) + case YAZ_MARC_DATAFIELD: + wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag); + data_length += indicator_length; + wrbuf_rewind(wr_data_tmp); + for (s = n->u.datafield.subfields; s; s = s->next) { - int i0; - i++; - switch(mt->xml) - { - case YAZ_MARC_ISO2709: - --i; - wrbuf_iconv_write(wr, mt->iconv_cd, - buf+i, identifier_length); - i += identifier_length; - break; - case YAZ_MARC_LINE: - wrbuf_puts (wr, mt->subfield_str); - marc_cdata(mt, buf+i, identifier_length-1, wr); - i = i+identifier_length-1; - wrbuf_putc (wr, ' '); - break; - case YAZ_MARC_SIMPLEXML: - wrbuf_puts (wr, " "); - break; - case YAZ_MARC_OAIMARC: - wrbuf_puts (wr, " "); - break; - case YAZ_MARC_MARCXML: - case YAZ_MARC_XCHANGE: - wrbuf_puts (wr, " "); - break; - } - i0 = i; - while (i < end_offset && - buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS && - buf[i] != ISO2709_FS) - i++; - marc_cdata(mt, buf + i0, i - i0, wr); - - if (mt->xml == YAZ_MARC_ISO2709 && buf[i] != ISO2709_IDFS) - marc_cdata(mt, buf + i, 1, wr); - - if (mt->xml == YAZ_MARC_SIMPLEXML || - mt->xml == YAZ_MARC_MARCXML || - mt->xml == YAZ_MARC_XCHANGE || - mt->xml == YAZ_MARC_OAIMARC) - wrbuf_puts (wr, "\n"); + /* write dummy IDFS + content */ + wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' '); + wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data); + marc_iconv_reset(mt, wr_data_tmp); } + /* write dummy FS (makes MARC-8 to become ASCII) */ + wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' '); + marc_iconv_reset(mt, wr_data_tmp); + data_length += wrbuf_len(wr_data_tmp); + break; + case YAZ_MARC_CONTROLFIELD: + wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag); + + wrbuf_rewind(wr_data_tmp); + wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, + n->u.controlfield.data); + marc_iconv_reset(mt, wr_data_tmp); + wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */ + marc_iconv_reset(mt, wr_data_tmp); + data_length += wrbuf_len(wr_data_tmp); + break; + case YAZ_MARC_COMMENT: + break; + case YAZ_MARC_LEADER: + break; } - else + if (data_length) { - int i0 = i; - while (i < end_offset && - buf[i] != ISO2709_RS && buf[i] != ISO2709_FS) - i++; - marc_cdata(mt, buf + i0, i - i0, wr); - if (mt->xml == YAZ_MARC_ISO2709) - marc_cdata(mt, buf + i, 1, wr); - } - if (mt->xml == YAZ_MARC_LINE) - wrbuf_puts (wr, mt->endline_str); - if (i < end_offset) - wrbuf_printf(wr, "\n", data_length); - if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS) - wrbuf_printf(wr, "\n", data_length); - switch(mt->xml) + wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length); + wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset); + data_offset += data_length; + } + } + /* mark end of directory */ + wrbuf_putc(wr_dir, ISO2709_FS); + + /* base address of data (comes after leader+directory) */ + base_address = 24 + wrbuf_len(wr_dir); + + wr_head = wrbuf_alloc(); + + /* write record length */ + wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1); + /* from "original" leader */ + wrbuf_write(wr_head, leader+5, 7); + /* base address of data */ + wrbuf_printf(wr_head, "%05d", base_address); + /* from "original" leader */ + wrbuf_write(wr_head, leader+17, 7); + + wrbuf_write(wr, wrbuf_buf(wr_head), 24); + wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir)); + wrbuf_destroy(wr_head); + wrbuf_destroy(wr_dir); + wrbuf_destroy(wr_data_tmp); + + for (n = mt->nodes; n; n = n->next) + { + struct yaz_marc_subfield *s; + + switch(n->which) { - case YAZ_MARC_SIMPLEXML: - wrbuf_puts (wr, "\n"); + case YAZ_MARC_DATAFIELD: + wrbuf_write(wr, n->u.datafield.indicator, indicator_length); + for (s = n->u.datafield.subfields; s; s = s->next) + { + wrbuf_putc(wr, ISO2709_IDFS); + wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data); + marc_iconv_reset(mt, wr); + } + wrbuf_putc(wr, ISO2709_FS); break; - case YAZ_MARC_OAIMARC: - if (identifier_flag) - wrbuf_puts (wr, "\n"); - else - wrbuf_puts (wr, "\n"); + case YAZ_MARC_CONTROLFIELD: + wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data); + marc_iconv_reset(mt, wr); + wrbuf_putc(wr, ISO2709_FS); break; - case YAZ_MARC_MARCXML: - case YAZ_MARC_XCHANGE: - if (identifier_flag) - wrbuf_puts (wr, " \n"); - else - wrbuf_puts (wr, "\n"); + case YAZ_MARC_COMMENT: + break; + case YAZ_MARC_LEADER: break; } } - switch (mt->xml) + wrbuf_printf(wr, "%c", ISO2709_RS); + return 0; +} + +int yaz_marc_write_json(yaz_marc_t mt, WRBUF w) +{ + int identifier_length; + struct yaz_marc_node *n; + const char *leader = 0; + int first = 1; + + wrbuf_puts(w, "{\n"); + for (n = mt->nodes; n; n = n->next) + if (n->which == YAZ_MARC_LEADER) + leader = n->u.leader; + + if (!leader) + return -1; + + if (!atoi_n_check(leader+11, 1, &identifier_length)) + return -1; + + wrbuf_puts(w, "\t\"leader\":\""); + wrbuf_json_puts(w, leader); + wrbuf_puts(w, "\",\n"); + wrbuf_puts(w, "\t\"fields\":\n\t[\n"); + + for (n = mt->nodes; n; n = n->next) { - case YAZ_MARC_LINE: - wrbuf_puts (wr, ""); - break; - case YAZ_MARC_SIMPLEXML: - wrbuf_puts (wr, "\n"); - break; - case YAZ_MARC_OAIMARC: - wrbuf_puts (wr, "\n"); - break; - case YAZ_MARC_MARCXML: - case YAZ_MARC_XCHANGE: - wrbuf_puts (wr, "\n"); - break; - case YAZ_MARC_ISO2709: - wrbuf_putc (wr, ISO2709_RS); - break; + struct yaz_marc_subfield *s; + const char *sep = ""; + switch (n->which) + { + case YAZ_MARC_LEADER: + case YAZ_MARC_COMMENT: + break; + case YAZ_MARC_CONTROLFIELD: + if (first) + first = 0; + else + wrbuf_puts(w, ",\n"); + wrbuf_puts(w, "\t\t{\n\t\t\t\""); + wrbuf_iconv_json_puts(w, mt->iconv_cd, n->u.controlfield.tag); + wrbuf_puts(w, "\":\""); + wrbuf_iconv_json_puts(w, mt->iconv_cd, n->u.controlfield.data); + wrbuf_puts(w, "\"\n\t\t}"); + break; + case YAZ_MARC_DATAFIELD: + if (first) + first = 0; + else + wrbuf_puts(w, ",\n"); + + wrbuf_puts(w, "\t\t{\n\t\t\t\""); + wrbuf_json_puts(w, n->u.datafield.tag); + wrbuf_puts(w, "\":\n\t\t\t{\n\t\t\t\t\"subfields\":\n\t\t\t\t[\n"); + for (s = n->u.datafield.subfields; s; s = s->next) + { + size_t using_code_len = get_subfield_len(mt, s->code_data, + identifier_length); + wrbuf_puts(w, sep); + sep = ",\n"; + wrbuf_puts(w, "\t\t\t\t\t{\n\t\t\t\t\t\t\""); + wrbuf_iconv_json_write(w, mt->iconv_cd, + s->code_data, using_code_len); + wrbuf_puts(w, "\":\""); + wrbuf_iconv_json_puts(w, mt->iconv_cd, + s->code_data + using_code_len); + wrbuf_puts(w, "\"\n\t\t\t\t\t}"); + } + wrbuf_puts(w, "\n\t\t\t\t]"); + if (n->u.datafield.indicator[0]) + { + int i; + for (i = 0; n->u.datafield.indicator[i]; i++) + { + wrbuf_printf(w, ",\n\t\t\t\t\"ind%d\":\"%c\"", i + 1, + n->u.datafield.indicator[i]); + } + } + wrbuf_puts(w, "\n\t\t\t}\n"); + wrbuf_puts(w, "\n\t\t}"); + break; + } } - return record_length; + wrbuf_puts(w, "\n\t]\n"); + wrbuf_puts(w, "}\n"); + return 0; +} + +int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) +{ + int s, r = yaz_marc_read_iso2709(mt, buf, bsize); + if (r <= 0) + return r; + s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */ + if (s != 0) + return -1; /* error */ + return r; /* OK, return length > 0 */ } int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize, - char **result, int *rsize) + const char **result, size_t *rsize) { - int r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr); + int r; + + wrbuf_rewind(mt->m_wr); + r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr); if (result) - *result = wrbuf_buf(mt->m_wr); + *result = wrbuf_cstr(mt->m_wr); if (rsize) *rsize = wrbuf_len(mt->m_wr); return r; @@ -571,8 +1353,7 @@ int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize, void yaz_marc_xml(yaz_marc_t mt, int xmlmode) { - if (mt) - mt->xml = xmlmode; + mt->output_format = xmlmode; } void yaz_marc_debug(yaz_marc_t mt, int level) @@ -586,50 +1367,120 @@ void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd) mt->iconv_cd = cd; } -/* depricated */ -int yaz_marc_decode(const char *buf, WRBUF wr, int debug, int bsize, int xml) +yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt) { - yaz_marc_t mt = yaz_marc_create(); - int r; + return mt->iconv_cd; +} - mt->debug = debug; - mt->xml = xml; - r = yaz_marc_decode_wrbuf(mt, buf, bsize, wr); - yaz_marc_destroy(mt); - return r; +void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str) +{ + struct yaz_marc_node *n; + char *leader = 0; + for (n = mt->nodes; n; n = n->next) + if (n->which == YAZ_MARC_LEADER) + { + leader = n->u.leader; + memcpy(leader+off, str, strlen(str)); + break; + } } -/* depricated */ -int marc_display_wrbuf (const char *buf, WRBUF wr, int debug, int bsize) +int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec) { - return yaz_marc_decode(buf, wr, debug, bsize, 0); + xfree(mt->leader_spec); + mt->leader_spec = 0; + if (leader_spec) + { + char dummy_leader[24]; + if (marc_exec_leader(leader_spec, dummy_leader, 24)) + return -1; + mt->leader_spec = xstrdup(leader_spec); + } + return 0; } -/* depricated */ -int marc_display_exl (const char *buf, FILE *outf, int debug, int bsize) +static int marc_exec_leader(const char *leader_spec, char *leader, size_t size) { - yaz_marc_t mt = yaz_marc_create(); - int r; + const char *cp = leader_spec; + while (cp) + { + char val[21]; + int pos; + int no_read = 0, no = 0; - mt->debug = debug; - r = yaz_marc_decode_wrbuf (mt, buf, bsize, mt->m_wr); - if (!outf) - outf = stdout; - if (r > 0) - fwrite (wrbuf_buf(mt->m_wr), 1, wrbuf_len(mt->m_wr), outf); - yaz_marc_destroy(mt); - return r; + no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read); + if (no < 2 || no_read < 3) + return -1; + if (pos < 0 || (size_t) pos >= size) + return -1; + + if (*val == '\'') + { + const char *vp = strchr(val+1, '\''); + size_t len; + + if (!vp) + return -1; + len = vp-val-1; + if (len + pos > size) + return -1; + memcpy(leader + pos, val+1, len); + } + else if (*val >= '0' && *val <= '9') + { + int ch = atoi(val); + leader[pos] = ch; + } + else + return -1; + cp += no_read; + if (*cp != ',') + break; + + cp++; + } + return 0; } -/* depricated */ -int marc_display_ex (const char *buf, FILE *outf, int debug) +int yaz_marc_decode_formatstr(const char *arg) { - return marc_display_exl (buf, outf, debug, -1); + int mode = -1; + if (!strcmp(arg, "marc")) + mode = YAZ_MARC_ISO2709; + if (!strcmp(arg, "marcxml")) + mode = YAZ_MARC_MARCXML; + if (!strcmp(arg, "turbomarc")) + mode = YAZ_MARC_TURBOMARC; + if (!strcmp(arg, "marcxchange")) + mode = YAZ_MARC_XCHANGE; + if (!strcmp(arg, "line")) + mode = YAZ_MARC_LINE; + if (!strcmp(arg, "json")) + mode = YAZ_MARC_JSON; + return mode; } -/* depricated */ -int marc_display (const char *buf, FILE *outf) +void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable) { - return marc_display_ex (buf, outf, 0); + mt->write_using_libxml2 = enable; } +int yaz_marc_check_marc21_coding(const char *charset, + const char *marc_buf, int sz) +{ + if ((!yaz_matchstr(charset, "MARC8?") || + !yaz_matchstr(charset, "MARC8")) && marc_buf && sz > 25 + && marc_buf[9] == 'a') + return 1; + return 0; +} + +/* + * Local variables: + * c-basic-offset: 4 + * c-file-style: "Stroustrup" + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ +