X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Fmarcdisp.c;h=53d4e81eeefa87e8f8d1eca91daa3d8246aec72b;hp=e7df01ec1aa2cbd13a54dabfac6662ae32522e15;hb=c54a5790984e8bd597a8ccd4d1d2d0cae6da8af3;hpb=a1adc770bc8b74097e1d4388d16b1246063da7e7 diff --git a/src/marcdisp.c b/src/marcdisp.c index e7df01e..53d4e81 100644 --- a/src/marcdisp.c +++ b/src/marcdisp.c @@ -1,8 +1,6 @@ -/* - * Copyright (C) 1995-2006, Index Data ApS +/* This file is part of the YAZ toolkit. + * Copyright (C) 1995-2010 Index Data * See the file LICENSE for details. - * - * $Id: marcdisp.c,v 1.30 2006-05-23 13:17:30 adam Exp $ */ /** @@ -20,19 +18,26 @@ #include #include +#include #include #include #include #include #include +#include +#include -#if HAVE_XML2 +#if YAZ_HAVE_XML2 #include #include #endif -static void yaz_marc_reset(yaz_marc_t mt); - +enum yaz_collection_state { + no_collection, + collection_first, + collection_second +}; + /** \brief node types for yaz_marc_node */ enum YAZ_MARC_NODE_TYPE { @@ -82,11 +87,15 @@ struct yaz_marc_subfield { struct yaz_marc_t_ { WRBUF m_wr; NMEM nmem; - int xml; + int input_format; + int output_format; int debug; + int write_using_libxml2; + enum yaz_collection_state enable_collection; yaz_iconv_t iconv_cd; char subfield_str[8]; char endline_str[8]; + char *leader_spec; struct yaz_marc_node *nodes; struct yaz_marc_node **nodes_pp; struct yaz_marc_subfield **subfield_pp; @@ -95,10 +104,13 @@ struct yaz_marc_t_ { yaz_marc_t yaz_marc_create(void) { yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt)); - mt->xml = YAZ_MARC_LINE; + mt->output_format = YAZ_MARC_LINE; mt->debug = 0; + mt->write_using_libxml2 = 0; + mt->enable_collection = no_collection; mt->m_wr = wrbuf_alloc(); mt->iconv_cd = 0; + mt->leader_spec = 0; strcpy(mt->subfield_str, " $"); strcpy(mt->endline_str, "\n"); @@ -112,19 +124,57 @@ void yaz_marc_destroy(yaz_marc_t mt) if (!mt) return ; nmem_destroy(mt->nmem); - wrbuf_free (mt->m_wr, 1); - xfree (mt); + wrbuf_destroy(mt->m_wr); + xfree(mt->leader_spec); + xfree(mt); +} + +NMEM yaz_marc_get_nmem(yaz_marc_t mt) +{ + return mt->nmem; +} + +static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr) +{ + wrbuf_iconv_reset(wr, mt->iconv_cd); } -struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt) +static int marc_exec_leader(const char *leader_spec, char *leader, + size_t size); + + +static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt) { - struct yaz_marc_node *n = nmem_malloc(mt->nmem, sizeof(*n)); + struct yaz_marc_node *n = (struct yaz_marc_node *) + nmem_malloc(mt->nmem, sizeof(*n)); n->next = 0; *mt->nodes_pp = n; mt->nodes_pp = &n->next; return n; } +#if YAZ_HAVE_XML2 +void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag, + const xmlNode *ptr_data) +{ + struct yaz_marc_node *n = yaz_marc_add_node(mt); + n->which = YAZ_MARC_CONTROLFIELD; + n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem); + n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem); +} + +void yaz_marc_add_controlfield_turbo_xml(yaz_marc_t mt, char *tag, + const xmlNode *ptr_data) +{ + struct yaz_marc_node *n = yaz_marc_add_node(mt); + n->which = YAZ_MARC_CONTROLFIELD; + n->u.controlfield.tag = tag; + n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem); +} + +#endif + + void yaz_marc_add_comment(yaz_marc_t mt, char *comment) { struct yaz_marc_node *n = yaz_marc_add_node(mt); @@ -136,28 +186,24 @@ void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...) { va_list ap; char buf[200]; - va_start(ap, fmt); -#ifdef WIN32 - _vsnprintf(buf, sizeof(buf)-1, fmt, ap); -#else -/* !WIN32 */ -#if HAVE_VSNPRINTF - vsnprintf(buf, sizeof(buf), fmt, ap); -#else - vsprintf(buf, fmt, ap); -#endif -#endif -/* WIN32 */ + va_start(ap, fmt); + yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap); yaz_marc_add_comment(mt, buf); va_end (ap); } +int yaz_marc_get_debug(yaz_marc_t mt) +{ + return mt->debug; +} + void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len) { struct yaz_marc_node *n = yaz_marc_add_node(mt); n->which = YAZ_MARC_LEADER; n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len); + marc_exec_leader(mt->leader_spec, n->u.leader, leader_len); } void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag, @@ -181,17 +227,6 @@ void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag, } } -#if HAVE_XML2 -void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag, - const xmlNode *ptr_data) -{ - struct yaz_marc_node *n = yaz_marc_add_node(mt); - n->which = YAZ_MARC_CONTROLFIELD; - n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem); - n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem); -} -#endif - void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag, const char *indicator, size_t indicator_len) { @@ -206,7 +241,34 @@ void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag, mt->subfield_pp = &n->u.datafield.subfields; } -#if HAVE_XML2 +char *element_name_encode(yaz_marc_t mt, WRBUF buffer, char *code_data, size_t code_len) { + // TODO Map special codes to something possible for XML ELEMENT names + + int encode = 0; + int index = 0; + for (index = 0; index < code_len; index++) { + if (!((code_data[index] >= '0' && code_data[index] <= '9') || + (code_data[index] >= 'a' && code_data[index] <= 'z') || + (code_data[index] >= 'A' && code_data[index] <= 'Z'))) + encode = 1; + } + if (!encode) { + wrbuf_iconv_write(buffer, mt->iconv_cd, code_data, code_len); + } + else { + char temp[2*code_len + 1]; + wrbuf_puts(buffer, "-"); + int index; + for (index = 0; index < code_len; index++) { + sprintf(temp, "%02X", (unsigned char) code_data[index] & 0xFF); + temp[2] = 0; + wrbuf_puts(buffer, temp); + }; + yaz_log(YLOG_WARN, "Using numeric value in element name: %s", wrbuf_cstr(buffer)); + } +} + +#if YAZ_HAVE_XML2 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag, const char *indicator, size_t indicator_len) { @@ -220,6 +282,25 @@ void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag, /* make subfield_pp the current (last one) */ mt->subfield_pp = &n->u.datafield.subfields; } + +struct yaz_marc_node* yaz_marc_add_datafield_turbo_xml(yaz_marc_t mt, char *tag_value) +{ + struct yaz_marc_node *n = yaz_marc_add_node(mt); + n->which = YAZ_MARC_DATAFIELD; + n->u.datafield.tag = tag_value; + n->u.datafield.indicator = 0; + n->u.datafield.subfields = 0; + + /* make subfield_pp the current (last one) */ + mt->subfield_pp = &n->u.datafield.subfields; + return n; +} + +void yaz_marc_datafield_set_indicators(struct yaz_marc_node *n, char *indicator) +{ + n->u.datafield.indicator = indicator; +} + #endif void yaz_marc_add_subfield(yaz_marc_t mt, @@ -240,7 +321,8 @@ void yaz_marc_add_subfield(yaz_marc_t mt, if (mt->subfield_pp) { - struct yaz_marc_subfield *n = nmem_malloc(mt->nmem, sizeof(*n)); + struct yaz_marc_subfield *n = (struct yaz_marc_subfield *) + nmem_malloc(mt->nmem, sizeof(*n)); n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len); n->next = 0; /* mark subfield_pp to point to this one, so we append here next */ @@ -249,31 +331,13 @@ void yaz_marc_add_subfield(yaz_marc_t mt, } } -static int atoi_n_check(const char *buf, int size, int *val) -{ - if (!isdigit(*(const unsigned char *) buf)) - return 0; - *val = atoi_n(buf, size); - return 1; -} - -/** \brief reads the MARC 24 bytes leader and checks content - \param mt handle - \param leader of the 24 byte leader - \param indicator_length indicator length - \param identifier_length identifier length - \param base_address base address - \param length_data_entry length of data entry - \param length_starting length of starting - \param length_implementation length of implementation defined data -*/ -static void yaz_marc_read_leader(yaz_marc_t mt, const char *leader_c, - int *indicator_length, - int *identifier_length, - int *base_address, - int *length_data_entry, - int *length_starting, - int *length_implementation) +void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c, + int *indicator_length, + int *identifier_length, + int *base_address, + int *length_data_entry, + int *length_starting, + int *length_implementation) { char leader[24]; @@ -375,7 +439,7 @@ static size_t cdata_one_character(yaz_marc_t mt, const char *buf) return 1; /* we don't know */ } -static void yaz_marc_reset(yaz_marc_t mt) +void yaz_marc_reset(yaz_marc_t mt) { nmem_reset(mt->nmem); mt->nodes = 0; @@ -383,6 +447,52 @@ static void yaz_marc_reset(yaz_marc_t mt) mt->subfield_pp = 0; } +int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr) +{ + struct yaz_marc_node *n; + int identifier_length; + const char *leader = 0; + + for (n = mt->nodes; n; n = n->next) + if (n->which == YAZ_MARC_LEADER) + { + leader = n->u.leader; + break; + } + + if (!leader) + return -1; + if (!atoi_n_check(leader+11, 1, &identifier_length)) + return -1; + + for (n = mt->nodes; n; n = n->next) + { + switch(n->which) + { + case YAZ_MARC_COMMENT: + wrbuf_iconv_write(wr, mt->iconv_cd, + n->u.comment, strlen(n->u.comment)); + wrbuf_puts(wr, "\n"); + break; + default: + break; + } + } + return 0; +} + +static size_t get_subfield_len(yaz_marc_t mt, const char *data, + int identifier_length) +{ + /* if identifier length is 2 (most MARCs) or less (probably an error), + the code is a single character .. However we've + seen multibyte codes, so see how big it really is */ + if (identifier_length > 2) + return identifier_length - 1; + else + return cdata_one_character(mt, data); +} + int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr) { struct yaz_marc_node *n; @@ -411,57 +521,92 @@ int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr) n->u.datafield.indicator); for (s = n->u.datafield.subfields; s; s = s->next) { - /* if identifier length is 2 (most MARCs), - the code is a single character .. However we've - seen multibyte codes, so see how big it really is */ - size_t using_code_len = - (identifier_length != 2) ? identifier_length - 1 - : - cdata_one_character(mt, s->code_data); + size_t using_code_len = get_subfield_len(mt, s->code_data, + identifier_length); wrbuf_puts (wr, mt->subfield_str); wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data, using_code_len); - wrbuf_printf(wr, " "); + wrbuf_iconv_puts(wr, mt->iconv_cd, " "); wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data + using_code_len); + marc_iconv_reset(mt, wr); } wrbuf_puts (wr, mt->endline_str); break; case YAZ_MARC_CONTROLFIELD: - wrbuf_printf(wr, "%s ", n->u.controlfield.tag); + wrbuf_printf(wr, "%s", n->u.controlfield.tag); + wrbuf_iconv_puts(wr, mt->iconv_cd, " "); wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data); + marc_iconv_reset(mt, wr); wrbuf_puts (wr, mt->endline_str); break; case YAZ_MARC_COMMENT: wrbuf_puts(wr, "("); wrbuf_iconv_write(wr, mt->iconv_cd, n->u.comment, strlen(n->u.comment)); + marc_iconv_reset(mt, wr); wrbuf_puts(wr, ")\n"); break; case YAZ_MARC_LEADER: wrbuf_printf(wr, "%s\n", n->u.leader); } } + wrbuf_puts(wr, "\n"); + return 0; +} + +int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr) +{ + if (mt->enable_collection == collection_second) + { + switch(mt->output_format) + { + case YAZ_MARC_MARCXML: + case YAZ_MARC_TMARCXML: + wrbuf_printf(wr, "\n"); + break; + case YAZ_MARC_XCHANGE: + wrbuf_printf(wr, "\n"); + break; + } + } return 0; } +void yaz_marc_enable_collection(yaz_marc_t mt) +{ + mt->enable_collection = collection_first; +} + int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr) { - switch(mt->xml) + switch(mt->output_format) { case YAZ_MARC_LINE: return yaz_marc_write_line(mt, wr); case YAZ_MARC_MARCXML: + case YAZ_MARC_TMARCXML: return yaz_marc_write_marcxml(mt, wr); case YAZ_MARC_XCHANGE: return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */ case YAZ_MARC_ISO2709: return yaz_marc_write_iso2709(mt, wr); + case YAZ_MARC_CHECK: + return yaz_marc_write_check(mt, wr); } return -1; } +const char *collection_name[2] = { "collection", "collection"}; +const char *record_name[2] = { "record", "r"}; +const char *leader_name[2] = { "leader", "l"}; +const char *controlfield_name[2]= { "controlfield", "c"}; +const char *datafield_name[2] = { "datafield", "d"}; +const char *indicator_name[2] = { "ind", "i"}; +const char *subfield_name[2] = { "subfield", "s"}; + + /** \brief common MARC XML/Xchange writer \param mt handle \param wr WRBUF output @@ -469,15 +614,17 @@ int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr) \param format record format (e.g. "MARC21") \param type record type (e.g. "Bibliographic") */ -static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr, - const char *ns, - const char *format, - const char *type) +static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr, + const char *ns, + const char *format, + const char *type) { struct yaz_marc_node *n; int identifier_length; const char *leader = 0; + int turbo = yaz_marc_get_write_format(mt) == YAZ_MARC_TMARCXML; + for (n = mt->nodes; n; n = n->next) if (n->which == YAZ_MARC_LEADER) { @@ -489,8 +636,156 @@ static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr, return -1; if (!atoi_n_check(leader+11, 1, &identifier_length)) return -1; + + if (mt->enable_collection != no_collection) + { + if (mt->enable_collection == collection_first) { + wrbuf_printf(wr, "<%s xmlns=\"%s\">\n", collection_name[turbo], ns); + mt->enable_collection = collection_second; + } + wrbuf_printf(wr, "<%s", record_name[turbo]); + } + else + { + wrbuf_printf(wr, "<%s xmlns=\"%s\"", record_name[turbo], ns); + } + if (format) + wrbuf_printf(wr, " format=\"%.80s\"", format); + if (type) + wrbuf_printf(wr, " type=\"%.80s\"", type); + wrbuf_printf(wr, ">\n"); + for (n = mt->nodes; n; n = n->next) + { + struct yaz_marc_subfield *s; + + switch(n->which) + { + case YAZ_MARC_DATAFIELD: + + wrbuf_printf(wr, " <%s", datafield_name[turbo]); + if (!turbo) + wrbuf_printf(wr, " tag=\""); + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag, + strlen(n->u.datafield.tag)); + if (!turbo) + wrbuf_printf(wr, "\""); + if (n->u.datafield.indicator) + { + int i; + for (i = 0; n->u.datafield.indicator[i]; i++) + { + wrbuf_printf(wr, " %s%d=\"", indicator_name[turbo], i+1); + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, + n->u.datafield.indicator+i, 1); + wrbuf_iconv_puts(wr, mt->iconv_cd, "\""); + } + } + wrbuf_printf(wr, ">\n"); + for (s = n->u.datafield.subfields; s; s = s->next) + { + size_t using_code_len = get_subfield_len(mt, s->code_data, + identifier_length); + wrbuf_printf(wr, " <%s", subfield_name[turbo]); + if (!turbo) { + wrbuf_printf(wr, " code=\""); + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, + s->code_data, using_code_len); + wrbuf_iconv_puts(wr, mt->iconv_cd, "\">"); + } else { + element_name_encode(mt, wr, s->code_data, using_code_len); + wrbuf_puts(wr, ">"); + } + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, + s->code_data + using_code_len, + strlen(s->code_data + using_code_len)); + marc_iconv_reset(mt, wr); + wrbuf_printf(wr, "iconv_cd, + s->code_data, using_code_len); + wrbuf_puts(wr, ">\n"); + } + wrbuf_printf(wr, " iconv_cd, n->u.datafield.tag, + strlen(n->u.datafield.tag)); + wrbuf_printf(wr, ">\n", datafield_name[turbo]); + break; + case YAZ_MARC_CONTROLFIELD: + wrbuf_printf(wr, " <%s", controlfield_name[turbo]); + if (!turbo) { + wrbuf_printf(wr, " tag=\""); + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag, + strlen(n->u.controlfield.tag)); + wrbuf_iconv_puts(wr, mt->iconv_cd, "\">"); + } + else { + //TODO convert special + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag, + strlen(n->u.controlfield.tag)); + wrbuf_iconv_puts(wr, mt->iconv_cd, ">"); + } + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, + n->u.controlfield.data, + strlen(n->u.controlfield.data)); + marc_iconv_reset(mt, wr); + wrbuf_printf(wr, "iconv_cd, n->u.controlfield.tag, + strlen(n->u.controlfield.tag)); + wrbuf_puts(wr, ">\n"); + break; + case YAZ_MARC_COMMENT: + wrbuf_printf(wr, "\n"); + break; + case YAZ_MARC_LEADER: + wrbuf_printf(wr, " <%s>", leader_name[turbo]); + wrbuf_iconv_write_cdata(wr, + 0 /* no charset conversion for leader */, + n->u.leader, strlen(n->u.leader)); + wrbuf_printf(wr, "\n", leader_name[turbo]); + } + } + wrbuf_printf(wr, "\n", record_name[turbo]); + return 0; +} + +static int yaz_marc_write_marcxml_ns2(yaz_marc_t mt, WRBUF wr, + const char *ns, + const char *format, + const char *type) +{ + struct yaz_marc_node *n; + int identifier_length; + const char *leader = 0; - wrbuf_printf(wr, "nodes; n; n = n->next) + if (n->which == YAZ_MARC_LEADER) + { + leader = n->u.leader; + break; + } + + if (!leader) + return -1; + if (!atoi_n_check(leader+11, 1, &identifier_length)) + return -1; + + if (mt->enable_collection != no_collection) + { + if (mt->enable_collection == collection_first) + wrbuf_printf(wr, "\n", ns); + mt->enable_collection = collection_second; + wrbuf_printf(wr, "nodes; n; n = n->next) { struct yaz_marc_subfield *s; + switch(n->which) { case YAZ_MARC_DATAFIELD: @@ -514,28 +810,24 @@ static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr, wrbuf_printf(wr, " ind%d=\"", i+1); wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.indicator+i, 1); - wrbuf_printf(wr, "\""); + wrbuf_iconv_puts(wr, mt->iconv_cd, "\""); } } wrbuf_printf(wr, ">\n"); for (s = n->u.datafield.subfields; s; s = s->next) { - /* if identifier length is 2 (most MARCs), - the code is a single character .. However we've - seen multibyte codes, so see how big it really is */ - size_t using_code_len = - (identifier_length != 2) ? identifier_length - 1 - : - cdata_one_character(mt, s->code_data); - - wrbuf_puts(wr, " code_data, + identifier_length); + wrbuf_iconv_puts(wr, mt->iconv_cd, " iconv_cd, s->code_data, using_code_len); - wrbuf_puts(wr, "\">"); + wrbuf_iconv_puts(wr, mt->iconv_cd, "\">"); wrbuf_iconv_write_cdata(wr, mt->iconv_cd, s->code_data + using_code_len, strlen(s->code_data + using_code_len)); - wrbuf_puts(wr, "\n"); + marc_iconv_reset(mt, wr); + wrbuf_iconv_puts(wr, mt->iconv_cd, ""); + wrbuf_puts(wr, "\n"); } wrbuf_printf(wr, " \n"); break; @@ -543,9 +835,14 @@ static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr, wrbuf_printf(wr, " iconv_cd, n->u.controlfield.tag, strlen(n->u.controlfield.tag)); - wrbuf_printf(wr, "\">"); - wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data); - wrbuf_printf(wr, "\n"); + wrbuf_iconv_puts(wr, mt->iconv_cd, "\">"); + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, + n->u.controlfield.data, + strlen(n->u.controlfield.data)); + + marc_iconv_reset(mt, wr); + wrbuf_iconv_puts(wr, mt->iconv_cd, ""); + wrbuf_puts(wr, "\n"); break; case YAZ_MARC_COMMENT: wrbuf_printf(wr, "