X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Fmarcdisp.c;h=5cda0385a8b5dd6e2dbdd5ef4d91c4f9d700f8ac;hp=62200faaa4e00aea5420b6957b3b1c24a235b146;hb=c0a7048ecd7a2dd9bede0edb27dbe1547be2e9d4;hpb=8d691989077a0addcbd840d769dce6700f3d9622 diff --git a/src/marcdisp.c b/src/marcdisp.c index 62200fa..5cda038 100644 --- a/src/marcdisp.c +++ b/src/marcdisp.c @@ -1,8 +1,6 @@ -/* - * Copyright (C) 1995-2007, Index Data ApS +/* This file is part of the YAZ toolkit. + * Copyright (C) 1995-2010 Index Data * See the file LICENSE for details. - * - * $Id: marcdisp.c,v 1.42 2007-01-03 08:42:15 adam Exp $ */ /** @@ -20,18 +18,26 @@ #include #include +#include #include #include #include #include #include #include +#include #if YAZ_HAVE_XML2 #include #include #endif +enum yaz_collection_state { + no_collection, + collection_first, + collection_second +}; + /** \brief node types for yaz_marc_node */ enum YAZ_MARC_NODE_TYPE { @@ -81,9 +87,11 @@ struct yaz_marc_subfield { struct yaz_marc_t_ { WRBUF m_wr; NMEM nmem; - int xml; + int input_format; + int output_format; int debug; int write_using_libxml2; + enum yaz_collection_state enable_collection; yaz_iconv_t iconv_cd; char subfield_str[8]; char endline_str[8]; @@ -96,9 +104,10 @@ struct yaz_marc_t_ { yaz_marc_t yaz_marc_create(void) { yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt)); - mt->xml = YAZ_MARC_LINE; + mt->output_format = YAZ_MARC_LINE; mt->debug = 0; mt->write_using_libxml2 = 0; + mt->enable_collection = no_collection; mt->m_wr = wrbuf_alloc(); mt->iconv_cd = 0; mt->leader_spec = 0; @@ -115,7 +124,7 @@ void yaz_marc_destroy(yaz_marc_t mt) if (!mt) return ; nmem_destroy(mt->nmem); - wrbuf_free(mt->m_wr, 1); + wrbuf_destroy(mt->m_wr); xfree(mt->leader_spec); xfree(mt); } @@ -125,13 +134,19 @@ NMEM yaz_marc_get_nmem(yaz_marc_t mt) return mt->nmem; } +static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr) +{ + wrbuf_iconv_reset(wr, mt->iconv_cd); +} + static int marc_exec_leader(const char *leader_spec, char *leader, size_t size); static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt) { - struct yaz_marc_node *n = nmem_malloc(mt->nmem, sizeof(*n)); + struct yaz_marc_node *n = (struct yaz_marc_node *) + nmem_malloc(mt->nmem, sizeof(*n)); n->next = 0; *mt->nodes_pp = n; mt->nodes_pp = &n->next; @@ -147,6 +162,16 @@ void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag, n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem); n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem); } + +void yaz_marc_add_controlfield_turbo_xml(yaz_marc_t mt, char *tag, + const xmlNode *ptr_data) +{ + struct yaz_marc_node *n = yaz_marc_add_node(mt); + n->which = YAZ_MARC_CONTROLFIELD; + n->u.controlfield.tag = tag; + n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem); +} + #endif @@ -161,19 +186,9 @@ void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...) { va_list ap; char buf[200]; - va_start(ap, fmt); -#ifdef WIN32 - _vsnprintf(buf, sizeof(buf)-1, fmt, ap); -#else -/* !WIN32 */ -#if HAVE_VSNPRINTF - vsnprintf(buf, sizeof(buf), fmt, ap); -#else - vsprintf(buf, fmt, ap); -#endif -#endif -/* WIN32 */ + va_start(ap, fmt); + yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap); yaz_marc_add_comment(mt, buf); va_end (ap); } @@ -226,6 +241,36 @@ void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag, mt->subfield_pp = &n->u.datafield.subfields; } +// Magic function: adds a attribute value to the element name if it is plain characters. +// if not, and if the attribute name is not null, it will append a attribute element with the value +// if attribute name is null it will return a non-zero value meaning it couldnt handle the value. + +int element_name_append_attribute_value(yaz_marc_t mt, WRBUF buffer, const char *attribute_name, char *code_data, size_t code_len) { + // TODO Map special codes to something possible for XML ELEMENT names + + int encode = 0; + int index = 0; + for (index = 0; index < code_len; index++) { + if (!((code_data[index] >= '0' && code_data[index] <= '9') || + (code_data[index] >= 'a' && code_data[index] <= 'z') || + (code_data[index] >= 'A' && code_data[index] <= 'Z'))) + encode = 1; + } + int success = 0; + // Add as attribute + if (encode && attribute_name) + wrbuf_printf(buffer, " %s=\"", attribute_name); + + if (!encode || attribute_name) + wrbuf_iconv_write_cdata(buffer, mt->iconv_cd, code_data, code_len); + else + success = -1; + + if (encode && attribute_name) + wrbuf_printf(buffer, "\""); // return error if we couldn't handle it. + return success; +} + #if YAZ_HAVE_XML2 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag, const char *indicator, size_t indicator_len) @@ -240,6 +285,24 @@ void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag, /* make subfield_pp the current (last one) */ mt->subfield_pp = &n->u.datafield.subfields; } + +void yaz_marc_add_datafield_turbo_xml(yaz_marc_t mt, char *tag_value, char *indicators) +{ + struct yaz_marc_node *n = yaz_marc_add_node(mt); + n->which = YAZ_MARC_DATAFIELD; + n->u.datafield.tag = tag_value; + n->u.datafield.indicator = indicators; + n->u.datafield.subfields = 0; + + // make subfield_pp the current (last one) + mt->subfield_pp = &n->u.datafield.subfields; +} + +void yaz_marc_datafield_set_indicators(struct yaz_marc_node *n, char *indicator) +{ + n->u.datafield.indicator = indicator; +} + #endif void yaz_marc_add_subfield(yaz_marc_t mt, @@ -260,7 +323,8 @@ void yaz_marc_add_subfield(yaz_marc_t mt, if (mt->subfield_pp) { - struct yaz_marc_subfield *n = nmem_malloc(mt->nmem, sizeof(*n)); + struct yaz_marc_subfield *n = (struct yaz_marc_subfield *) + nmem_malloc(mt->nmem, sizeof(*n)); n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len); n->next = 0; /* mark subfield_pp to point to this one, so we append here next */ @@ -269,16 +333,6 @@ void yaz_marc_add_subfield(yaz_marc_t mt, } } -int atoi_n_check(const char *buf, int size, int *val) -{ - int i; - for (i = 0; i < size; i++) - if (!isdigit(i[(const unsigned char *) buf])) - return 0; - *val = atoi_n(buf, size); - return 1; -} - void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c, int *indicator_length, int *identifier_length, @@ -420,7 +474,7 @@ int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr) case YAZ_MARC_COMMENT: wrbuf_iconv_write(wr, mt->iconv_cd, n->u.comment, strlen(n->u.comment)); - wrbuf_puts(wr, ")\n"); + wrbuf_puts(wr, "\n"); break; default: break; @@ -429,6 +483,17 @@ int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr) return 0; } +static size_t get_subfield_len(yaz_marc_t mt, const char *data, + int identifier_length) +{ + /* if identifier length is 2 (most MARCs) or less (probably an error), + the code is a single character .. However we've + seen multibyte codes, so see how big it really is */ + if (identifier_length > 2) + return identifier_length - 1; + else + return cdata_one_character(mt, data); +} int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr) { @@ -458,13 +523,8 @@ int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr) n->u.datafield.indicator); for (s = n->u.datafield.subfields; s; s = s->next) { - /* if identifier length is 2 (most MARCs), - the code is a single character .. However we've - seen multibyte codes, so see how big it really is */ - size_t using_code_len = - (identifier_length != 2) ? identifier_length - 1 - : - cdata_one_character(mt, s->code_data); + size_t using_code_len = get_subfield_len(mt, s->code_data, + identifier_length); wrbuf_puts (wr, mt->subfield_str); wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data, @@ -472,8 +532,7 @@ int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr) wrbuf_iconv_puts(wr, mt->iconv_cd, " "); wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data + using_code_len); - wrbuf_iconv_puts(wr, mt->iconv_cd, " "); - wr->pos--; + marc_iconv_reset(mt, wr); } wrbuf_puts (wr, mt->endline_str); break; @@ -481,14 +540,14 @@ int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr) wrbuf_printf(wr, "%s", n->u.controlfield.tag); wrbuf_iconv_puts(wr, mt->iconv_cd, " "); wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data); - wrbuf_iconv_puts(wr, mt->iconv_cd, " "); - wr->pos--; + marc_iconv_reset(mt, wr); wrbuf_puts (wr, mt->endline_str); break; case YAZ_MARC_COMMENT: wrbuf_puts(wr, "("); wrbuf_iconv_write(wr, mt->iconv_cd, n->u.comment, strlen(n->u.comment)); + marc_iconv_reset(mt, wr); wrbuf_puts(wr, ")\n"); break; case YAZ_MARC_LEADER: @@ -499,13 +558,37 @@ int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr) return 0; } +int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr) +{ + if (mt->enable_collection == collection_second) + { + switch(mt->output_format) + { + case YAZ_MARC_MARCXML: + case YAZ_MARC_TMARCXML: + wrbuf_printf(wr, "\n"); + break; + case YAZ_MARC_XCHANGE: + wrbuf_printf(wr, "\n"); + break; + } + } + return 0; +} + +void yaz_marc_enable_collection(yaz_marc_t mt) +{ + mt->enable_collection = collection_first; +} + int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr) { - switch(mt->xml) + switch(mt->output_format) { case YAZ_MARC_LINE: return yaz_marc_write_line(mt, wr); case YAZ_MARC_MARCXML: + case YAZ_MARC_TMARCXML: return yaz_marc_write_marcxml(mt, wr); case YAZ_MARC_XCHANGE: return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */ @@ -517,6 +600,15 @@ int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr) return -1; } +const char *collection_name[2] = { "collection", "collection"}; +const char *record_name[2] = { "record", "r"}; +const char *leader_name[2] = { "leader", "l"}; +const char *controlfield_name[2]= { "controlfield", "c"}; +const char *datafield_name[2] = { "datafield", "d"}; +const char *indicator_name[2] = { "ind", "i"}; +const char *subfield_name[2] = { "subfield", "s"}; + + /** \brief common MARC XML/Xchange writer \param mt handle \param wr WRBUF output @@ -533,6 +625,8 @@ static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr, int identifier_length; const char *leader = 0; + int turbo = yaz_marc_get_write_format(mt) == YAZ_MARC_TMARCXML; + for (n = mt->nodes; n; n = n->next) if (n->which == YAZ_MARC_LEADER) { @@ -544,8 +638,155 @@ static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr, return -1; if (!atoi_n_check(leader+11, 1, &identifier_length)) return -1; + + if (mt->enable_collection != no_collection) + { + if (mt->enable_collection == collection_first) { + wrbuf_printf(wr, "<%s xmlns=\"%s\">\n", collection_name[turbo], ns); + mt->enable_collection = collection_second; + } + wrbuf_printf(wr, "<%s", record_name[turbo]); + } + else + { + wrbuf_printf(wr, "<%s xmlns=\"%s\"", record_name[turbo], ns); + } + if (format) + wrbuf_printf(wr, " format=\"%.80s\"", format); + if (type) + wrbuf_printf(wr, " type=\"%.80s\"", type); + wrbuf_printf(wr, ">\n"); + for (n = mt->nodes; n; n = n->next) + { + struct yaz_marc_subfield *s; + + switch(n->which) + { + case YAZ_MARC_DATAFIELD: - wrbuf_printf(wr, "iconv_cd, n->u.datafield.tag, + strlen(n->u.datafield.tag)); + if (!turbo) + wrbuf_printf(wr, "\""); + if (n->u.datafield.indicator) + { + int i; + for (i = 0; n->u.datafield.indicator[i]; i++) + { + wrbuf_printf(wr, " %s%d=\"", indicator_name[turbo], i+1); + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, + n->u.datafield.indicator+i, 1); + wrbuf_iconv_puts(wr, mt->iconv_cd, "\""); + } + } + wrbuf_printf(wr, ">\n"); + for (s = n->u.datafield.subfields; s; s = s->next) + { + size_t using_code_len = get_subfield_len(mt, s->code_data, + identifier_length); + wrbuf_printf(wr, " <%s", subfield_name[turbo]); + if (!turbo) { + wrbuf_printf(wr, " code=\""); + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, + s->code_data, using_code_len); + wrbuf_iconv_puts(wr, mt->iconv_cd, "\">"); + } else { + element_name_append_attribute_value(mt, wr, "code", s->code_data, using_code_len); + wrbuf_puts(wr, ">"); + } + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, + s->code_data + using_code_len, + strlen(s->code_data + using_code_len)); + marc_iconv_reset(mt, wr); + wrbuf_printf(wr, "code_data, using_code_len); + wrbuf_puts(wr, ">\n"); + } + wrbuf_printf(wr, " iconv_cd, n->u.datafield.tag, + strlen(n->u.datafield.tag)); + wrbuf_printf(wr, ">\n", datafield_name[turbo]); + break; + case YAZ_MARC_CONTROLFIELD: + wrbuf_printf(wr, " <%s", controlfield_name[turbo]); + if (!turbo) { + wrbuf_printf(wr, " tag=\""); + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag, + strlen(n->u.controlfield.tag)); + wrbuf_iconv_puts(wr, mt->iconv_cd, "\">"); + } + else { + //TODO convert special + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag, + strlen(n->u.controlfield.tag)); + wrbuf_iconv_puts(wr, mt->iconv_cd, ">"); + } + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, + n->u.controlfield.data, + strlen(n->u.controlfield.data)); + marc_iconv_reset(mt, wr); + wrbuf_printf(wr, "iconv_cd, n->u.controlfield.tag, + strlen(n->u.controlfield.tag)); + wrbuf_puts(wr, ">\n"); + break; + case YAZ_MARC_COMMENT: + wrbuf_printf(wr, "\n"); + break; + case YAZ_MARC_LEADER: + wrbuf_printf(wr, " <%s>", leader_name[turbo]); + wrbuf_iconv_write_cdata(wr, + 0 , /* no charset conversion for leader */ + n->u.leader, strlen(n->u.leader)); + wrbuf_printf(wr, "\n", leader_name[turbo]); + } + } + wrbuf_printf(wr, "\n", record_name[turbo]); + return 0; +} + +static int yaz_marc_write_marcxml_ns2(yaz_marc_t mt, WRBUF wr, + const char *ns, + const char *format, + const char *type) +{ + struct yaz_marc_node *n; + int identifier_length; + const char *leader = 0; + + for (n = mt->nodes; n; n = n->next) + if (n->which == YAZ_MARC_LEADER) + { + leader = n->u.leader; + break; + } + + if (!leader) + return -1; + if (!atoi_n_check(leader+11, 1, &identifier_length)) + return -1; + + if (mt->enable_collection != no_collection) + { + if (mt->enable_collection == collection_first) + wrbuf_printf(wr, "\n", ns); + mt->enable_collection = collection_second; + wrbuf_printf(wr, "\n"); for (s = n->u.datafield.subfields; s; s = s->next) { - /* if identifier length is 2 (most MARCs), - the code is a single character .. However we've - seen multibyte codes, so see how big it really is */ - size_t using_code_len = - (identifier_length != 2) ? identifier_length - 1 - : - cdata_one_character(mt, s->code_data); - + size_t using_code_len = get_subfield_len(mt, s->code_data, + identifier_length); wrbuf_iconv_puts(wr, mt->iconv_cd, " iconv_cd, s->code_data, using_code_len); @@ -591,6 +826,7 @@ static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr, wrbuf_iconv_write_cdata(wr, mt->iconv_cd, s->code_data + using_code_len, strlen(s->code_data + using_code_len)); + marc_iconv_reset(mt, wr); wrbuf_iconv_puts(wr, mt->iconv_cd, ""); wrbuf_puts(wr, "\n"); } @@ -601,7 +837,11 @@ static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr, wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag, strlen(n->u.controlfield.tag)); wrbuf_iconv_puts(wr, mt->iconv_cd, "\">"); - wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data); + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, + n->u.controlfield.data, + strlen(n->u.controlfield.data)); + + marc_iconv_reset(mt, wr); wrbuf_iconv_puts(wr, mt->iconv_cd, ""); wrbuf_puts(wr, "\n"); break; @@ -622,6 +862,7 @@ static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr, return 0; } + static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr, const char *ns, const char *format, @@ -629,10 +870,14 @@ static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr, { if (mt->write_using_libxml2) { +#if YAZ_HAVE_XML2 int ret; xmlNode *root_ptr; - ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type); + if (yaz_marc_get_write_format(mt) == YAZ_MARC_MARCXML) + ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type); + else // Check for Turbo XML + ret = yaz_marc_write_turbo_xml(mt, &root_ptr, ns, format, type); if (ret == 0) { xmlChar *buf_out; @@ -648,6 +893,9 @@ static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr, xmlFreeDoc(doc); } return ret; +#else + return -1; +#endif } else return yaz_marc_write_marcxml_ns1(mt, wr, ns, format, type); @@ -655,9 +903,14 @@ static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr, int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr) { + /* set leader 09 to 'a' for UNICODE */ + /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */ if (!mt->leader_spec) yaz_marc_modify_leader(mt, 9, "a"); - return yaz_marc_write_marcxml_ns(mt, wr, "http://www.loc.gov/MARC21/slim", + char *name_space = "http://www.loc.gov/MARC21/slim"; + if (mt->output_format == YAZ_MARC_TMARCXML) + name_space = "http://www.indexdata.com/MARC21/turboxml"; + return yaz_marc_write_marcxml_ns(mt, wr, name_space, 0, 0); } @@ -666,22 +919,181 @@ int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr, const char *type) { return yaz_marc_write_marcxml_ns(mt, wr, - "http://www.bs.dk/standards/MarcXchange", + "info:lc/xmlns/marcxchange-v1", 0, 0); } +#if YAZ_HAVE_XML2 + +void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n, xmlNode *record_ptr, xmlNsPtr ns_record, WRBUF wr_cdata, int identifier_length) +{ + xmlNode *ptr; + struct yaz_marc_subfield *s; + int turbo = mt->output_format == YAZ_MARC_TMARCXML; + if (!turbo) { + ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0); + xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag); + } + else { + //TODO consider if safe + char field[10]; + field[0] = 'd'; + strncpy(field + 1, n->u.datafield.tag, 3); + field[4] = '\0'; + ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST field, 0); + } + if (n->u.datafield.indicator) + { + int i; + for (i = 0; n->u.datafield.indicator[i]; i++) + { + char ind_str[6]; + char ind_val[2]; + + ind_val[0] = n->u.datafield.indicator[i]; + ind_val[1] = '\0'; + sprintf(ind_str, "%s%d", indicator_name[turbo], i+1); + xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val); + } + } + WRBUF subfield_name = wrbuf_alloc(); + for (s = n->u.datafield.subfields; s; s = s->next) + { + xmlNode *ptr_subfield; + size_t using_code_len = get_subfield_len(mt, s->code_data, + identifier_length); + wrbuf_rewind(wr_cdata); + wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, s->code_data + using_code_len); + marc_iconv_reset(mt, wr_cdata); + + if (!turbo) { + ptr_subfield = xmlNewTextChild( + ptr, ns_record, + BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata)); + // Generate code attribute value and add + wrbuf_rewind(wr_cdata); + wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len); + xmlNewProp(ptr_subfield, BAD_CAST "code", + BAD_CAST wrbuf_cstr(wr_cdata)); + } + else { // Turbo format + wrbuf_rewind(subfield_name); + wrbuf_puts(subfield_name, "s"); + int not_written = element_name_append_attribute_value(mt, subfield_name, 0, s->code_data, using_code_len) != 0; + ptr_subfield = xmlNewTextChild(ptr, ns_record, + BAD_CAST wrbuf_cstr(subfield_name), + BAD_CAST wrbuf_cstr(wr_cdata)); + if (not_written) { + // Generate code attribute value and add + wrbuf_rewind(wr_cdata); + wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len); + xmlNewProp(ptr_subfield, BAD_CAST "code", BAD_CAST wrbuf_cstr(wr_cdata)); + } + } + } + wrbuf_destroy(subfield_name); +} + +int yaz_marc_write_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr, + const char *ns, + const char *format, + const char *type) +{ + struct yaz_marc_node *n; + int identifier_length; + const char *leader = 0; + xmlNode *record_ptr; + xmlNsPtr ns_record; + WRBUF wr_cdata = 0; + int turbo = mt->output_format == YAZ_MARC_TMARCXML; + for (n = mt->nodes; n; n = n->next) + if (n->which == YAZ_MARC_LEADER) + { + leader = n->u.leader; + break; + } + + if (!leader) + return -1; + if (!atoi_n_check(leader+11, 1, &identifier_length)) + return -1; + + wr_cdata = wrbuf_alloc(); + + record_ptr = xmlNewNode(0, BAD_CAST "r"); + *root_ptr = record_ptr; + + ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0); + xmlSetNs(record_ptr, ns_record); + + if (format) + xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format); + if (type) + xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type); + for (n = mt->nodes; n; n = n->next) + { + struct yaz_marc_subfield *s; + xmlNode *ptr; + + switch(n->which) + { + case YAZ_MARC_DATAFIELD: + add_marc_datafield_turbo_xml(mt, n, record_ptr, ns_record, wr_cdata, identifier_length); + break; + case YAZ_MARC_CONTROLFIELD: + wrbuf_rewind(wr_cdata); + wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data); + marc_iconv_reset(mt, wr_cdata); + + if (!turbo) { + ptr = xmlNewTextChild(record_ptr, ns_record, + BAD_CAST "controlfield", + BAD_CAST wrbuf_cstr(wr_cdata)); + xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag); + } + else { + // TODO required iconv? + char field[10]; + field[0] = 'c'; + strncpy(field + 1, n->u.controlfield.tag, 3); + field[4] = '\0'; + ptr = xmlNewTextChild(record_ptr, ns_record, + BAD_CAST field, + BAD_CAST wrbuf_cstr(wr_cdata)); + } + + break; + case YAZ_MARC_COMMENT: + ptr = xmlNewComment(BAD_CAST n->u.comment); + xmlAddChild(record_ptr, ptr); + break; + case YAZ_MARC_LEADER: + { + char *field = "leader"; + if (turbo) + field = "l"; + xmlNewTextChild(record_ptr, ns_record, BAD_CAST field, + BAD_CAST n->u.leader); + } + break; + } + } + wrbuf_destroy(wr_cdata); + return 0; +} + int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr, const char *ns, const char *format, const char *type) { -#if YAZ_HAVE_XML2 struct yaz_marc_node *n; int identifier_length; const char *leader = 0; xmlNode *record_ptr; xmlNsPtr ns_record; + WRBUF wr_cdata = 0; for (n = mt->nodes; n; n = n->next) if (n->which == YAZ_MARC_LEADER) @@ -695,6 +1107,8 @@ int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr, if (!atoi_n_check(leader+11, 1, &identifier_length)) return -1; + wr_cdata = wrbuf_alloc(); + record_ptr = xmlNewNode(0, BAD_CAST "record"); *root_ptr = record_ptr; @@ -731,35 +1145,32 @@ int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr, } for (s = n->u.datafield.subfields; s; s = s->next) { - char code_val[8]; - xmlNode *ptr_subfield; - /* if identifier length is 2 (most MARCs), - the code is a single character .. However we've - seen multibyte codes, so see how big it really is */ - size_t using_code_len = - (identifier_length != 2) ? identifier_length - 1 - : - cdata_one_character(mt, s->code_data); - - if (using_code_len >= sizeof(code_val)-1) - continue; - + size_t using_code_len = get_subfield_len(mt, s->code_data, + identifier_length); + wrbuf_rewind(wr_cdata); + wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, + s->code_data + using_code_len); + marc_iconv_reset(mt, wr_cdata); ptr_subfield = xmlNewTextChild( - ptr, ns_record, - BAD_CAST "subfield", - BAD_CAST (s->code_data + using_code_len)); - - memcpy(code_val, s->code_data, using_code_len); - code_val[using_code_len] = '\0'; - - xmlNewProp(ptr_subfield, BAD_CAST "code", BAD_CAST code_val); + ptr, ns_record, + BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata)); + + wrbuf_rewind(wr_cdata); + wrbuf_iconv_write(wr_cdata, mt->iconv_cd, + s->code_data, using_code_len); + xmlNewProp(ptr_subfield, BAD_CAST "code", + BAD_CAST wrbuf_cstr(wr_cdata)); } break; case YAZ_MARC_CONTROLFIELD: + wrbuf_rewind(wr_cdata); + wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data); + marc_iconv_reset(mt, wr_cdata); + ptr = xmlNewTextChild(record_ptr, ns_record, BAD_CAST "controlfield", - BAD_CAST n->u.controlfield.data); + BAD_CAST wrbuf_cstr(wr_cdata)); xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag); break; @@ -773,12 +1184,15 @@ int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr, break; } } + wrbuf_destroy(wr_cdata); return 0; -#else - return -1; -#endif } + + + +#endif + int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr) { struct yaz_marc_node *n; @@ -827,9 +1241,11 @@ int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr) /* write dummy IDFS + content */ wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' '); wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data); + marc_iconv_reset(mt, wr_data_tmp); } /* write dummy FS (makes MARC-8 to become ASCII) */ wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' '); + marc_iconv_reset(mt, wr_data_tmp); data_length += wrbuf_len(wr_data_tmp); break; case YAZ_MARC_CONTROLFIELD: @@ -838,7 +1254,9 @@ int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr) wrbuf_rewind(wr_data_tmp); wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, n->u.controlfield.data); + marc_iconv_reset(mt, wr_data_tmp); wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */ + marc_iconv_reset(mt, wr_data_tmp); data_length += wrbuf_len(wr_data_tmp); break; case YAZ_MARC_COMMENT: @@ -872,9 +1290,9 @@ int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr) wrbuf_write(wr, wrbuf_buf(wr_head), 24); wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir)); - wrbuf_free(wr_head, 1); - wrbuf_free(wr_dir, 1); - wrbuf_free(wr_data_tmp, 1); + wrbuf_destroy(wr_head); + wrbuf_destroy(wr_dir); + wrbuf_destroy(wr_data_tmp); for (n = mt->nodes; n; n = n->next) { @@ -889,17 +1307,13 @@ int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr) { wrbuf_putc(wr, ISO2709_IDFS); wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data); - /* write dummy blank - makes MARC-8 to become ASCII */ - wrbuf_iconv_putchar(wr, mt->iconv_cd, ' '); - wr->pos--; + marc_iconv_reset(mt, wr); } wrbuf_putc(wr, ISO2709_FS); break; case YAZ_MARC_CONTROLFIELD: wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data); - /* write dummy blank - makes MARC-8 to become ASCII */ - wrbuf_iconv_putchar(wr, mt->iconv_cd, ' '); - wr->pos--; + marc_iconv_reset(mt, wr); wrbuf_putc(wr, ISO2709_FS); break; case YAZ_MARC_COMMENT: @@ -925,94 +1339,85 @@ int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) } int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize, - char **result, int *rsize) + const char **result, size_t *rsize) { int r; wrbuf_rewind(mt->m_wr); r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr); if (result) - *result = wrbuf_buf(mt->m_wr); + *result = wrbuf_cstr(mt->m_wr); if (rsize) *rsize = wrbuf_len(mt->m_wr); return r; } -void yaz_marc_xml(yaz_marc_t mt, int xmlmode) +void yaz_marc_set_read_format(yaz_marc_t mt, int format) { if (mt) - mt->xml = xmlmode; + mt->input_format = format; } -void yaz_marc_debug(yaz_marc_t mt, int level) +int yaz_marc_get_read_format(yaz_marc_t mt) { if (mt) - mt->debug = level; + return mt->input_format; + return -1; } -void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd) + +void yaz_marc_set_write_format(yaz_marc_t mt, int format) { - mt->iconv_cd = cd; + if (mt) { + mt->output_format = format; + } } -void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str) +int yaz_marc_get_write_format(yaz_marc_t mt) { - struct yaz_marc_node *n; - char *leader = 0; - for (n = mt->nodes; n; n = n->next) - if (n->which == YAZ_MARC_LEADER) - { - leader = n->u.leader; - memcpy(leader+off, str, strlen(str)); - break; - } + if (mt) + return mt->output_format; + return -1; } -/* deprecated */ -int yaz_marc_decode(const char *buf, WRBUF wr, int debug, int bsize, int xml) -{ - yaz_marc_t mt = yaz_marc_create(); - int r; - mt->debug = debug; - mt->xml = xml; - r = yaz_marc_decode_wrbuf(mt, buf, bsize, wr); - yaz_marc_destroy(mt); - return r; +/** + * Deprecated, use yaz_marc_set_write_format + */ +void yaz_marc_xml(yaz_marc_t mt, int xmlmode) +{ + yaz_marc_set_write_format(mt, xmlmode); } -/* deprecated */ -int marc_display_wrbuf (const char *buf, WRBUF wr, int debug, int bsize) + + +void yaz_marc_debug(yaz_marc_t mt, int level) { - return yaz_marc_decode(buf, wr, debug, bsize, 0); + if (mt) + mt->debug = level; } -/* deprecated */ -int marc_display_exl (const char *buf, FILE *outf, int debug, int bsize) +void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd) { - yaz_marc_t mt = yaz_marc_create(); - int r; - - mt->debug = debug; - r = yaz_marc_decode_wrbuf (mt, buf, bsize, mt->m_wr); - if (!outf) - outf = stdout; - if (r > 0) - fwrite (wrbuf_buf(mt->m_wr), 1, wrbuf_len(mt->m_wr), outf); - yaz_marc_destroy(mt); - return r; + mt->iconv_cd = cd; } -/* deprecated */ -int marc_display_ex (const char *buf, FILE *outf, int debug) +yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt) { - return marc_display_exl (buf, outf, debug, -1); + return mt->iconv_cd; } -/* deprecated */ -int marc_display (const char *buf, FILE *outf) +void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str) { - return marc_display_ex (buf, outf, 0); + struct yaz_marc_node *n; + char *leader = 0; + for (n = mt->nodes; n; n = n->next) + if (n->which == YAZ_MARC_LEADER) + { + leader = n->u.leader; + memcpy(leader+off, str, strlen(str)); + break; + } } int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec) @@ -1041,7 +1446,7 @@ static int marc_exec_leader(const char *leader_spec, char *leader, size_t size) no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read); if (no < 2 || no_read < 3) return -1; - if (pos < 0 || pos >= size) + if (pos < 0 || (size_t) pos >= size) return -1; if (*val == '\'') @@ -1079,6 +1484,8 @@ int yaz_marc_decode_formatstr(const char *arg) mode = YAZ_MARC_ISO2709; if (!strcmp(arg, "marcxml")) mode = YAZ_MARC_MARCXML; + if (!strcmp(arg, "tmarcxml")) + mode = YAZ_MARC_TMARCXML; if (!strcmp(arg, "marcxchange")) mode = YAZ_MARC_XCHANGE; if (!strcmp(arg, "line")) @@ -1091,9 +1498,16 @@ void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable) mt->write_using_libxml2 = enable; } +int yaz_marc_is_turbo_format(yaz_marc_t mt) +{ + return mt->output_format == YAZ_MARC_TMARCXML; +} + + /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab