X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Fmarcdisp.c;h=85acb07139e2470971e3ed442c22b58c9af8db45;hp=a998b83d4f24f522ec2bb1547b673ce0bc3d95e1;hb=8ceaeefe2e491935cba91f56007308be6e4996e6;hpb=74d0b36793d64daf9c69a2a383a64ef49e17e159 diff --git a/src/marcdisp.c b/src/marcdisp.c index a998b83..85acb07 100644 --- a/src/marcdisp.c +++ b/src/marcdisp.c @@ -1,5 +1,5 @@ /* This file is part of the YAZ toolkit. - * Copyright (C) 1995-2010 Index Data + * Copyright (C) Index Data * See the file LICENSE for details. */ @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -37,10 +36,10 @@ enum yaz_collection_state { collection_first, collection_second }; - + /** \brief node types for yaz_marc_node */ enum YAZ_MARC_NODE_TYPE -{ +{ YAZ_MARC_DATAFIELD, YAZ_MARC_CONTROLFIELD, YAZ_MARC_COMMENT, @@ -87,7 +86,6 @@ struct yaz_marc_subfield { struct yaz_marc_t_ { WRBUF m_wr; NMEM nmem; - int input_format; int output_format; int debug; int write_using_libxml2; @@ -141,7 +139,12 @@ static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr) static int marc_exec_leader(const char *leader_spec, char *leader, size_t size); - +#if YAZ_HAVE_XML2 +static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr, + const char *ns, + const char *format, + const char *type); +#endif static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt) { @@ -163,8 +166,8 @@ void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag, n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem); } -void yaz_marc_add_controlfield_turbo_xml(yaz_marc_t mt, char *tag, - const xmlNode *ptr_data) +void yaz_marc_add_controlfield_xml2(yaz_marc_t mt, char *tag, + const xmlNode *ptr_data) { struct yaz_marc_node *n = yaz_marc_add_node(mt); n->which = YAZ_MARC_CONTROLFIELD; @@ -241,33 +244,40 @@ void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag, mt->subfield_pp = &n->u.datafield.subfields; } -// Magic function: adds a attribute value to the element name if it is plain characters. -// if not, and if the attribute name is not null, it will append a attribute element with the value -// if attribute name is null it will return a non-zero value meaning it couldnt handle the value. - -int element_name_append_attribute_value(yaz_marc_t mt, WRBUF buffer, const char *attribute_name, char *code_data, size_t code_len) { - // TODO Map special codes to something possible for XML ELEMENT names - - int encode = 0; - int index = 0; - for (index = 0; index < code_len; index++) { - if (!((code_data[index] >= '0' && code_data[index] <= '9') || - (code_data[index] >= 'a' && code_data[index] <= 'z') || - (code_data[index] >= 'A' && code_data[index] <= 'Z'))) - encode = 1; - } - // Add as attribute - if (encode && attribute_name) - wrbuf_printf(buffer, " %s=\"", attribute_name); - - if (!encode || attribute_name) - wrbuf_iconv_write_cdata(buffer, mt->iconv_cd, code_data, code_len); - if (encode && attribute_name) - wrbuf_printf(buffer, "\""); - // return error if we couldn't handle it. - if (encode && !attribute_name); - return -1; - return 0; +/** \brief adds a attribute value to the element name if it is plain chars + + If not, and if the attribute name is not null, it will append a + attribute element with the value if attribute name is null it will + return a non-zero value meaning it couldnt handle the value. +*/ +static int element_name_append_attribute_value( + yaz_marc_t mt, WRBUF buffer, + const char *attribute_name, char *code_data, size_t code_len) +{ + /* TODO Map special codes to something possible for XML ELEMENT names */ + + int encode = 0; + size_t index = 0; + int success = 0; + for (index = 0; index < code_len; index++) + { + if (!((code_data[index] >= '0' && code_data[index] <= '9') || + (code_data[index] >= 'a' && code_data[index] <= 'z') || + (code_data[index] >= 'A' && code_data[index] <= 'Z'))) + encode = 1; + } + /* Add as attribute */ + if (encode && attribute_name) + wrbuf_printf(buffer, " %s=\"", attribute_name); + + if (!encode || attribute_name) + wrbuf_iconv_write_cdata(buffer, mt->iconv_cd, code_data, code_len); + else + success = -1; + + if (encode && attribute_name) + wrbuf_printf(buffer, "\""); /* return error if we couldn't handle it.*/ + return success; } #if YAZ_HAVE_XML2 @@ -285,7 +295,7 @@ void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag, mt->subfield_pp = &n->u.datafield.subfields; } -void yaz_marc_add_datafield_turbo_xml(yaz_marc_t mt, char *tag_value, char *indicators) +void yaz_marc_add_datafield_xml2(yaz_marc_t mt, char *tag_value, char *indicators) { struct yaz_marc_node *n = yaz_marc_add_node(mt); n->which = YAZ_MARC_DATAFIELD; @@ -293,7 +303,7 @@ void yaz_marc_add_datafield_turbo_xml(yaz_marc_t mt, char *tag_value, char *indi n->u.datafield.indicator = indicators; n->u.datafield.subfields = 0; - // make subfield_pp the current (last one) + /* make subfield_pp the current (last one) */ mt->subfield_pp = &n->u.datafield.subfields; } @@ -332,6 +342,18 @@ void yaz_marc_add_subfield(yaz_marc_t mt, } } +static void check_ascii(yaz_marc_t mt, char *leader, int offset, + int ch_default) +{ + if (leader[offset] < ' ' || leader[offset] > 127) + { + yaz_marc_cprintf(mt, + "Leader character at offset %d is non-ASCII. " + "Setting value to '%c'", offset, ch_default); + leader[offset] = ch_default; + } +} + void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c, int *indicator_length, int *identifier_length, @@ -344,53 +366,57 @@ void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c, memcpy(leader, leader_c, 24); - if (!atoi_n_check(leader+10, 1, indicator_length)) + check_ascii(mt, leader, 5, 'a'); + check_ascii(mt, leader, 6, 'a'); + check_ascii(mt, leader, 7, 'a'); + check_ascii(mt, leader, 8, '#'); + check_ascii(mt, leader, 9, '#'); + if (!atoi_n_check(leader+10, 1, indicator_length) || *indicator_length == 0) { - yaz_marc_cprintf(mt, - "Indicator length at offset 10 should hold a digit." - " Assuming 2"); + yaz_marc_cprintf(mt, "Indicator length at offset 10 should" + " hold a number 1-9. Assuming 2"); leader[10] = '2'; *indicator_length = 2; } - if (!atoi_n_check(leader+11, 1, identifier_length)) + if (!atoi_n_check(leader+11, 1, identifier_length) || *identifier_length == 0) { - yaz_marc_cprintf(mt, - "Identifier length at offset 11 should hold a digit." - " Assuming 2"); + yaz_marc_cprintf(mt, "Identifier length at offset 11 should " + " hold a number 1-9. Assuming 2"); leader[11] = '2'; *identifier_length = 2; } if (!atoi_n_check(leader+12, 5, base_address)) { - yaz_marc_cprintf(mt, - "Base address at offsets 12..16 should hold a number." - " Assuming 0"); + yaz_marc_cprintf(mt, "Base address at offsets 12..16 should" + " hold a number. Assuming 0"); *base_address = 0; } - if (!atoi_n_check(leader+20, 1, length_data_entry)) + check_ascii(mt, leader, 17, '#'); + check_ascii(mt, leader, 18, '#'); + check_ascii(mt, leader, 19, '#'); + if (!atoi_n_check(leader+20, 1, length_data_entry) || + *length_data_entry < 3) { - yaz_marc_cprintf(mt, - "Length data entry at offset 20 should hold a digit." - " Assuming 4"); + yaz_marc_cprintf(mt, "Length data entry at offset 20 should" + " hold a number 3-9. Assuming 4"); *length_data_entry = 4; leader[20] = '4'; } - if (!atoi_n_check(leader+21, 1, length_starting)) + if (!atoi_n_check(leader+21, 1, length_starting) || *length_starting < 4) { - yaz_marc_cprintf(mt, - "Length starting at offset 21 should hold a digit." - " Assuming 5"); + yaz_marc_cprintf(mt, "Length starting at offset 21 should" + " hold a number 4-9. Assuming 5"); *length_starting = 5; leader[21] = '5'; } if (!atoi_n_check(leader+22, 1, length_implementation)) { - yaz_marc_cprintf(mt, - "Length implementation at offset 22 should hold a digit." - " Assuming 0"); + yaz_marc_cprintf(mt, "Length implementation at offset 22 should" + " hold a number. Assuming 0"); *length_implementation = 0; leader[22] = '0'; } + check_ascii(mt, leader, 23, '0'); if (mt->debug) { @@ -432,14 +458,24 @@ static size_t cdata_one_character(yaz_marc_t mt, const char *buf) size_t inbytesleft = i; size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft, &outp, &outbytesleft); + yaz_iconv(mt->iconv_cd, 0, 0, &outp, &outbytesleft); if (r != (size_t) (-1)) return i; /* got a complete sequence */ } return 1; /* giving up */ } + else + { + int error = 0; + size_t no_read = 0; + (void) yaz_read_UTF8_char((const unsigned char *) buf, strlen(buf), + &no_read, &error); + if (error == 0 && no_read > 0) + return no_read; + } return 1; /* we don't know */ } - + void yaz_marc_reset(yaz_marc_t mt) { nmem_reset(mt->nmem); @@ -460,7 +496,7 @@ int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr) leader = n->u.leader; break; } - + if (!leader) return -1; if (!atoi_n_check(leader+11, 1, &identifier_length)) @@ -471,7 +507,7 @@ int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr) switch(n->which) { case YAZ_MARC_COMMENT: - wrbuf_iconv_write(wr, mt->iconv_cd, + wrbuf_iconv_write(wr, mt->iconv_cd, n->u.comment, strlen(n->u.comment)); wrbuf_puts(wr, "\n"); break; @@ -506,7 +542,7 @@ int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr) leader = n->u.leader; break; } - + if (!leader) return -1; if (!atoi_n_check(leader+11, 1, &identifier_length)) @@ -524,12 +560,12 @@ int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr) { size_t using_code_len = get_subfield_len(mt, s->code_data, identifier_length); - - wrbuf_puts (wr, mt->subfield_str); - wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data, + + wrbuf_puts (wr, mt->subfield_str); + wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data, using_code_len); wrbuf_iconv_puts(wr, mt->iconv_cd, " "); - wrbuf_iconv_puts(wr, mt->iconv_cd, + wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data + using_code_len); marc_iconv_reset(mt, wr); } @@ -544,7 +580,7 @@ int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr) break; case YAZ_MARC_COMMENT: wrbuf_puts(wr, "("); - wrbuf_iconv_write(wr, mt->iconv_cd, + wrbuf_iconv_write(wr, mt->iconv_cd, n->u.comment, strlen(n->u.comment)); marc_iconv_reset(mt, wr); wrbuf_puts(wr, ")\n"); @@ -564,7 +600,7 @@ int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr) switch(mt->output_format) { case YAZ_MARC_MARCXML: - case YAZ_MARC_TMARCXML: + case YAZ_MARC_TURBOMARC: wrbuf_printf(wr, "\n"); break; case YAZ_MARC_XCHANGE: @@ -587,61 +623,65 @@ int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr) case YAZ_MARC_LINE: return yaz_marc_write_line(mt, wr); case YAZ_MARC_MARCXML: - case YAZ_MARC_TMARCXML: return yaz_marc_write_marcxml(mt, wr); + case YAZ_MARC_TURBOMARC: + return yaz_marc_write_turbomarc(mt, wr); case YAZ_MARC_XCHANGE: return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */ case YAZ_MARC_ISO2709: return yaz_marc_write_iso2709(mt, wr); case YAZ_MARC_CHECK: return yaz_marc_write_check(mt, wr); + case YAZ_MARC_JSON: + return yaz_marc_write_json(mt, wr); } return -1; } -const char *collection_name[2] = { "collection", "collection"}; -const char *record_name[2] = { "record", "r"}; -const char *leader_name[2] = { "leader", "l"}; -const char *controlfield_name[2]= { "controlfield", "c"}; -const char *datafield_name[2] = { "datafield", "d"}; -const char *indicator_name[2] = { "ind", "i"}; -const char *subfield_name[2] = { "subfield", "s"}; - +static const char *record_name[2] = { "record", "r"}; +static const char *leader_name[2] = { "leader", "l"}; +static const char *controlfield_name[2] = { "controlfield", "c"}; +static const char *datafield_name[2] = { "datafield", "d"}; +static const char *indicator_name[2] = { "ind", "i"}; +static const char *subfield_name[2] = { "subfield", "s"}; -/** \brief common MARC XML/Xchange writer +/** \brief common MARC XML/Xchange/turbomarc writer \param mt handle \param wr WRBUF output \param ns XMLNS for the elements \param format record format (e.g. "MARC21") \param type record type (e.g. "Bibliographic") + \param turbo =1 for turbomarc + \retval 0 OK + \retval -1 failure */ -static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr, - const char *ns, - const char *format, - const char *type) +static int yaz_marc_write_marcxml_wrbuf(yaz_marc_t mt, WRBUF wr, + const char *ns, + const char *format, + const char *type, + int turbo) { struct yaz_marc_node *n; int identifier_length; const char *leader = 0; - int turbo = yaz_marc_get_write_format(mt) == YAZ_MARC_TMARCXML; - for (n = mt->nodes; n; n = n->next) if (n->which == YAZ_MARC_LEADER) { leader = n->u.leader; break; } - + if (!leader) return -1; if (!atoi_n_check(leader+11, 1, &identifier_length)) return -1; - + if (mt->enable_collection != no_collection) { - if (mt->enable_collection == collection_first) { - wrbuf_printf(wr, "<%s xmlns=\"%s\">\n", collection_name[turbo], ns); + if (mt->enable_collection == collection_first) + { + wrbuf_printf(wr, "\n", ns); mt->enable_collection = collection_second; } wrbuf_printf(wr, "<%s", record_name[turbo]); @@ -663,186 +703,84 @@ static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr, { case YAZ_MARC_DATAFIELD: - wrbuf_printf(wr, " <%s", datafield_name[turbo]); + wrbuf_printf(wr, " <%s", datafield_name[turbo]); if (!turbo) wrbuf_printf(wr, " tag=\""); wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag, strlen(n->u.datafield.tag)); - if (!turbo) - wrbuf_printf(wr, "\""); + if (!turbo) + wrbuf_printf(wr, "\""); if (n->u.datafield.indicator) { int i; for (i = 0; n->u.datafield.indicator[i]; i++) { - wrbuf_printf(wr, " %s%d=\"", indicator_name[turbo], i+1); - wrbuf_iconv_write_cdata(wr, mt->iconv_cd, - n->u.datafield.indicator+i, 1); - wrbuf_iconv_puts(wr, mt->iconv_cd, "\""); - } + wrbuf_printf(wr, " %s%d=\"", indicator_name[turbo], i+1); + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, + n->u.datafield.indicator+i, 1); + wrbuf_iconv_puts(wr, mt->iconv_cd, "\""); + } } - wrbuf_printf(wr, ">\n"); + wrbuf_printf(wr, ">\n"); for (s = n->u.datafield.subfields; s; s = s->next) { size_t using_code_len = get_subfield_len(mt, s->code_data, identifier_length); wrbuf_printf(wr, " <%s", subfield_name[turbo]); - if (!turbo) { - wrbuf_printf(wr, " code=\""); - wrbuf_iconv_write_cdata(wr, mt->iconv_cd, - s->code_data, using_code_len); - wrbuf_iconv_puts(wr, mt->iconv_cd, "\">"); - } else { - element_name_append_attribute_value(mt, wr, "code", s->code_data, using_code_len); - wrbuf_puts(wr, ">"); - } + if (!turbo) + { + wrbuf_printf(wr, " code=\""); + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, + s->code_data, using_code_len); + wrbuf_iconv_puts(wr, mt->iconv_cd, "\">"); + } + else + { + element_name_append_attribute_value(mt, wr, "code", s->code_data, using_code_len); + wrbuf_puts(wr, ">"); + } wrbuf_iconv_write_cdata(wr, mt->iconv_cd, s->code_data + using_code_len, strlen(s->code_data + using_code_len)); marc_iconv_reset(mt, wr); - wrbuf_printf(wr, "code_data, using_code_len); + wrbuf_printf(wr, "code_data, using_code_len); wrbuf_puts(wr, ">\n"); } wrbuf_printf(wr, " iconv_cd, n->u.datafield.tag, - strlen(n->u.datafield.tag)); - wrbuf_printf(wr, ">\n", datafield_name[turbo]); + strlen(n->u.datafield.tag)); + wrbuf_printf(wr, ">\n"); break; case YAZ_MARC_CONTROLFIELD: - wrbuf_printf(wr, " <%s", controlfield_name[turbo]); - if (!turbo) { + wrbuf_printf(wr, " <%s", controlfield_name[turbo]); + if (!turbo) + { wrbuf_printf(wr, " tag=\""); - wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag, - strlen(n->u.controlfield.tag)); - wrbuf_iconv_puts(wr, mt->iconv_cd, "\">"); - } - else { - //TODO convert special - wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag, + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag, strlen(n->u.controlfield.tag)); - wrbuf_iconv_puts(wr, mt->iconv_cd, ">"); - } - wrbuf_iconv_write_cdata(wr, mt->iconv_cd, - n->u.controlfield.data, - strlen(n->u.controlfield.data)); - marc_iconv_reset(mt, wr); - wrbuf_printf(wr, "iconv_cd, n->u.controlfield.tag, - strlen(n->u.controlfield.tag)); - wrbuf_puts(wr, ">\n"); - break; - case YAZ_MARC_COMMENT: - wrbuf_printf(wr, "\n"); - break; - case YAZ_MARC_LEADER: - wrbuf_printf(wr, " <%s>", leader_name[turbo]); - wrbuf_iconv_write_cdata(wr, - 0 , /* no charset conversion for leader */ - n->u.leader, strlen(n->u.leader)); - wrbuf_printf(wr, "\n", leader_name[turbo]); - } - } - wrbuf_printf(wr, "\n", record_name[turbo]); - return 0; -} - -static int yaz_marc_write_marcxml_ns2(yaz_marc_t mt, WRBUF wr, - const char *ns, - const char *format, - const char *type) -{ - struct yaz_marc_node *n; - int identifier_length; - const char *leader = 0; - - for (n = mt->nodes; n; n = n->next) - if (n->which == YAZ_MARC_LEADER) - { - leader = n->u.leader; - break; - } - - if (!leader) - return -1; - if (!atoi_n_check(leader+11, 1, &identifier_length)) - return -1; - - if (mt->enable_collection != no_collection) - { - if (mt->enable_collection == collection_first) - wrbuf_printf(wr, "\n", ns); - mt->enable_collection = collection_second; - wrbuf_printf(wr, "\n"); - for (n = mt->nodes; n; n = n->next) - { - struct yaz_marc_subfield *s; - - switch(n->which) - { - case YAZ_MARC_DATAFIELD: - wrbuf_printf(wr, " iconv_cd, n->u.datafield.tag, - strlen(n->u.datafield.tag)); - wrbuf_printf(wr, "\""); - if (n->u.datafield.indicator) - { - int i; - for (i = 0; n->u.datafield.indicator[i]; i++) - { - wrbuf_printf(wr, " ind%d=\"", i+1); - wrbuf_iconv_write_cdata(wr, mt->iconv_cd, - n->u.datafield.indicator+i, 1); - wrbuf_iconv_puts(wr, mt->iconv_cd, "\""); - } + wrbuf_iconv_puts(wr, mt->iconv_cd, "\">"); } - wrbuf_printf(wr, ">\n"); - for (s = n->u.datafield.subfields; s; s = s->next) + else { - size_t using_code_len = get_subfield_len(mt, s->code_data, - identifier_length); - wrbuf_iconv_puts(wr, mt->iconv_cd, " iconv_cd, - s->code_data, using_code_len); - wrbuf_iconv_puts(wr, mt->iconv_cd, "\">"); - wrbuf_iconv_write_cdata(wr, mt->iconv_cd, - s->code_data + using_code_len, - strlen(s->code_data + using_code_len)); - marc_iconv_reset(mt, wr); - wrbuf_iconv_puts(wr, mt->iconv_cd, ""); - wrbuf_puts(wr, "\n"); + /* TODO convert special */ + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag, + strlen(n->u.controlfield.tag)); + wrbuf_iconv_puts(wr, mt->iconv_cd, ">"); } - wrbuf_printf(wr, " \n"); - break; - case YAZ_MARC_CONTROLFIELD: - wrbuf_printf(wr, " iconv_cd, n->u.controlfield.tag, - strlen(n->u.controlfield.tag)); - wrbuf_iconv_puts(wr, mt->iconv_cd, "\">"); wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.data, strlen(n->u.controlfield.data)); - marc_iconv_reset(mt, wr); - wrbuf_iconv_puts(wr, mt->iconv_cd, ""); - wrbuf_puts(wr, "\n"); + wrbuf_printf(wr, "iconv_cd, n->u.controlfield.tag, + strlen(n->u.controlfield.tag)); + wrbuf_puts(wr, ">\n"); break; case YAZ_MARC_COMMENT: wrbuf_printf(wr, "\n"); break; case YAZ_MARC_LEADER: - wrbuf_printf(wr, " "); - wrbuf_iconv_write_cdata(wr, - 0 /* no charset conversion for leader */, + wrbuf_printf(wr, " <%s>", leader_name[turbo]); + wrbuf_iconv_write_cdata(wr, + 0 , /* no charset conversion for leader */ n->u.leader, strlen(n->u.leader)); - wrbuf_printf(wr, "\n"); + wrbuf_printf(wr, "\n", leader_name[turbo]); } } - wrbuf_puts(wr, "\n"); + wrbuf_printf(wr, "\n", record_name[turbo]); return 0; } - static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr, - const char *ns, + const char *ns, const char *format, - const char *type) + const char *type, + int turbo) { if (mt->write_using_libxml2) { @@ -873,10 +811,10 @@ static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr, int ret; xmlNode *root_ptr; - if (yaz_marc_get_write_format(mt) == YAZ_MARC_MARCXML) - ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type); - else // Check for Turbo XML - ret = yaz_marc_write_turbo_xml(mt, &root_ptr, ns, format, type); + if (!turbo) + ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type); + else + ret = yaz_marc_write_xml_turbo_xml(mt, &root_ptr, ns, format, type); if (ret == 0) { xmlChar *buf_out; @@ -897,7 +835,7 @@ static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr, #endif } else - return yaz_marc_write_marcxml_ns1(mt, wr, ns, format, type); + return yaz_marc_write_marcxml_wrbuf(mt, wr, ns, format, type, turbo); } int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr) @@ -906,11 +844,19 @@ int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr) /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */ if (!mt->leader_spec) yaz_marc_modify_leader(mt, 9, "a"); - char *name_space = "http://www.loc.gov/MARC21/slim"; - if (mt->output_format == YAZ_MARC_TMARCXML) - name_space = "http://www.indexdata.com/MARC21/turboxml"; - return yaz_marc_write_marcxml_ns(mt, wr, name_space, - 0, 0); + return yaz_marc_write_marcxml_ns(mt, wr, + "http://www.loc.gov/MARC21/slim", + 0, 0, 0); +} + +int yaz_marc_write_turbomarc(yaz_marc_t mt, WRBUF wr) +{ + /* set leader 09 to 'a' for UNICODE */ + /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */ + if (!mt->leader_spec) + yaz_marc_modify_leader(mt, 9, "a"); + return yaz_marc_write_marcxml_ns(mt, wr, + "http://www.indexdata.com/turbomarc", 0, 0, 1); } int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr, @@ -919,28 +865,27 @@ int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr, { return yaz_marc_write_marcxml_ns(mt, wr, "info:lc/xmlns/marcxchange-v1", - 0, 0); + 0, 0, 0); } #if YAZ_HAVE_XML2 -void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n, xmlNode *record_ptr, xmlNsPtr ns_record, WRBUF wr_cdata, int identifier_length) +void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n, + xmlNode *record_ptr, + xmlNsPtr ns_record, WRBUF wr_cdata, + int identifier_length) { xmlNode *ptr; struct yaz_marc_subfield *s; - int turbo = mt->output_format == YAZ_MARC_TMARCXML; - if (!turbo) { - ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0); - xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag); - } - else { - //TODO consider if safe - char field[10]; - field[0] = 'd'; - strncpy(field + 1, n->u.datafield.tag, 3); - field[4] = '\0'; - ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST field, 0); - } + WRBUF subfield_name = wrbuf_alloc(); + + /* TODO consider if safe */ + char field[10]; + field[0] = 'd'; + strncpy(field + 1, n->u.datafield.tag, 3); + field[4] = '\0'; + ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST field, 0); + if (n->u.datafield.indicator) { int i; @@ -948,52 +893,44 @@ void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n, xmlNod { char ind_str[6]; char ind_val[2]; - + ind_val[0] = n->u.datafield.indicator[i]; ind_val[1] = '\0'; - sprintf(ind_str, "%s%d", indicator_name[turbo], i+1); - xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val); + sprintf(ind_str, "%s%d", indicator_name[1], i+1); + xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val); } } - WRBUF subfield_name = wrbuf_alloc(); for (s = n->u.datafield.subfields; s; s = s->next) { + int not_written; xmlNode *ptr_subfield; size_t using_code_len = get_subfield_len(mt, s->code_data, identifier_length); wrbuf_rewind(wr_cdata); wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, s->code_data + using_code_len); marc_iconv_reset(mt, wr_cdata); - - if (!turbo) { - ptr_subfield = xmlNewTextChild( - ptr, ns_record, - BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata)); - wrbuf_rewind(wr_cdata); - wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len); - xmlNewProp(ptr_subfield, BAD_CAST "code", - BAD_CAST wrbuf_cstr(wr_cdata)); - } - else { // Turbo format - wrbuf_rewind(subfield_name); - wrbuf_puts(subfield_name, "s"); - int encoding = element_name_append_attribute_value(mt, subfield_name, 0, s->code_data, using_code_len); - ptr_subfield = xmlNewTextChild(ptr, ns_record, - BAD_CAST wrbuf_cstr(subfield_name), - BAD_CAST wrbuf_cstr(wr_cdata)); - if (encoding) { - wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len); - xmlNewProp(ptr_subfield, BAD_CAST "code", BAD_CAST wrbuf_cstr(wr_cdata)); - } + + wrbuf_rewind(subfield_name); + wrbuf_puts(subfield_name, "s"); + not_written = element_name_append_attribute_value(mt, subfield_name, 0, s->code_data, using_code_len) != 0; + ptr_subfield = xmlNewTextChild(ptr, ns_record, + BAD_CAST wrbuf_cstr(subfield_name), + BAD_CAST wrbuf_cstr(wr_cdata)); + if (not_written) + { + /* Generate code attribute value and add */ + wrbuf_rewind(wr_cdata); + wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len); + xmlNewProp(ptr_subfield, BAD_CAST "code", BAD_CAST wrbuf_cstr(wr_cdata)); } } - wrbuf_destroy(subfield_name); + wrbuf_destroy(subfield_name); } -int yaz_marc_write_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr, - const char *ns, - const char *format, - const char *type) +static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr, + const char *ns, + const char *format, + const char *type) { struct yaz_marc_node *n; int identifier_length; @@ -1001,14 +938,14 @@ int yaz_marc_write_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr, xmlNode *record_ptr; xmlNsPtr ns_record; WRBUF wr_cdata = 0; - int turbo = mt->output_format == YAZ_MARC_TMARCXML; + for (n = mt->nodes; n; n = n->next) if (n->which == YAZ_MARC_LEADER) { leader = n->u.leader; break; } - + if (!leader) return -1; if (!atoi_n_check(leader+11, 1, &identifier_length)) @@ -1028,49 +965,34 @@ int yaz_marc_write_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr, xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type); for (n = mt->nodes; n; n = n->next) { - struct yaz_marc_subfield *s; xmlNode *ptr; + char field[10]; + field[0] = 'c'; + field[4] = '\0'; + switch(n->which) { case YAZ_MARC_DATAFIELD: - add_marc_datafield_turbo_xml(mt, n, record_ptr, ns_record, wr_cdata, identifier_length); + add_marc_datafield_turbo_xml(mt, n, record_ptr, ns_record, wr_cdata, identifier_length); break; case YAZ_MARC_CONTROLFIELD: wrbuf_rewind(wr_cdata); wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data); marc_iconv_reset(mt, wr_cdata); - - if (!turbo) { - ptr = xmlNewTextChild(record_ptr, ns_record, - BAD_CAST "controlfield", - BAD_CAST wrbuf_cstr(wr_cdata)); - xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag); - } - else { - // TODO required iconv? - char field[10]; - field[0] = 'c'; - strncpy(field + 1, n->u.controlfield.tag, 3); - field[4] = '\0'; - ptr = xmlNewTextChild(record_ptr, ns_record, - BAD_CAST field, - BAD_CAST wrbuf_cstr(wr_cdata)); - } + strncpy(field + 1, n->u.controlfield.tag, 3); + ptr = xmlNewTextChild(record_ptr, ns_record, + BAD_CAST field, + BAD_CAST wrbuf_cstr(wr_cdata)); break; case YAZ_MARC_COMMENT: ptr = xmlNewComment(BAD_CAST n->u.comment); xmlAddChild(record_ptr, ptr); break; case YAZ_MARC_LEADER: - { - char *field = "leader"; - if (turbo) - field = "l"; - xmlNewTextChild(record_ptr, ns_record, BAD_CAST field, - BAD_CAST n->u.leader); - } + xmlNewTextChild(record_ptr, ns_record, BAD_CAST "l", + BAD_CAST n->u.leader); break; } } @@ -1080,7 +1002,7 @@ int yaz_marc_write_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr, int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr, - const char *ns, + const char *ns, const char *format, const char *type) { @@ -1097,7 +1019,7 @@ int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr, leader = n->u.leader; break; } - + if (!leader) return -1; if (!atoi_n_check(leader+11, 1, &identifier_length)) @@ -1163,11 +1085,11 @@ int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr, wrbuf_rewind(wr_cdata); wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data); marc_iconv_reset(mt, wr_cdata); - + ptr = xmlNewTextChild(record_ptr, ns_record, BAD_CAST "controlfield", BAD_CAST wrbuf_cstr(wr_cdata)); - + xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag); break; case YAZ_MARC_COMMENT: @@ -1184,9 +1106,6 @@ int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr, return 0; } - - - #endif int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr) @@ -1201,11 +1120,11 @@ int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr) const char *leader = 0; WRBUF wr_dir, wr_head, wr_data_tmp; int base_address; - + for (n = mt->nodes; n; n = n->next) if (n->which == YAZ_MARC_LEADER) leader = n->u.leader; - + if (!leader) return -1; if (!atoi_n_check(leader+10, 1, &indicator_length)) @@ -1248,7 +1167,7 @@ int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr) wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag); wrbuf_rewind(wr_data_tmp); - wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, + wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, n->u.controlfield.data); marc_iconv_reset(mt, wr_data_tmp); wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */ @@ -1283,7 +1202,7 @@ int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr) wrbuf_printf(wr_head, "%05d", base_address); /* from "original" leader */ wrbuf_write(wr_head, leader+17, 7); - + wrbuf_write(wr, wrbuf_buf(wr_head), 24); wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir)); wrbuf_destroy(wr_head); @@ -1297,8 +1216,7 @@ int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr) switch(n->which) { case YAZ_MARC_DATAFIELD: - wrbuf_printf(wr, "%.*s", indicator_length, - n->u.datafield.indicator); + wrbuf_write(wr, n->u.datafield.indicator, indicator_length); for (s = n->u.datafield.subfields; s; s = s->next) { wrbuf_putc(wr, ISO2709_IDFS); @@ -1322,6 +1240,91 @@ int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr) return 0; } +int yaz_marc_write_json(yaz_marc_t mt, WRBUF w) +{ + int identifier_length; + struct yaz_marc_node *n; + const char *leader = 0; + int first = 1; + + wrbuf_puts(w, "{\n"); + for (n = mt->nodes; n; n = n->next) + if (n->which == YAZ_MARC_LEADER) + leader = n->u.leader; + + if (!leader) + return -1; + + if (!atoi_n_check(leader+11, 1, &identifier_length)) + return -1; + + wrbuf_puts(w, "\t\"leader\":\""); + wrbuf_json_puts(w, leader); + wrbuf_puts(w, "\",\n"); + wrbuf_puts(w, "\t\"fields\":\n\t[\n"); + + for (n = mt->nodes; n; n = n->next) + { + struct yaz_marc_subfield *s; + const char *sep = ""; + switch (n->which) + { + case YAZ_MARC_LEADER: + case YAZ_MARC_COMMENT: + break; + case YAZ_MARC_CONTROLFIELD: + if (first) + first = 0; + else + wrbuf_puts(w, ",\n"); + wrbuf_puts(w, "\t\t{\n\t\t\t\""); + wrbuf_iconv_json_puts(w, mt->iconv_cd, n->u.controlfield.tag); + wrbuf_puts(w, "\":\""); + wrbuf_iconv_json_puts(w, mt->iconv_cd, n->u.controlfield.data); + wrbuf_puts(w, "\"\n\t\t}"); + break; + case YAZ_MARC_DATAFIELD: + if (first) + first = 0; + else + wrbuf_puts(w, ",\n"); + + wrbuf_puts(w, "\t\t{\n\t\t\t\""); + wrbuf_json_puts(w, n->u.datafield.tag); + wrbuf_puts(w, "\":\n\t\t\t{\n\t\t\t\t\"subfields\":\n\t\t\t\t[\n"); + for (s = n->u.datafield.subfields; s; s = s->next) + { + size_t using_code_len = get_subfield_len(mt, s->code_data, + identifier_length); + wrbuf_puts(w, sep); + sep = ",\n"; + wrbuf_puts(w, "\t\t\t\t\t{\n\t\t\t\t\t\t\""); + wrbuf_iconv_json_write(w, mt->iconv_cd, + s->code_data, using_code_len); + wrbuf_puts(w, "\":\""); + wrbuf_iconv_json_puts(w, mt->iconv_cd, + s->code_data + using_code_len); + wrbuf_puts(w, "\"\n\t\t\t\t\t}"); + } + wrbuf_puts(w, "\n\t\t\t\t]"); + if (n->u.datafield.indicator[0]) + { + int i; + for (i = 0; n->u.datafield.indicator[i]; i++) + { + wrbuf_printf(w, ",\n\t\t\t\t\"ind%d\":\"%c\"", i + 1, + n->u.datafield.indicator[i]); + } + } + wrbuf_puts(w, "\n\t\t\t}\n"); + wrbuf_puts(w, "\n\t\t}"); + break; + } + } + wrbuf_puts(w, "\n\t]\n"); + wrbuf_puts(w, "}\n"); + return 0; +} int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) { @@ -1348,45 +1351,11 @@ int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize, return r; } -void yaz_marc_set_read_format(yaz_marc_t mt, int format) -{ - if (mt) - mt->input_format = format; -} - -int yaz_marc_get_read_format(yaz_marc_t mt) -{ - if (mt) - return mt->input_format; - return -1; -} - - -void yaz_marc_set_write_format(yaz_marc_t mt, int format) -{ - if (mt) { - mt->output_format = format; - } -} - -int yaz_marc_get_write_format(yaz_marc_t mt) -{ - if (mt) - return mt->output_format; - return -1; -} - - -/** - * Deprecated, use yaz_marc_set_write_format - */ void yaz_marc_xml(yaz_marc_t mt, int xmlmode) { - yaz_marc_set_write_format(mt, xmlmode); + mt->output_format = xmlmode; } - - void yaz_marc_debug(yaz_marc_t mt, int level) { if (mt) @@ -1449,7 +1418,7 @@ static int marc_exec_leader(const char *leader_spec, char *leader, size_t size) { const char *vp = strchr(val+1, '\''); size_t len; - + if (!vp) return -1; len = vp-val-1; @@ -1475,17 +1444,19 @@ static int marc_exec_leader(const char *leader_spec, char *leader, size_t size) int yaz_marc_decode_formatstr(const char *arg) { - int mode = -1; + int mode = -1; if (!strcmp(arg, "marc")) mode = YAZ_MARC_ISO2709; if (!strcmp(arg, "marcxml")) mode = YAZ_MARC_MARCXML; - if (!strcmp(arg, "tmarcxml")) - mode = YAZ_MARC_TMARCXML; + if (!strcmp(arg, "turbomarc")) + mode = YAZ_MARC_TURBOMARC; if (!strcmp(arg, "marcxchange")) mode = YAZ_MARC_XCHANGE; if (!strcmp(arg, "line")) mode = YAZ_MARC_LINE; + if (!strcmp(arg, "json")) + mode = YAZ_MARC_JSON; return mode; } @@ -1494,12 +1465,16 @@ void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable) mt->write_using_libxml2 = enable; } -int yaz_marc_is_turbo_format(yaz_marc_t mt) +int yaz_marc_check_marc21_coding(const char *charset, + const char *marc_buf, int sz) { - return mt->output_format == YAZ_MARC_TMARCXML; + if ((!yaz_matchstr(charset, "MARC8?") || + !yaz_matchstr(charset, "MARC8")) && marc_buf && sz > 25 + && marc_buf[9] == 'a') + return 1; + return 0; } - /* * Local variables: * c-basic-offset: 4