From 861f9deb72bd92679ea08d528b40993ada55cdb9 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Mon, 18 Dec 2006 10:32:11 +0000 Subject: [PATCH] Added yaz_marc_write_xml which creates MARCXML/MarcXchange record as Libxml2 tree. --- NEWS | 3 + include/yaz/marcdisp.h | 24 +++++++- src/marcdisp.c | 156 ++++++++++++++++++++++++++++++++++++++++++++++-- util/marcdump.c | 13 +++- 4 files changed, 189 insertions(+), 7 deletions(-) diff --git a/NEWS b/NEWS index cedea0d..c137df4 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,6 @@ +Added yaz_marc_write_xml which creates MARCXML/MarcXchange record as +Libxml2 tree. + --- 2.1.42 2006/12/17 Fixed bug #775: char conversion does not handle Alternative UTF-8 diff --git a/include/yaz/marcdisp.h b/include/yaz/marcdisp.h index 8805f71..0e85c3b 100644 --- a/include/yaz/marcdisp.h +++ b/include/yaz/marcdisp.h @@ -24,7 +24,7 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -/* $Id: marcdisp.h,v 1.23 2006-12-15 19:28:46 adam Exp $ */ +/* $Id: marcdisp.h,v 1.24 2006-12-18 10:32:47 adam Exp $ */ /** * \file marcdisp.h @@ -225,6 +225,21 @@ YAZ_EXPORT int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wrbuf); */ YAZ_EXPORT int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wrbuf); +/** \brief writes MARC record as libxml2 tree + \param mt handle + \param root_ptr pointer to record node + \param ns namespace of record (such as "http://www.loc.gov/MARC21/slim") + \param format MarcXchange format (NULL for none) + \param type MarcXchange type (NULL for none) + \retval 0 Creation successful and *root_ptr is "record" node + \retval -1 ERROR +*/ +YAZ_EXPORT +int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr, + const char *ns, + const char *format, + const char *type); + /** \brief sets leader spec (for modifying bytes in 24 byte leader) \param mt handle \param leader_spec @@ -351,6 +366,13 @@ int yaz_marc_get_debug(yaz_marc_t mt); YAZ_EXPORT int yaz_marc_decode_formatstr(const char *arg); +/** \brief enable writing of MARC XML records using Libxml2 + \param mt handle + \param enable 0=disable, 1=enable +*/ +YAZ_EXPORT +void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable); + YAZ_END_CDECL #endif diff --git a/src/marcdisp.c b/src/marcdisp.c index 54ad37b..3079d81 100644 --- a/src/marcdisp.c +++ b/src/marcdisp.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2006, Index Data ApS * See the file LICENSE for details. 
* - * $Id: marcdisp.c,v 1.39 2006-12-15 19:28:47 adam Exp $ + * $Id: marcdisp.c,v 1.40 2006-12-18 10:33:22 adam Exp $ */ /** @@ -83,6 +83,7 @@ struct yaz_marc_t_ { NMEM nmem; int xml; int debug; + int write_using_libxml2; yaz_iconv_t iconv_cd; char subfield_str[8]; char endline_str[8]; @@ -97,6 +98,7 @@ yaz_marc_t yaz_marc_create(void) yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt)); mt->xml = YAZ_MARC_LINE; mt->debug = 0; + mt->write_using_libxml2 = 0; mt->m_wr = wrbuf_alloc(); mt->iconv_cd = 0; mt->leader_spec = 0; @@ -522,10 +524,10 @@ int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr) \param format record format (e.g. "MARC21") \param type record type (e.g. "Bibliographic") */ -static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr, - const char *ns, - const char *format, - const char *type) +static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr, + const char *ns, + const char *format, + const char *type) { struct yaz_marc_node *n; int identifier_length; @@ -620,6 +622,37 @@ static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr, return 0; } +static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr, + const char *ns, + const char *format, + const char *type) +{ + if (mt->write_using_libxml2) + { + int ret; + xmlNode *root_ptr; + + ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type); + if (ret == 0) + { + xmlChar *buf_out; + xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0"); + int len_out; + + xmlDocSetRootElement(doc, root_ptr); + xmlDocDumpMemory(doc, &buf_out, &len_out); + + wrbuf_write(wr, (const char *) buf_out, len_out); + wrbuf_puts(wr, ""); + xmlFree(buf_out); + xmlFreeDoc(doc); + } + return ret; + } + else + return yaz_marc_write_marcxml_ns1(mt, wr, ns, format, type); +} + int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr) { if (!mt->leader_spec) @@ -637,6 +670,114 @@ int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr, 0, 0); } + +int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr, + const char *ns, + const char *format, 
+ const char *type) +{ +#if YAZ_HAVE_XML2 + struct yaz_marc_node *n; + int identifier_length; + const char *leader = 0; + xmlNode *record_ptr; + xmlNsPtr ns_record; + + for (n = mt->nodes; n; n = n->next) + if (n->which == YAZ_MARC_LEADER) + { + leader = n->u.leader; + break; + } + + if (!leader) + return -1; + if (!atoi_n_check(leader+11, 1, &identifier_length)) + return -1; + + record_ptr = xmlNewNode(0, BAD_CAST "record"); + *root_ptr = record_ptr; + + ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0); + + if (format) + xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format); + if (type) + xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type); + for (n = mt->nodes; n; n = n->next) + { + struct yaz_marc_subfield *s; + xmlNode *ptr; + + switch(n->which) + { + case YAZ_MARC_DATAFIELD: + ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0); + xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag); + if (n->u.datafield.indicator) + { + int i; + for (i = 0; n->u.datafield.indicator[i]; i++) + { + char ind_str[6]; + char ind_val[2]; + + sprintf(ind_str, "ind%d", i+1); + ind_val[0] = n->u.datafield.indicator[i]; + ind_val[1] = '\0'; + xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val); + } + } + for (s = n->u.datafield.subfields; s; s = s->next) + { + char code_val[8]; + + xmlNode *ptr_subfield; + /* if identifier length is 2 (most MARCs), + the code is a single character .. However we've + seen multibyte codes, so see how big it really is */ + size_t using_code_len = + (identifier_length != 2) ? 
identifier_length - 1 + : + cdata_one_character(mt, s->code_data); + + if (using_code_len >= sizeof(code_val)-1) + continue; + + ptr_subfield = xmlNewTextChild( + ptr, ns_record, + BAD_CAST "subfield", + BAD_CAST (s->code_data + using_code_len)); + + memcpy(code_val, s->code_data, using_code_len); + code_val[using_code_len] = '\0'; + + xmlNewProp(ptr_subfield, BAD_CAST "code", BAD_CAST code_val); + } + break; + case YAZ_MARC_CONTROLFIELD: + ptr = xmlNewTextChild(record_ptr, ns_record, + BAD_CAST "controlfield", + BAD_CAST n->u.controlfield.data); + + xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag); + break; + case YAZ_MARC_COMMENT: + ptr = xmlNewComment(BAD_CAST n->u.comment); + xmlAddChild(record_ptr, ptr); + break; + case YAZ_MARC_LEADER: + xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader", + BAD_CAST n->u.leader); + break; + } + } + return 0; +#else + return -1; +#endif +} + int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr) { struct yaz_marc_node *n; @@ -944,6 +1085,11 @@ int yaz_marc_decode_formatstr(const char *arg) return mode; } +void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable) +{ + mt->write_using_libxml2 = enable; +} + /* * Local variables: * c-basic-offset: 4 diff --git a/util/marcdump.c b/util/marcdump.c index 254c2d4..8e9c4d8 100644 --- a/util/marcdump.c +++ b/util/marcdump.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2006, Index Data ApS * See the file LICENSE for details. 
* - * $Id: marcdump.c,v 1.45 2006-12-15 19:28:48 adam Exp $ + * $Id: marcdump.c,v 1.46 2006-12-18 10:33:52 adam Exp $ */ #define _FILE_OFFSET_BITS 64 @@ -126,6 +126,7 @@ static void marcdump_read_xml(yaz_marc_t mt, const char *fname) static void dump(const char *fname, const char *from, const char *to, int input_format, int output_format, + int write_using_libxml2, int print_offset, const char *split_fname, int split_chunk, int verbose, FILE *cfile, const char *leader_spec) { @@ -151,6 +152,7 @@ static void dump(const char *fname, const char *from, const char *to, yaz_marc_iconv(mt, cd); } yaz_marc_xml(mt, output_format); + yaz_marc_write_using_libxml2(mt, write_using_libxml2); yaz_marc_debug(mt, verbose); if (input_format == YAZ_MARC_MARCXML || input_format == YAZ_MARC_XCHANGE) @@ -315,6 +317,7 @@ int main (int argc, char **argv) int split_chunk = 1; const char *split_fname = 0; const char *leader_spec = 0; + int write_using_libxml2 = 0; #if HAVE_LOCALE_H setlocale(LC_CTYPE, ""); @@ -340,6 +343,13 @@ int main (int argc, char **argv) } break; case 'o': + /* dirty hack so we can make Libxml2 do the writing .. + rather than WRBUF */ + if (strlen(arg) > 4 && strncmp(arg, "xml,", 4) == 0) + { + arg = arg + 4; + write_using_libxml2 = 1; + } output_format = yaz_marc_decode_formatstr(arg); if (output_format == -1) { @@ -400,6 +410,7 @@ int main (int argc, char **argv) break; case 0: dump(arg, from, to, input_format, output_format, + write_using_libxml2, print_offset, split_fname, split_chunk, verbose, cfile, leader_spec); break; -- 1.7.10.4