From fb276eb339f39e6233de8e7540c4408089e8d3b3 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Wed, 7 Apr 2010 15:43:44 +0200 Subject: [PATCH 1/1] Update TurboMARC definition and document it Document TurboMARC in the tools chapter of the YAZ manual. Update definition format mode to YAZ_MARC_TURBOMARC. The string as used in yaz-marcdump and other tools is called "turbomarc". Rename function yaz_marc_write_turbo_xml to yaz_marc_write_turbomarc. --- doc/tools.xml | 127 ++++++++++++++++++++++++++++++++++++++++++---- doc/yaz-marcdump-man.xml | 16 +++++- include/yaz/marcdisp.h | 12 ++--- src/marc_read_xml.c | 4 +- src/marcdisp.c | 12 ++--- src/record_conv.c | 6 +-- src/zoom-c.c | 2 +- test/tstmarc.sh | 8 +-- util/marcdump.c | 2 +- 9 files changed, 153 insertions(+), 36 deletions(-) diff --git a/doc/tools.xml b/doc/tools.xml index ec97f06..b487442 100644 --- a/doc/tools.xml +++ b/doc/tools.xml @@ -1912,9 +1912,9 @@ void cql_to_xml_stdio(struct cql_node *cn, FILE *f); MARC - YAZ provides a fast utility that decodes MARC records and - encodes to a varity of output formats. The MARC records must - be encoded in ISO2709. + YAZ provides a fast utility for working with MARC records. + Early versions of the MARC utility only allowed decoding of ISO2709. + Today the utility may both encode - and decode to a variety of formats. @@ -1932,6 +1932,8 @@ void cql_to_xml_stdio(struct cql_node *cn, FILE *f); #define YAZ_MARC_MARCXML 3 #define YAZ_MARC_ISO2709 4 #define YAZ_MARC_XCHANGE 5 + #define YAZ_MARC_CHECK 6 + #define YAZ_MARC_TURBOMARC 7 /* supply iconv handle for character set conversion .. */ void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd); @@ -1941,15 +1943,22 @@ void cql_to_xml_stdio(struct cql_node *cn, FILE *f); /* decode MARC in buf of size bsize. Returns >0 on success; <=0 on failure. On success, result in *result with size *rsize. 
*/ - int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize, - char **result, int *rsize); + int yaz_marc_decode_buf(yaz_marc_t mt, const char *buf, int bsize, + const char **result, size_t *rsize); /* decode MARC in buf of size bsize. Returns >0 on success; <=0 on failure. On success, result in WRBUF */ - int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, - int bsize, WRBUF wrbuf); + int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, + int bsize, WRBUF wrbuf); ]]> + + + The synopsis is just a basic subset of all functionality. Refer + to the actual header file marcdisp.h for + details. + + A MARC conversion handle must be created by using yaz_marc_create and destroyed @@ -1974,7 +1983,7 @@ void cql_to_xml_stdio(struct cql_node *cn, FILE *f); YAZ_MARC_MARCXML - The resulting record is converted to MARCXML. + MARCXML. @@ -1983,10 +1992,41 @@ void cql_to_xml_stdio(struct cql_node *cn, FILE *f); YAZ_MARC_ISO2709 - The resulting record is converted to ISO2709 (MARC). + ISO2709 (sometimes just referred to as "MARC"). + + + + + + YAZ_MARC_XCHANGE + + + MarcXchange. + + + YAZ_MARC_CHECK + + + Pseudo format for validation only. Does not generate + any real output except diagnostics. + + + + + + YAZ_MARC_TURBOMARC + + + XML format with the same semantics as MARCXML but more compact + and geared towards fast processing with XSLT. Refer to + for more information. + + + + @@ -2000,13 +2040,13 @@ void cql_to_xml_stdio(struct cql_node *cn, FILE *f); Display of MARC record - The followint program snippet illustrates how the MARC API may + The following program snippet illustrates how the MARC API may be used to convert a MARC record to the line-by-line format: + + TurboMARC + + TurboMARC is yet another XML encoding of a MARC record. The format + was designed for fast processing with XSLT. + + + Applications like + Pazpar2 use XSLT to convert an XML encoded MARC record to an internal + representation. 
This conversion mostly checks the tag of a MARC field + to determine the basic rules in the conversion. This check is + costly when that tag is encoded as an attribute in MARCXML. + Having the tag value as the element name instead makes processing + many times faster (at least for Libxslt). + + + TurboMARC is encoded as follows: + + + Record elements are part of namespace + "http://www.indexdata.com/MARC21/turboxml". + + + A record is enclosed in element r. + + + A collection of records is enclosed in element + collection. + + + The leader is encoded as element l with the + leader content as its (text) value. + + + A control field is encoded as element c concatenated + with the tag value of the control field if the tag value + matches the regular expression [a-zA-Z0-9]*. + If the tag value does not match the regular expression + [a-zA-Z0-9]* the control field is encoded + as element c and attribute code + will hold the tag value. + This rule ensures that in the rare cases where a tag value might + result in non-well-formed XML, YAZ encodes it as a coded attribute + (as in MARCXML). + + + The control field content is the text value of this element. + Indicators are encoded as attribute names + i1, i2, etc. and + corresponding values for each indicator. + + + A data field is encoded as element d concatenated + with the tag value of the data field or using the attribute + code as described in the rules for control fields. + The children of the data field element are subfield elements. + Each subfield element is encoded as s + concatenated with the subfield code. + The text of the subfield element is the contents of the subfield. + Indicators are encoded as attributes for the data field element similar + to the encoding for control fields. + + + + diff --git a/doc/yaz-marcdump-man.xml b/doc/yaz-marcdump-man.xml index 51ebcfb..dbc4060 100644 --- a/doc/yaz-marcdump-man.xml +++ b/doc/yaz-marcdump-man.xml @@ -79,7 +79,8 @@ Specifies input format. 
Must be one of marcxml, marc (ISO2709), marcxchange (ISO25577), - line (line mode MARC). + line (line mode MARC), + or turbomarc (Turbo MARC). @@ -89,7 +90,8 @@ Specifies output format. Must be one of marcxml, marc (ISO2709), marcxchange (ISO25577), - line (line mode MARC). + line (line mode MARC), + or turbomarc (Turbo MARC). @@ -191,6 +193,16 @@ yaz-marcdump -f MARC-8 -t UTF-8 -o marcxml marc21.raw >marcxml.xml + + + Turbo MARC is a compact XML notation with same semantics as + MARCXML, but which allows for faster processing via XSLT. In order + to generate Turbo MARC records encoded in UTF-8 from MARC21 (ISO), one + could use: + + yaz-marcdump -f MARC8 -t UTF8 -o turbomarc -i marc marc21.raw >out.xml + + FILES diff --git a/include/yaz/marcdisp.h b/include/yaz/marcdisp.h index a970de7..f472aba 100644 --- a/include/yaz/marcdisp.h +++ b/include/yaz/marcdisp.h @@ -73,9 +73,9 @@ YAZ_EXPORT void yaz_marc_xml(yaz_marc_t mt, int xmlmode); /** \brief Output format: MarcXchange (ISO25577) */ #define YAZ_MARC_XCHANGE 5 /** \brief Output format: check only (no marc output) */ -#define YAZ_MARC_CHECK 6 -/** \brief Output format: Turbo MARCXML Index Data format*/ -#define YAZ_MARC_TMARCXML 7 +#define YAZ_MARC_CHECK 6 +/** \brief Output format: Turbo MARC Index Data format (XML based) */ +#define YAZ_MARC_TURBOMARC 7 /** \brief set iconv handle for character set conversion */ YAZ_EXPORT void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd); @@ -177,7 +177,7 @@ int yaz_marc_read_line(yaz_marc_t mt, void *client_data); #if YAZ_HAVE_XML2 -/** \brief parses MARCXML/MarcXchange record from xmlNode pointer +/** \brief parses MARCXML/MarcXchange/TurboMARC record from xmlNode pointer \param mt handle \param ptr is a pointer to root xml node \retval 0 OK @@ -204,13 +204,13 @@ YAZ_EXPORT int yaz_marc_write_line(yaz_marc_t mt, WRBUF wrbuf); */ YAZ_EXPORT int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wrbuf); -/** \brief writes record in TMARCXML format +/** \brief writes record in 
TurboMARC format \param mt handle \param wrbuf WRBUF for output \retval 0 OK \retval -1 ERROR */ -YAZ_EXPORT int yaz_marc_write_turbo_xml(yaz_marc_t mt, WRBUF wrbuf); +YAZ_EXPORT int yaz_marc_write_turbomarc(yaz_marc_t mt, WRBUF wrbuf); /** \brief writes record in MarcXchange XML (ISO25577) \param mt handle diff --git a/src/marc_read_xml.c b/src/marc_read_xml.c index 59ffb00..6b3e3dd 100644 --- a/src/marc_read_xml.c +++ b/src/marc_read_xml.c @@ -377,7 +377,7 @@ int yaz_marc_read_xml(yaz_marc_t mt, const xmlNode *ptr) } else if (!strcmp((const char *) ptr->name, "r")) { - format = YAZ_MARC_TMARCXML; + format = YAZ_MARC_TURBOMARC; break; } else @@ -402,7 +402,7 @@ int yaz_marc_read_xml(yaz_marc_t mt, const xmlNode *ptr) { case YAZ_MARC_MARCXML: return yaz_marc_read_xml_fields(mt, ptr->next); - case YAZ_MARC_TMARCXML: + case YAZ_MARC_TURBOMARC: return yaz_marc_read_turbo_xml_fields(mt, ptr->next); } return -1; diff --git a/src/marcdisp.c b/src/marcdisp.c index 10fa7b9..d4fdf6c 100644 --- a/src/marcdisp.c +++ b/src/marcdisp.c @@ -569,7 +569,7 @@ int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr) switch(mt->output_format) { case YAZ_MARC_MARCXML: - case YAZ_MARC_TMARCXML: + case YAZ_MARC_TURBOMARC: wrbuf_printf(wr, "\n"); break; case YAZ_MARC_XCHANGE: @@ -593,8 +593,8 @@ int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr) return yaz_marc_write_line(mt, wr); case YAZ_MARC_MARCXML: return yaz_marc_write_marcxml(mt, wr); - case YAZ_MARC_TMARCXML: - return yaz_marc_write_turbo_xml(mt, wr); + case YAZ_MARC_TURBOMARC: + return yaz_marc_write_turbomarc(mt, wr); case YAZ_MARC_XCHANGE: return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */ case YAZ_MARC_ISO2709: @@ -816,7 +816,7 @@ int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr) 0, 0, 0); } -int yaz_marc_write_turbo_xml(yaz_marc_t mt, WRBUF wr) +int yaz_marc_write_turbomarc(yaz_marc_t mt, WRBUF wr) { /* set leader 09 to 'a' for UNICODE */ /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */ @@ 
-1336,8 +1336,8 @@ int yaz_marc_decode_formatstr(const char *arg) mode = YAZ_MARC_ISO2709; if (!strcmp(arg, "marcxml")) mode = YAZ_MARC_MARCXML; - if (!strcmp(arg, "tmarcxml")) - mode = YAZ_MARC_TMARCXML; + if (!strcmp(arg, "turbomarc")) + mode = YAZ_MARC_TURBOMARC; if (!strcmp(arg, "marcxchange")) mode = YAZ_MARC_XCHANGE; if (!strcmp(arg, "line")) diff --git a/src/record_conv.c b/src/record_conv.c index 497203e..695f088 100644 --- a/src/record_conv.c +++ b/src/record_conv.c @@ -304,9 +304,9 @@ static int conv_marc(yaz_record_conv_t p, const xmlNode *ptr) if (input_charset && !output_charset) output_charset = "utf-8"; } - else if (!strcmp(output_format, "tmarcxml")) + else if (!strcmp(output_format, "turbomarc")) { - output_format_mode = YAZ_MARC_TMARCXML; + output_format_mode = YAZ_MARC_TURBOMARC; if (input_charset && !output_charset) output_charset = "utf-8"; } @@ -479,7 +479,7 @@ static int yaz_record_conv_record_rule(yaz_record_conv_t p, ret = -1; } else if (r->u.marc.input_format == YAZ_MARC_MARCXML || - r->u.marc.input_format == YAZ_MARC_TMARCXML) + r->u.marc.input_format == YAZ_MARC_TURBOMARC) { xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record), wrbuf_len(record)); diff --git a/src/zoom-c.c b/src/zoom-c.c index 24e6ad4..c579957 100644 --- a/src/zoom-c.c +++ b/src/zoom-c.c @@ -2196,7 +2196,7 @@ ZOOM_API(const char *) } else if (!strcmp(type, "txml")) { - return get_record_format(rec, len, npr, YAZ_MARC_TMARCXML, charset, + return get_record_format(rec, len, npr, YAZ_MARC_TURBOMARC, charset, format); } else if (!strcmp(type, "raw")) diff --git a/test/tstmarc.sh b/test/tstmarc.sh index f1ac177..2996bb4 100755 --- a/test/tstmarc.sh +++ b/test/tstmarc.sh @@ -101,12 +101,12 @@ binmarc_convert "xml,marcxml" "marcxml" "xml2" echo "binmarc -> marcxml(libxml2): $?" fi -binmarc_convert "tmarcxml" "tmarcxml" "t" -echo "binmarc -> tmarcxml: $?" +binmarc_convert "turbomarc" "turbomarc" "t" +echo "binmarc -> turbomarc: $?" 
if test -z "$noxmlwrite"; then -binmarc_convert "xml,tmarcxml" "tmarcxml" "xml2t" -echo "binmarc -> tmarcxml(libxml2): $?" +binmarc_convert "xml,turbomarc" "turbomarc" "xml2t" +echo "binmarc -> turbomarc(libxml2): $?" fi exit $ecode diff --git a/util/marcdump.c b/util/marcdump.c index c30623a..0696df8 100644 --- a/util/marcdump.c +++ b/util/marcdump.c @@ -223,7 +223,7 @@ static void dump(const char *fname, const char *from, const char *to, yaz_marc_write_using_libxml2(mt, write_using_libxml2); yaz_marc_debug(mt, verbose); - if (input_format == YAZ_MARC_MARCXML || input_format == YAZ_MARC_TMARCXML || input_format == YAZ_MARC_XCHANGE) + if (input_format == YAZ_MARC_MARCXML || input_format == YAZ_MARC_TURBOMARC || input_format == YAZ_MARC_XCHANGE) { #if YAZ_HAVE_XML2 marcdump_read_xml(mt, fname); -- 1.7.10.4