From e8ac1e56929fe175624fb63e619c184b58ef63b9 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Mon, 28 Aug 2006 14:18:18 +0000 Subject: [PATCH] New option for yaz-marcdump -lpos=value which allows setting a portion of MARC header to a certain value. Added function yaz_marc_leader_spec. --- NEWS | 5 ++++ doc/yaz-marcdump-man.xml | 23 ++++++++++---- include/yaz/marcdisp.h | 14 ++++++++- src/marcdisp.c | 75 +++++++++++++++++++++++++++++++++++++++++++--- util/marcdump.c | 24 +++++++++++---- 5 files changed, 125 insertions(+), 16 deletions(-) diff --git a/NEWS b/NEWS index c18b0ac..dced8d3 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,8 @@ +New option for yaz-marcdump -lpos=value which allows setting a portion +of MARC header to a certain value. + +Fixes for MARC generation when encoded as MARC-8 (bug 642, 643). + ZOOM-C reads option extraArgs. If set the value of extraArgs is appended to SRU URI (POST/GET). Value is _raw_ URI part, such as x-id-a=v1&x-id-b=v2 . diff --git a/doc/yaz-marcdump-man.xml b/doc/yaz-marcdump-man.xml index d2d1ad9..6b324fd 100644 --- a/doc/yaz-marcdump-man.xml +++ b/doc/yaz-marcdump-man.xml @@ -2,7 +2,7 @@ "http://www.oasis-open.org/docbook/xml/4.1/docbookx.dtd" [ ]> - + yaz-marcdump @@ -98,7 +98,7 @@ - -ffrom] + -f from Specify the character set from of the input MARC record. @@ -107,7 +107,7 @@ - -tto] + -t to Specify the character set of of the output. @@ -116,6 +116,18 @@ + -l leaderspec + + Specify a simple modification string for MARC leader. The + leaderspec is a list of pos=value + pairs, where pos is an integer offset (0 - 23) for leader. Value + is either a quoted string or an integer (character value in decimal). + Pairs are comma separated. For example, to set leader at offset 9 + to a, use 9='a'. + + + + -v Writes more information about the parsing process. @@ -129,9 +141,10 @@ EXAMPLES The following command converts MARC21/USMARC in MARC-8 encoding to - MARC21/USMARC in UTF-8 encoding. (Both input and output is in ISO2709).
+ MARC21/USMARC in UTF-8 encoding. Leader offset 9 is set to 'a'. + Both input and output records are ISO2709 encoded. - yaz-marcdump -f MARC-8 -t UTF-8 -I marc21.raw >marc21.utf8.raw + yaz-marcdump -f MARC-8 -t UTF-8 -I -l 9=97 marc21.raw >marc21.utf8.raw diff --git a/include/yaz/marcdisp.h b/include/yaz/marcdisp.h index 77fba49..b86f95e 100644 --- a/include/yaz/marcdisp.h +++ b/include/yaz/marcdisp.h @@ -23,7 +23,7 @@ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. * - * $Id: marcdisp.h,v 1.17 2006-04-20 20:35:02 adam Exp $ + * $Id: marcdisp.h,v 1.18 2006-08-28 14:18:20 adam Exp $ */ /** @@ -196,6 +196,18 @@ YAZ_EXPORT int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wrbuf); */ YAZ_EXPORT int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wrbuf); +/** \brief sets leader spec (for modifying bytes in 24 byte leader) + \param mt handle + \param leader_spec + \retval 0 OK + \retval -1 ERROR + + Spec takes form pos=val,pos=val,... + where value is either a number (decimal char value) or a + string in 'a', e.g. 9='a' + +*/ +YAZ_EXPORT int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec); YAZ_END_CDECL #endif diff --git a/src/marcdisp.c b/src/marcdisp.c index 1d408be..e5fd62e 100644 --- a/src/marcdisp.c +++ b/src/marcdisp.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2006, Index Data ApS * See the file LICENSE for details. 
* - * $Id: marcdisp.c,v 1.33 2006-08-28 12:34:40 adam Exp $ + * $Id: marcdisp.c,v 1.34 2006-08-28 14:18:22 adam Exp $ */ /** @@ -87,6 +87,7 @@ struct yaz_marc_t_ { yaz_iconv_t iconv_cd; char subfield_str[8]; char endline_str[8]; + char *leader_spec; struct yaz_marc_node *nodes; struct yaz_marc_node **nodes_pp; struct yaz_marc_subfield **subfield_pp; @@ -99,6 +100,7 @@ yaz_marc_t yaz_marc_create(void) mt->debug = 0; mt->m_wr = wrbuf_alloc(); mt->iconv_cd = 0; + mt->leader_spec = 0; strcpy(mt->subfield_str, " $"); strcpy(mt->endline_str, "\n"); @@ -112,10 +114,15 @@ void yaz_marc_destroy(yaz_marc_t mt) if (!mt) return ; nmem_destroy(mt->nmem); - wrbuf_free (mt->m_wr, 1); - xfree (mt); + wrbuf_free(mt->m_wr, 1); + xfree(mt->leader_spec); + xfree(mt); } +static int marc_exec_leader(const char *leader_spec, char *leader, + size_t size); + + struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt) { struct yaz_marc_node *n = nmem_malloc(mt->nmem, sizeof(*n)); @@ -158,6 +165,7 @@ void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len) struct yaz_marc_node *n = yaz_marc_add_node(mt); n->which = YAZ_MARC_LEADER; n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len); + marc_exec_leader(mt->leader_spec, n->u.leader, leader_len); } void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag, @@ -574,7 +582,8 @@ static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr, int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr) { - yaz_marc_modify_leader(mt, 9, "a"); + if (!mt->leader_spec) + yaz_marc_modify_leader(mt, 9, "a"); return yaz_marc_write_marcxml_ns(mt, wr, "http://www.loc.gov/MARC21/slim", 0, 0); } @@ -1211,6 +1220,64 @@ int marc_display (const char *buf, FILE *outf) return marc_display_ex (buf, outf, 0); } +int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec) +{ + xfree(mt->leader_spec); + mt->leader_spec = 0; + if (leader_spec) + { + char dummy_leader[24]; + if (marc_exec_leader(leader_spec, dummy_leader, 24)) + return -1; + 
mt->leader_spec = xstrdup(leader_spec); + } + return 0; +} + +static int marc_exec_leader(const char *leader_spec, char *leader, size_t size) +{ + const char *cp = leader_spec; + while (cp) + { + char val[21]; + int pos; + int no_read = 0, no = 0; + + no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read); + if (no < 2 || no_read < 3) + return -1; + if (pos < 0 || pos >= size) + return -1; + + if (*val == '\'') + { + const char *vp = strchr(val+1, '\''); + size_t len; + + if (!vp) + return -1; + len = vp-val-1; + if (len + pos > size) + return -1; + memcpy(leader + pos, val+1, len); + } + else if (*val >= '0' && *val <= '9') + { + int ch = atoi(val); + leader[pos] = ch; + } + else + return -1; + cp += no_read; + if (*cp != ',') + break; + + cp++; + } + return 0; +} + + /* * Local variables: * c-basic-offset: 4 diff --git a/util/marcdump.c b/util/marcdump.c index 9190964..c27e44f 100644 --- a/util/marcdump.c +++ b/util/marcdump.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2006, Index Data ApS * See the file LICENSE for details. 
* - * $Id: marcdump.c,v 1.41 2006-08-28 12:31:18 adam Exp $ + * $Id: marcdump.c,v 1.42 2006-08-28 14:18:23 adam Exp $ */ #define _FILE_OFFSET_BITS 64 @@ -50,7 +50,8 @@ static char *prog; static void usage(const char *prog) { - fprintf (stderr, "Usage: %s [-c cfile] [-f from] [-t to] [-x] [-X] [-e] [-I] [-v] [-s splitfname] file...\n", + fprintf (stderr, "Usage: %s [-c cfile] [-f from] [-t to] [-x] [-X] [-e] " + "[-I] [-l pos=value] [-v] [-s splitfname] file...\n", prog); } @@ -84,11 +85,17 @@ static void marcdump_read_xml(yaz_marc_t mt, const char *fname) static void dump(const char *fname, const char *from, const char *to, int read_xml, int xml, int print_offset, const char *split_fname, int verbose, - FILE *cfile) + FILE *cfile, const char *leader_spec) { yaz_marc_t mt = yaz_marc_create(); yaz_iconv_t cd = 0; - + + if (yaz_marc_leader_spec(mt, leader_spec)) + { + fprintf(stderr, "bad leader spec: %s\n", leader_spec); + yaz_marc_destroy(mt); + exit(2); + } if (from && to) { cd = yaz_iconv_open(to, from); @@ -96,6 +103,7 @@ static void dump(const char *fname, const char *from, const char *to, { fprintf(stderr, "conversion from %s to %s " "unsupported\n", from, to); + yaz_marc_destroy(mt); exit(2); } yaz_marc_iconv(mt, cd); @@ -259,6 +267,7 @@ int main (int argc, char **argv) char *from = 0, *to = 0; int read_xml = 0; const char *split_fname = 0; + const char *leader_spec = 0; #if HAVE_LOCALE_H setlocale(LC_CTYPE, ""); @@ -270,11 +279,14 @@ int main (int argc, char **argv) #endif prog = *argv; - while ((r = options("pvc:xOeXIf:t:s:", argv, argc, &arg)) != -2) + while ((r = options("pvc:xOeXIf:t:s:l:", argv, argc, &arg)) != -2) { no++; switch (r) { + case 'l': + leader_spec = arg; + break; case 'f': from = arg; break; @@ -317,7 +329,7 @@ int main (int argc, char **argv) break; case 0: dump(arg, from, to, read_xml, xml, - print_offset, split_fname, verbose, cfile); + print_offset, split_fname, verbose, cfile, leader_spec); break; case 'v': verbose++; -- 1.7.10.4