From f578ebbcfe51125d91358a98a79ab8411f38933f Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Thu, 21 Aug 2003 10:29:00 +0000 Subject: [PATCH] New filter grs.marcxml. --- recctrl/grsread.h | 3 +- recctrl/marcread.c | 137 ++++++++++++++++++++++++++++++++++++++++++++-------- recctrl/recgrs.c | 6 ++- recctrl/xmlread.c | 15 +++++- 4 files changed, 138 insertions(+), 23 deletions(-) diff --git a/recctrl/grsread.h b/recctrl/grsread.h index 381cb63..0a3e23a 100644 --- a/recctrl/grsread.h +++ b/recctrl/grsread.h @@ -1,4 +1,4 @@ -/* $Id: grsread.h,v 1.12 2002-11-15 21:26:01 adam Exp $ +/* $Id: grsread.h,v 1.13 2003-08-21 10:29:00 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -55,6 +55,7 @@ extern RecTypeGrs recTypeGrs_sgml; extern RecTypeGrs recTypeGrs_regx; extern RecTypeGrs recTypeGrs_tcl; extern RecTypeGrs recTypeGrs_marc; +extern RecTypeGrs recTypeGrs_marcxml; extern RecTypeGrs recTypeGrs_xml; extern RecTypeGrs recTypeGrs_perl; diff --git a/recctrl/marcread.c b/recctrl/marcread.c index ea1abd4..f3214ff 100644 --- a/recctrl/marcread.c +++ b/recctrl/marcread.c @@ -1,4 +1,4 @@ -/* $Id: marcread.c,v 1.20 2003-03-05 16:43:31 adam Exp $ +/* $Id: marcread.c,v 1.21 2003-08-21 10:29:00 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -34,7 +34,7 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #define MARC_DEBUG 0 #define MARCOMP_DEBUG 0 -static data1_node *grs_read_iso2709 (struct grs_read_info *p) +static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml) { char buf[100000]; int entry_p; @@ -83,7 +83,18 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p) yaz_log (LOG_WARN, "cannot read MARC without an abstract syntax"); return 0; } - res_top = data1_mk_tag (p->dh, p->mem, absynName, 0, res_root); + if (marc_xml) + { + data1_node *lead; + const char *attr[] = { "xmlns", "http://www.loc.gov/MARC21/slim", 0}; + + res_top = data1_mk_tag (p->dh, p->mem, "record", attr, res_root); + + lead = data1_mk_tag(p->dh, p->mem, "leader", 0, res_top); + data1_mk_text_n(p->dh, p->mem, buf, 24, lead); + } + else + res_top = data1_mk_tag (p->dh, p->mem, absynName, 0, res_root); if ((marctab = res_root->u.root.absyn->marc)) { @@ -125,10 +136,11 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p) entry_p += 3; tag[3] = '\0'; - - /* generate field node */ - res = data1_mk_tag_n (p->dh, p->mem, tag, 3, 0 /* attr */, parent); - + if (marc_xml) + res = parent; + else + res = data1_mk_tag_n (p->dh, p->mem, tag, 3, 0 /* attr */, parent); + #if MARC_DEBUG fprintf (outf, "%s ", tag); #endif @@ -142,17 +154,57 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p) if (memcmp (tag, "00", 2) && indicator_length) { /* generate indicator node */ + if (marc_xml) + { + const char *attr[10]; + int j; + + attr[0] = "tag"; + attr[1] = tag; + attr[2] = 0; + + res = data1_mk_tag(p->dh, p->mem, "datafield", attr, res); + + for (j = 0; jdh, p->mem, res, attr); + } + } + else + { #if MARC_DEBUG - int j; + int j; #endif - res = data1_mk_tag_n (p->dh, p->mem, - buf+i, indicator_length, 0 /* attr */, res); + res = data1_mk_tag_n (p->dh, p->mem, + buf+i, indicator_length, 0 /* attr */, res); #if MARC_DEBUG - for (j = 0; jdh, p->mem, "controlfield", attr, res); + } + } parent = res; /* traverse sub fields */ i0 = i; @@ -190,10 +242,28 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p) } else if (memcmp (tag, "00", 2) && identifier_length) { - data1_node *res = - data1_mk_tag_n (p->dh, p->mem, - buf+i+1, identifier_length-1, - 0 /* attr */, parent); + data1_node *res; + if (marc_xml) + { + int j; + const char *attr[3]; + char code[10]; + + for (j = 1; jdh, p->mem, "subfield", + attr, parent); + } + else + { + res = data1_mk_tag_n (p->dh, p->mem, + buf+i+1, identifier_length-1, + 0 /* attr */, parent); + } #if MARC_DEBUG fprintf (outf, " $"); for (j = 1; ju.root.absyn->main_elements; e; e=e->next) + { + data1_tag *tag = e->tag; + + if (tag && tag->which == DATA1T_string && + !yaz_matchstr(tag->value.string, "mc?")) + parse_data1_tree(p, tag->value.string, root); + } + return root; +} + + data1_node *grs_read_marc(struct grs_read_info *p) { - data1_node *root = grs_read_iso2709(p); + data1_node *root = grs_read_iso2709(p, 0); data1_element *e; if (!root) @@ -677,3 +767,12 @@ static struct recTypeGrs marc_type = { }; RecTypeGrs recTypeGrs_marc = &marc_type; + +static struct recTypeGrs marcxml_type = { + "marcxml", + grs_init_marc, + grs_destroy_marc, + grs_read_marcxml +}; + +RecTypeGrs recTypeGrs_marcxml = &marcxml_type; diff --git a/recctrl/recgrs.c b/recctrl/recgrs.c index af13835..1bae99d 100644 --- a/recctrl/recgrs.c +++ b/recctrl/recgrs.c @@ -1,4 +1,4 @@ -/* $Id: recgrs.c,v 1.81 2003-06-17 22:22:57 adam Exp $ +/* $Id: recgrs.c,v 1.82 2003-08-21 10:29:00 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003 Index Data Aps @@ -62,7 +62,8 @@ static int read_grs_type (struct grs_handlers *h, strcpy (p->type, cp+1); for (gh = h->handlers; gh; gh = gh->next) { - if (!memcmp (type, gh->type->type, cp-type)) + if (!memcmp (type, gh->type->type, cp-type) && + gh->type->type[cp-type] == '\0') { if (!gh->initFlag) { @@ -99,6 +100,7 @@ static void *grs_init(RecType recType) grs_add_handler (h, recTypeGrs_tcl); #endif grs_add_handler (h, recTypeGrs_marc); + grs_add_handler (h, recTypeGrs_marcxml); #if HAVE_EXPAT_H grs_add_handler (h, recTypeGrs_xml); #endif diff --git a/recctrl/xmlread.c b/recctrl/xmlread.c index 7bbc2da..5071fc5 100644 --- a/recctrl/xmlread.c +++ b/recctrl/xmlread.c @@ -1,4 +1,4 @@ -/* $Id: xmlread.c,v 1.7 2003-05-05 09:58:42 adam Exp $ +/* $Id: xmlread.c,v 1.8 2003-08-21 10:29:00 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -379,7 +379,19 @@ static int cb_encoding_handler (void *userData, const char *name, /* HAVE_ICONV_H */ #endif +static void cb_ns_start(void *userData, const char *prefix, const char *uri) +{ + struct user_info *ui = (struct user_info*) userData; + if (prefix && uri) + yaz_log(ui->loglevel, "cb_ns_start %s %s", prefix, uri); +} +static void cb_ns_end(void *userData, const char *prefix) +{ + struct user_info *ui = (struct user_info*) userData; + if (prefix) + yaz_log(ui->loglevel, "cb_ns_end %s", prefix); +} data1_node *zebra_read_xml (data1_handle dh, int (*rf)(void *, char *, size_t), void *fh, NMEM m) @@ -406,6 +418,7 @@ data1_node *zebra_read_xml (data1_handle dh, XML_SetDoctypeDeclHandler (parser, cb_doctype_start, cb_doctype_end); XML_SetEntityDeclHandler (parser, cb_entity_decl); XML_SetExternalEntityRefHandler (parser, cb_external_entity); + XML_SetNamespaceDeclHandler(parser, cb_ns_start, cb_ns_end); #if HAVE_ICONV_H XML_SetUnknownEncodingHandler (parser, cb_encoding_handler, &uinfo); #endif -- 1.7.10.4