X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=recctrl%2Fmarcread.c;h=323e96f784abcc43b3f6df08059adff112619270;hb=cd3ce7da1b13a23dc0f01b24abb9e52f87a12261;hp=f168081e30355302e061c1f5a0cc16125b398f58;hpb=0a5aa3b65fe14789bcada545a0e399545725d1ff;p=idzebra-moved-to-github.git diff --git a/recctrl/marcread.c b/recctrl/marcread.c index f168081..323e96f 100644 --- a/recctrl/marcread.c +++ b/recctrl/marcread.c @@ -1,5 +1,5 @@ -/* $Id: marcread.c,v 1.18 2003-02-28 12:33:39 oleg Exp $ - Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 +/* $Id: marcread.c,v 1.24 2004-06-16 22:12:30 adam Exp $ + Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps This file is part of the Zebra server. @@ -34,7 +34,7 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #define MARC_DEBUG 0 #define MARCOMP_DEBUG 0 -static data1_node *grs_read_iso2709 (struct grs_read_info *p) +static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml) { char buf[100000]; int entry_p; @@ -83,9 +83,20 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p) yaz_log (LOG_WARN, "cannot read MARC without an abstract syntax"); return 0; } - res_top = data1_mk_tag (p->dh, p->mem, absynName, 0, res_root); + if (marc_xml) + { + data1_node *lead; + const char *attr[] = { "xmlns", "http://www.loc.gov/MARC21/slim", 0}; + + res_top = data1_mk_tag (p->dh, p->mem, "record", attr, res_root); + + lead = data1_mk_tag(p->dh, p->mem, "leader", 0, res_top); + data1_mk_text_n(p->dh, p->mem, buf, 24, lead); + } + else + res_top = data1_mk_tag (p->dh, p->mem, absynName, 0, res_root); - if (marctab = res_root->u.root.absyn->marc) + if ((marctab = res_root->u.root.absyn->marc)) { memcpy(marctab->leader, buf, 24); memcpy(marctab->implementation_codes, buf+6, 4); @@ -102,7 +113,7 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p) identifier_length = marctab->force_identifier_length; else identifier_length = atoi_n (buf+11, 1); - base_address = atoi_n (buf+12, 4); + base_address = atoi_n (buf+12, 5); length_data_entry = atoi_n (buf+20, 1); length_starting = atoi_n (buf+21, 1); @@ -125,10 +136,11 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p) entry_p += 3; tag[3] = '\0'; - - /* generate field node */ - res = data1_mk_tag_n (p->dh, p->mem, tag, 3, 0 /* attr */, parent); - + if (marc_xml) + res = parent; + else + res = data1_mk_tag_n (p->dh, p->mem, tag, 3, 0 /* attr */, parent); + #if MARC_DEBUG fprintf (outf, "%s ", tag); #endif @@ -142,58 +154,86 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p) if (memcmp (tag, "00", 2) && indicator_length) { /* generate indicator node */ + if (marc_xml) + { + const char *attr[10]; + int j; + + attr[0] = "tag"; + attr[1] = tag; + attr[2] = 0; + + res = data1_mk_tag(p->dh, p->mem, "datafield", attr, res); + + for (j = 0; jdh, p->mem, res, attr); + } + } + else + { #if MARC_DEBUG - int j; + int j; #endif - res = data1_mk_tag_n (p->dh, p->mem, - buf+i, indicator_length, 0 /* attr */, res); + res = data1_mk_tag_n (p->dh, p->mem, + buf+i, indicator_length, 0 /* attr */, res); #if MARC_DEBUG - for (j = 0; jdh, p->mem, "controlfield", attr, res); + } + } parent = res; /* traverse sub fields */ i0 = i; while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS && i < end_offset) { - - if (!memcmp(tag, "4", 1) && (!yaz_matchstr(absynName, "UNIMARC")|| - !yaz_matchstr(absynName, "RUSMARC"))) - { - int go = 1; - data1_node *res = - data1_mk_tag_n (p->dh, p->mem, - buf+i+1, identifier_length-1, - 0 /* attr */, parent); - i += identifier_length; - i0 = i; - do { - while (buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS && - buf[i] != ISO2709_FS && i < end_offset) - { - i++; - } - if (!memcmp(buf+i+1, "1", 1) && idh, p->mem, buf + i0, i - i0, res); - i0 = i; - } - else if (memcmp (tag, "00", 2) && identifier_length) + if (memcmp (tag, "00", 2) && identifier_length) { - data1_node *res = - data1_mk_tag_n (p->dh, p->mem, - buf+i+1, identifier_length-1, - 0 /* attr */, parent); + data1_node *res; + if (marc_xml) + { + int j; + const char *attr[3]; + char code[10]; + + for (j = 1; jdh, p->mem, "subfield", + attr, parent); + } + else + { + res = data1_mk_tag_n (p->dh, p->mem, + buf+i+1, identifier_length-1, + 0 /* attr */, parent); + } #if MARC_DEBUG fprintf (outf, " $"); for (j = 1; jprefix, "_")) { - strcat(strcat(buf, " "), p->prefix); + wrbuf_puts(buf, " "); + wrbuf_puts(buf, p->prefix); } if (p->interval.start == -1) { - strcat(buf, found->data); + wrbuf_puts(buf, found->data); } else { - strncat(buf, found->data+p->interval.start, - p->interval.end-p->interval.start+1); + wrbuf_write(buf, found->data+p->interval.start, + p->interval.end-p->interval.start); + wrbuf_puts(buf, ""); } if (strcmp(p->suffix, "_")) { - strcat(strcat(buf, p->suffix), " "); + wrbuf_puts(buf, p->suffix); + wrbuf_puts(buf, " "); } #if MARCOMP_DEBUG logf(LOG_LOG, "cat_inline_subfield(): add subfield $%s", found->name); @@ -365,30 +402,54 @@ static inline_subfield *cat_inline_subfield(mc_subfield *psf, char *buf, inline_ } if (found) { - strcat(buf, " ("); + wrbuf_puts(buf, " ("); pisf = cat_inline_subfield(p->u.child, buf, pisf); - strcat(buf, ") "); + wrbuf_puts(buf, ") "); } } } return pisf; } -static void cat_inline_field(mc_field *pf, char *buf, data1_node *subfield) -{ - + +static void cat_inline_field(mc_field *pf, WRBUF buf, data1_node *subfield) +{ if (!pf || !subfield) return; - for (;subfield; subfield = subfield->next) + for (;subfield;) { int len; - inline_field *pif = inline_parse(get_data(subfield,&len)); + inline_field *pif=NULL; + data1_node *psubf; + + if (yaz_matchstr(subfield->u.tag.tag, "1")) + { + subfield = subfield->next; + continue; + } + + psubf = subfield; + pif = inline_mk_field(); + do + { + int i; + if ((i=inline_parse(pif, psubf->u.tag.tag, get_data(psubf, &len)))<0) + { + logf(LOG_WARN, "inline subfield ($%s): parse error", + psubf->u.tag.tag); + inline_destroy_field(pif); + return; + } + psubf = psubf->next; + } while (psubf && yaz_matchstr(psubf->u.tag.tag, "1")); + + subfield = psubf; if (pif && !yaz_matchstr(pif->name, pf->name)) { if (!pf->list && pif->list) { - strcat(buf, pif->list->data); + wrbuf_puts(buf, pif->list->data); } else { @@ -409,14 +470,14 @@ static void cat_inline_field(mc_field *pf, char *buf, data1_node *subfield) /* add separator for inline fields */ - if (strlen(buf)) + if (wrbuf_len(buf)) { - strcat(buf, "\n"); + wrbuf_puts(buf, "\n"); } } else { - logf(LOG_WARN, "In-line field %s missed -- indicators does not match", pif->name); + logf(LOG_WARN, "In-line field %s missed -- indicators do not match", pif->name); } } } @@ -426,7 +487,9 @@ static void cat_inline_field(mc_field *pf, char *buf, data1_node *subfield) logf(LOG_LOG, "cat_inline_field(): got buffer {%s}", buf); #endif } -static data1_node *cat_subfield(mc_subfield *psf, char *buf, data1_node *subfield) + +static data1_node *cat_subfield(mc_subfield *psf, WRBUF buf, + data1_node *subfield) { mc_subfield *p; @@ -442,7 +505,8 @@ static data1_node *cat_subfield(mc_subfield *psf, char *buf, data1_node *subfiel if (strcmp(p->prefix, "_")) { - strcat(strcat(buf, " "), p->prefix); + wrbuf_puts(buf, " "); + wrbuf_puts(buf, p->prefix); } if (p->u.in_line) @@ -451,16 +515,18 @@ static data1_node *cat_subfield(mc_subfield *psf, char *buf, data1_node *subfiel } else if (p->interval.start == -1) { - strcat(buf, get_data(found, &len)); + wrbuf_puts(buf, get_data(found, &len)); } else { - strncat(buf, get_data(found, &len)+p->interval.start, - p->interval.end-p->interval.start+1); + wrbuf_write(buf, get_data(found, &len)+p->interval.start, + p->interval.end-p->interval.start); + wrbuf_puts(buf, ""); } if (strcmp(p->suffix, "_")) { - strcat(strcat(buf, p->suffix), " "); + wrbuf_puts(buf, p->suffix); + wrbuf_puts(buf, " "); } #if MARCOMP_DEBUG logf(LOG_LOG, "cat_subfield(): add subfield $%s", found->u.tag.tag); @@ -493,15 +559,17 @@ static data1_node *cat_subfield(mc_subfield *psf, char *buf, data1_node *subfiel } if (found) { - strcat(buf, " ("); + wrbuf_puts(buf, " ("); subfield = cat_subfield(p->u.child, buf, subfield); - strcat(buf, ") "); + wrbuf_puts(buf, ") "); } } } return subfield; } -static data1_node *cat_field(struct grs_read_info *p, mc_field *pf, char *buf, data1_node *field) + +static data1_node *cat_field(struct grs_read_info *p, mc_field *pf, + WRBUF buf, data1_node *field) { data1_node *subfield; int ind1, ind2; @@ -525,8 +593,17 @@ static data1_node *cat_field(struct grs_read_info *p, mc_field *pf, char *buf, d if (!pf->list && subfield->which == DATA1N_data) { int len; - - strcat(buf, get_data(field, &len)); + + if (pf->interval.start == -1) + { + wrbuf_puts(buf, get_data(field, &len)); + } + else + { + wrbuf_write(buf, get_data(field, &len)+pf->interval.start, + pf->interval.end-pf->interval.start); + wrbuf_puts(buf, ""); + } #if MARCOMP_DEBUG logf(LOG_LOG, "cat_field(): got buffer {%s}", buf); #endif @@ -564,6 +641,7 @@ static data1_node *cat_field(struct grs_read_info *p, mc_field *pf, char *buf, d return field->next; } + static int is_empty(char *s) { char *p = s; @@ -575,14 +653,16 @@ static int is_empty(char *s) } return 1; } -static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, data1_node *root) + +static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, + data1_node *root) { data1_marctab *marctab = root->u.root.absyn->marc; data1_node *top = root->child; data1_node *field; mc_context *c; mc_field *pf; - char buf[1000000]; + WRBUF buf; c = mc_mk_context(mc_stmnt+3); @@ -596,6 +676,7 @@ static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, data mc_destroy_context(c); return; } + buf = wrbuf_alloc(); #if MARCOMP_DEBUG logf(LOG_LOG, "parse_data1_tree(): statement -{%s}", mc_stmnt); #endif @@ -619,13 +700,16 @@ static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, data if (!yaz_matchstr(field->u.tag.tag, pf->name)) { data1_node *new; - char *pb = buf; + char *pb; #if MARCOMP_DEBUG logf(LOG_LOG, "parse_data1_tree(): try field {%s}", field->u.tag.tag); #endif - *buf = '\0'; + wrbuf_rewind(buf); + wrbuf_puts(buf, ""); + field = cat_field(p, pf, buf, field); + pb = wrbuf_buf(buf); for (pb = strtok(pb, "\n"); pb; pb = strtok(NULL, "\n")) { if (!is_empty(pb)) @@ -643,11 +727,31 @@ static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, data } mc_destroy_field(pf); mc_destroy_context(c); + wrbuf_free(buf, 1); +} + +data1_node *grs_read_marcxml(struct grs_read_info *p) +{ + data1_node *root = grs_read_iso2709(p, 1); + data1_element *e; + + if (!root) + return 0; + + for (e=root->u.root.absyn->main_elements; e; e=e->next) + { + data1_tag *tag = e->tag; + + if (tag && tag->which == DATA1T_string && + !yaz_matchstr(tag->value.string, "mc?")) + parse_data1_tree(p, tag->value.string, root); + } + return root; } data1_node *grs_read_marc(struct grs_read_info *p) { - data1_node *root = grs_read_iso2709(p); + data1_node *root = grs_read_iso2709(p, 0); data1_element *e; if (!root) @@ -663,6 +767,7 @@ data1_node *grs_read_marc(struct grs_read_info *p) } return root; } + static void *grs_init_marc(void) { return 0; @@ -680,3 +785,12 @@ static struct recTypeGrs marc_type = { }; RecTypeGrs recTypeGrs_marc = &marc_type; + +static struct recTypeGrs marcxml_type = { + "marcxml", + grs_init_marc, + grs_destroy_marc, + grs_read_marcxml +}; + +RecTypeGrs recTypeGrs_marcxml = &marcxml_type;