X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=recctrl%2Fmarcread.c;h=3da18e923cd353c24075c78f8b0a37aa9ec2878b;hb=6dfee19bc1cec29ff5ec5e9cd8021d1354a6126e;hp=a651e05711f4540ba8bb1b6fade65db362f975e4;hpb=8ee402d79e37344b08c2b54ad45b50e8327a6c03;p=idzebra-moved-to-github.git diff --git a/recctrl/marcread.c b/recctrl/marcread.c index a651e05..3da18e9 100644 --- a/recctrl/marcread.c +++ b/recctrl/marcread.c @@ -1,6 +1,6 @@ -/* $Id: marcread.c,v 1.25 2004-09-27 10:44:50 adam Exp $ - Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 - Index Data Aps +/* $Id: marcread.c,v 1.32 2005-12-08 11:06:49 adam Exp $ + Copyright (C) 1995-2005 + Index Data ApS This file is part of the Zebra server. @@ -24,10 +24,9 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include #include -#include #include #include -#include "grsread.h" +#include #include "marcomp.h" #include "inline.h" @@ -47,6 +46,7 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml) int indicator_length; int identifier_length; int base_address; + int end_of_directory; int length_data_entry; int length_starting; int length_implementation; @@ -60,17 +60,29 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml) if ((*p->readf)(p->fh, buf, 5) != 5) return NULL; + while (*buf < '0' || *buf > '9') + { + int i; + + yaz_log(YLOG_WARN, "MARC: Skipping bad byte %d (0x%02X)", + *buf & 0xff, *buf & 0xff); + for (i = 0; i<4; i++) + buf[i] = buf[i+1]; + + if ((*p->readf)(p->fh, buf+4, 1) != 1) + return NULL; + } record_length = atoi_n (buf, 5); if (record_length < 25) { - logf (LOG_WARN, "MARC record length < 25, is %d", record_length); + yaz_log (YLOG_WARN, "MARC record length < 25, is %d", record_length); return NULL; } /* read remaining part - attempt to read one byte furhter... */ read_bytes = (*p->readf)(p->fh, buf+5, record_length-4); if (read_bytes < record_length-5) { - logf (LOG_WARN, "Couldn't read whole MARC record"); + yaz_log (YLOG_WARN, "Couldn't read whole MARC record"); return NULL; } if (read_bytes == record_length - 4) @@ -85,7 +97,7 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml) res_root = data1_mk_root (p->dh, p->mem, absynName); if (!res_root) { - yaz_log (LOG_WARN, "cannot read MARC without an abstract syntax"); + yaz_log (YLOG_WARN, "cannot read MARC without an abstract syntax"); return 0; } if (marc_xml) @@ -101,7 +113,7 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml) else res_top = data1_mk_tag (p->dh, p->mem, absynName, 0, res_root); - if ((marctab = res_root->u.root.absyn->marc)) + if ((marctab = data1_absyn_getmarctab(p->dh, res_root->u.root.absyn))) { memcpy(marctab->leader, buf, 24); memcpy(marctab->implementation_codes, buf+6, 4); @@ -125,9 +137,32 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml) length_implementation = atoi_n (buf+22, 1); for (entry_p = 24; buf[entry_p] != ISO2709_FS; ) - entry_p += 3+length_data_entry+length_starting; - base_address = entry_p+1; - for (entry_p = 24; buf[entry_p] != ISO2709_FS; ) + { + int l = 3 + length_data_entry + length_starting; + if (entry_p + l >= record_length) + { + yaz_log(YLOG_WARN, "MARC: Directory offset %d: end of record.", + entry_p); + return 0; + } + /* check for digits in length info */ + while (--l >= 3) + if (!isdigit(*(const unsigned char *) (buf + entry_p+l))) + break; + if (l >= 3) + { + /* not all digits, so stop directory scan */ + yaz_log(YLOG_LOG, "MARC: Bad directory"); + break; + } + entry_p += 3 + length_data_entry + length_starting; + } + end_of_directory = entry_p; + if (base_address != entry_p+1) + { + yaz_log(YLOG_WARN, "MARC: Base address does not follow directory"); + } + for (entry_p = 24; entry_p != end_of_directory; ) { int data_length; int data_offset; @@ -156,6 +191,12 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml) i = data_offset + base_address; end_offset = i+data_length-1; + if (data_length <= 0 || data_offset < 0 || end_offset >= record_length) + { + yaz_log(YLOG_WARN, "MARC: Bad offsets in data. Skipping rest"); + break; + } + if (memcmp (tag, "00", 2) && indicator_length) { /* generate indicator node */ @@ -376,7 +417,7 @@ static inline_subfield *cat_inline_subfield(mc_subfield *psf, WRBUF buf, wrbuf_puts(buf, " "); } #if MARCOMP_DEBUG - logf(LOG_LOG, "cat_inline_subfield(): add subfield $%s", found->name); + yaz_log(YLOG_LOG, "cat_inline_subfield(): add subfield $%s", found->name); #endif pisf = found->next; } @@ -440,7 +481,7 @@ static void cat_inline_field(mc_field *pf, WRBUF buf, data1_node *subfield) int i; if ((i=inline_parse(pif, psubf->u.tag.tag, get_data(psubf, &len)))<0) { - logf(LOG_WARN, "inline subfield ($%s): parse error", + yaz_log(YLOG_WARN, "inline subfield ($%s): parse error", psubf->u.tag.tag); inline_destroy_field(pif); return; @@ -482,14 +523,14 @@ static void cat_inline_field(mc_field *pf, WRBUF buf, data1_node *subfield) } else { - logf(LOG_WARN, "In-line field %s missed -- indicators do not match", pif->name); + yaz_log(YLOG_WARN, "In-line field %s missed -- indicators do not match", pif->name); } } } inline_destroy_field(pif); } #if MARCOMP_DEBUG - logf(LOG_LOG, "cat_inline_field(): got buffer {%s}", buf); + yaz_log(YLOG_LOG, "cat_inline_field(): got buffer {%s}", buf); #endif } @@ -534,7 +575,7 @@ static data1_node *cat_subfield(mc_subfield *psf, WRBUF buf, wrbuf_puts(buf, " "); } #if MARCOMP_DEBUG - logf(LOG_LOG, "cat_subfield(): add subfield $%s", found->u.tag.tag); + yaz_log(YLOG_LOG, "cat_subfield(): add subfield $%s", found->u.tag.tag); #endif subfield = found->next; } @@ -610,7 +651,7 @@ static data1_node *cat_field(struct grs_read_info *p, mc_field *pf, wrbuf_puts(buf, ""); } #if MARCOMP_DEBUG - logf(LOG_LOG, "cat_field(): got buffer {%s}", buf); + yaz_log(YLOG_LOG, "cat_field(): got buffer {%s}", buf); #endif return field->next; } @@ -628,7 +669,7 @@ static data1_node *cat_field(struct grs_read_info *p, mc_field *pf, )) { #if MARCOMP_DEBUG - logf(LOG_WARN, "Field %s missed -- does not match indicators", field->u.tag.tag); + yaz_log(YLOG_WARN, "Field %s missed -- does not match indicators", field->u.tag.tag); #endif return field->next; } @@ -641,7 +682,7 @@ static data1_node *cat_field(struct grs_read_info *p, mc_field *pf, cat_subfield(pf->list, buf, subfield); #if MARCOMP_DEBUG - logf(LOG_LOG, "cat_field(): got buffer {%s}", buf); + yaz_log(YLOG_LOG, "cat_field(): got buffer {%s}", buf); #endif return field->next; @@ -653,7 +694,7 @@ static int is_empty(char *s) for (p = s; *p; p++) { - if (!isspace(*p)) + if (!isspace(*(unsigned char *)p)) return 0; } return 1; @@ -662,7 +703,7 @@ static int is_empty(char *s) static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, data1_node *root) { - data1_marctab *marctab = root->u.root.absyn->marc; + data1_marctab *marctab = data1_absyn_getmarctab(p->dh, root->u.root.absyn); data1_node *top = root->child; data1_node *field; mc_context *c; @@ -683,13 +724,13 @@ static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, } buf = wrbuf_alloc(); #if MARCOMP_DEBUG - logf(LOG_LOG, "parse_data1_tree(): statement -{%s}", mc_stmnt); + yaz_log(YLOG_LOG, "parse_data1_tree(): statement -{%s}", mc_stmnt); #endif if (!yaz_matchstr(pf->name, "ldr")) { data1_node *new; #if MARCOMP_DEBUG - logf(LOG_LOG,"parse_data1_tree(): try LEADER from {%d} to {%d} positions", + yaz_log(YLOG_LOG,"parse_data1_tree(): try LEADER from {%d} to {%d} positions", pf->interval.start, pf->interval.end); #endif new = data1_mk_tag_n(p->dh, p->mem, mc_stmnt, strlen(mc_stmnt), 0, top); @@ -707,7 +748,7 @@ static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, data1_node *new; char *pb; #if MARCOMP_DEBUG - logf(LOG_LOG, "parse_data1_tree(): try field {%s}", field->u.tag.tag); + yaz_log(YLOG_LOG, "parse_data1_tree(): try field {%s}", field->u.tag.tag); #endif wrbuf_rewind(buf); wrbuf_puts(buf, ""); @@ -743,7 +784,7 @@ data1_node *grs_read_marcxml(struct grs_read_info *p) if (!root) return 0; - for (e=root->u.root.absyn->main_elements; e; e=e->next) + for (e = data1_absyn_getelements(p->dh, root->u.root.absyn); e; e=e->next) { data1_tag *tag = e->tag; @@ -762,7 +803,7 @@ data1_node *grs_read_marc(struct grs_read_info *p) if (!root) return 0; - for (e=root->u.root.absyn->main_elements; e; e=e->next) + for (e = data1_absyn_getelements(p->dh, root->u.root.absyn); e; e=e->next) { data1_tag *tag = e->tag; @@ -805,6 +846,7 @@ static int retrieve_marc(void *clientData, struct recRetrieveCtrl *ctrl) } static struct recType marc_type = { + 0, "grs.marc", init_marc, config_marc, @@ -824,6 +866,7 @@ static int retrieve_marcxml(void *clientData, struct recRetrieveCtrl *ctrl) } static struct recType marcxml_type = { + 0, "grs.marcxml", init_marc, config_marc,