From f514c7fcb9cd0c205b308a1213112317d564874c Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Sun, 6 Mar 2005 21:27:09 +0000 Subject: [PATCH] Better diagnostics for bad MARC records. --- src/marcdisp.c | 70 ++++++++++++++++++++++++++++++++++--------------------- util/marcdump.c | 13 +++++------ 2 files changed, 50 insertions(+), 33 deletions(-) diff --git a/src/marcdisp.c b/src/marcdisp.c index be233a7..c23b1b6 100644 --- a/src/marcdisp.c +++ b/src/marcdisp.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2005, Index Data ApS * See the file LICENSE for details. * - * $Id: marcdisp.c,v 1.18 2005-02-25 17:04:45 adam Exp $ + * $Id: marcdisp.c,v 1.19 2005-03-06 21:27:09 adam Exp $ */ /** @@ -86,6 +86,7 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) int record_length; int indicator_length; int identifier_length; + int end_of_directory; int base_address; int length_data_entry; int length_starting; @@ -236,18 +237,37 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) /* first pass. determine length of directory & base of data */ for (entry_p = 24; buf[entry_p] != ISO2709_FS; ) { - entry_p += 3+length_data_entry+length_starting; - if (entry_p >= record_length) + /* length of directory entry */ + int l = 3 + length_data_entry + length_starting; + if (entry_p + l >= record_length) + { + wrbuf_printf (wr, "\n", entry_p); return -1; + } + if (mt->debug) + wrbuf_printf (wr, "\n", + entry_p, buf+entry_p); + /* check for digits in length info */ + while (--l >= 3) + if (!isdigit(*(const unsigned char *) (buf + entry_p+l))) + break; + if (l >= 3) + { + /* not all digits, so stop directory scan */ + wrbuf_printf (wr, "\n", entry_p); + break; + } + entry_p += 3 + length_data_entry + length_starting; } + end_of_directory = entry_p; if (base_address != entry_p+1) { if (produce_warnings) - wrbuf_printf (wr," \n", base_address, entry_p+1); + wrbuf_printf (wr," \n", base_address, entry_p+1); } - base_address = entry_p+1; - if (mt->xml == YAZ_MARC_ISO2709) { WRBUF wr_head = wrbuf_alloc(); @@ -256,7 +276,7 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) int data_p = 0; /* second pass. create directory for ISO2709 output */ - for (entry_p = 24; buf[entry_p] != ISO2709_FS; ) + for (entry_p = 24; entry_p != end_of_directory; ) { int data_length, data_offset, end_offset; int i, sz1, sz2; @@ -303,7 +323,7 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) wrbuf_free(wr_tmp, 1); } /* third pass. create data output */ - for (entry_p = 24; buf[entry_p] != ISO2709_FS; ) + for (entry_p = 24; entry_p != end_of_directory; ) { int data_length; int data_offset; @@ -311,11 +331,10 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) int i, j; char tag[4]; int identifier_flag = 0; - int entry_p0; + int entry_p0 = entry_p; memcpy (tag, buf+entry_p, 3); entry_p += 3; - entry_p0 = entry_p; tag[3] = '\0'; data_length = atoi_n (buf+entry_p, length_data_entry); entry_p += length_data_entry; @@ -324,20 +343,22 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) i = data_offset + base_address; end_offset = i+data_length-1; - if (data_length <= 0 || data_offset < 0 || end_offset >= record_length) - { - if (produce_warnings) - wrbuf_printf (wr," \n", - data_length, data_offset); + if (data_length <= 0 || data_offset < 0) break; - } if (mt->debug) { - wrbuf_printf(wr, "\n", + wrbuf_printf(wr, "\n", entry_p0, data_length, data_offset); } + if (end_offset >= record_length) + { + wrbuf_printf (wr,"\n", + entry_p0, end_offset, record_length); + break; + } if (memcmp (tag, "00", 2)) identifier_flag = 1; /* if not 00X assume subfields */ @@ -552,13 +573,10 @@ int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize, char **result, int *rsize) { int r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr); - if (r > 0) - { - if (result) - *result = wrbuf_buf(mt->m_wr); - if (rsize) - *rsize = wrbuf_len(mt->m_wr); - } + if (result) + *result = wrbuf_buf(mt->m_wr); + if (rsize) + *rsize = wrbuf_len(mt->m_wr); return r; } diff --git a/util/marcdump.c b/util/marcdump.c index 34700cf..6dd79d1 100644 --- a/util/marcdump.c +++ b/util/marcdump.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2005, Index Data ApS * See the file LICENSE for details. * - * $Id: marcdump.c,v 1.27 2005-02-08 13:51:31 adam Exp $ + * $Id: marcdump.c,v 1.28 2005-03-06 21:27:09 adam Exp $ */ #if HAVE_CONFIG_H @@ -193,7 +193,7 @@ int main (int argc, char **argv) while (1) { int len; - char *result; + char *result = 0; int rlen; r = fread (buf, 1, 5, inf); @@ -216,11 +216,10 @@ int main (int argc, char **argv) if (r < len) break; r = yaz_marc_decode_buf (mt, buf, -1, &result, &rlen); - if (r <= 0) - break; - fwrite (result, rlen, 1, stdout); + if (result) + fwrite (result, rlen, 1, stdout); #if HAVE_XML2 - if (libxml_dom_test) + if (r > 0 && libxml_dom_test) { xmlDocPtr doc = xmlParseMemory(result, rlen); if (!doc) @@ -256,7 +255,7 @@ int main (int argc, char **argv) } } #endif - if (cfile) + if (r > 0 && cfile) { char *p = buf; int i; -- 1.7.10.4