X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Fmarcdisp.c;h=514d8e4bf514947164774685f481a86bc4da836e;hb=c584380ce880fe0e0ba036f476f8a037763eac21;hp=8473f9f0d29715a7b4a0d9edf65f8e3b0cee5505;hpb=96a3a7bc95bf4014fb1224a7e3ad711e5e20e785;p=yaz-moved-to-github.git diff --git a/src/marcdisp.c b/src/marcdisp.c index 8473f9f..514d8e4 100644 --- a/src/marcdisp.c +++ b/src/marcdisp.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2005, Index Data ApS * See the file LICENSE for details. * - * $Id: marcdisp.c,v 1.14 2005-02-02 23:07:56 adam Exp $ + * $Id: marcdisp.c,v 1.23 2005-06-25 15:46:04 adam Exp $ */ /** @@ -65,11 +65,19 @@ void yaz_marc_destroy(yaz_marc_t mt) static void marc_cdata (yaz_marc_t mt, const char *buf, size_t len, WRBUF wr) { if (mt->xml == YAZ_MARC_ISO2709) - wrbuf_iconv_write(wr, mt->iconv_cd, buf, len); + wrbuf_iconv_write(wr, mt->iconv_cd, buf, len); else if (mt->xml == YAZ_MARC_LINE) - wrbuf_iconv_write(wr, mt->iconv_cd, buf, len); + wrbuf_iconv_write(wr, mt->iconv_cd, buf, len); else - wrbuf_iconv_write_cdata(wr, mt->iconv_cd, buf, len); + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, buf, len); +} + +static int atoi_n_check(const char *buf, int size, int *val) +{ + if (!isdigit(*(const unsigned char *) buf)) + return 0; + *val = atoi_n(buf, size); + return 1; } int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) @@ -78,51 +86,74 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) int record_length; int indicator_length; int identifier_length; + int end_of_directory; int base_address; int length_data_entry; int length_starting; int length_implementation; + char lead[24]; + int produce_warnings = 0; - wrbuf_rewind(wr); + if (mt->debug) + produce_warnings = 1; + if (mt->xml == YAZ_MARC_SIMPLEXML || mt->xml == YAZ_MARC_OAIMARC + || mt->xml == YAZ_MARC_MARCXML || mt->xml == YAZ_MARC_XCHANGE) + produce_warnings = 1; record_length = atoi_n (buf, 5); if (record_length < 25) { - if (mt->debug) - { - char str[40]; - - sprintf (str, "Record length %d - aborting\n", record_length); - wrbuf_puts (wr, str); - } + if (mt->debug) + wrbuf_printf(wr, "\n", + record_length); return -1; } + memcpy(lead, buf, 24); /* se can modify the header for output */ + /* ballout if bsize is known and record_length is less than that */ if (bsize != -1 && record_length > bsize) - return -1; - if (isdigit(((const unsigned char *) buf)[10])) - indicator_length = atoi_n (buf+10, 1); - else + return -1; + if (!atoi_n_check(buf+10, 1, &indicator_length)) + { + if (produce_warnings) + wrbuf_printf(wr, "\n"); + lead[10] = '2'; indicator_length = 2; - if (isdigit(((const unsigned char *) buf)[11])) - identifier_length = atoi_n (buf+11, 1); - else + } + if (!atoi_n_check(buf+11, 1, &identifier_length)) + { + if (produce_warnings) + wrbuf_printf(wr, "\n"); + lead[11] = '2'; identifier_length = 2; - base_address = atoi_n (buf+12, 5); - - length_data_entry = atoi_n (buf+20, 1); - if (buf[20] <= '0' || buf[20] >= '9') + } + if (!atoi_n_check(buf+12, 5, &base_address)) { - wrbuf_printf(wr, "\n"); - length_data_entry = 4; + if (produce_warnings) + wrbuf_printf(wr, "\n"); + base_address = 0; } - length_starting = atoi_n (buf+21, 1); - if (buf[21] <= '0' || buf[21] >= '9') + if (!atoi_n_check(buf+20, 1, &length_data_entry)) { - wrbuf_printf(wr, "\n"); - length_starting = 5; + if (produce_warnings) + wrbuf_printf(wr, "\n"); + length_data_entry = 4; + lead[20] = '4'; + } + if (!atoi_n_check(buf+21, 1, &length_starting)) + { + if (produce_warnings) + wrbuf_printf(wr, "\n"); + length_starting = 5; + lead[21] = '5'; + } + if (!atoi_n_check(buf+22, 1, &length_implementation)) + { + if (produce_warnings) + wrbuf_printf(wr, "\n"); + length_implementation = 0; + lead[22] = '0'; } - length_implementation = atoi_n (buf+22, 1); if (mt->xml != YAZ_MARC_LINE) { @@ -130,8 +161,8 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) int i; switch(mt->xml) { - case YAZ_MARC_ISO2709: - break; + case YAZ_MARC_ISO2709: + break; case YAZ_MARC_SIMPLEXML: wrbuf_puts (wr, "\n" " "); -#if 1 - marc_cdata(mt, buf, 9, wr); - marc_cdata(mt, "a", 1, wr); /* set leader to signal unicode */ - marc_cdata(mt, buf+10, 14, wr); -#else - marc_cdata(mt, buf, 24, wr); /* leave header as is .. */ -#endif + lead[9] = 'a'; /* set leader to signal unicode */ + marc_cdata(mt, lead, 24, wr); + wrbuf_printf(wr, "\n"); + break; + case YAZ_MARC_XCHANGE: + wrbuf_printf( + wr, + "\n" + " "); + marc_cdata(mt, lead, 24, wr); wrbuf_printf(wr, "\n"); break; } } if (mt->debug) { - char str[40]; - - if (mt->xml) - wrbuf_puts (wr, "\n"); + char str[40]; + + wrbuf_puts (wr, "\n"); } /* first pass. determine length of directory & base of data */ for (entry_p = 24; buf[entry_p] != ISO2709_FS; ) { - entry_p += 3+length_data_entry+length_starting; - if (entry_p >= record_length) + /* length of directory entry */ + int l = 3 + length_data_entry + length_starting; + if (entry_p + l >= record_length) + { + wrbuf_printf (wr, "\n", entry_p); return -1; + } + if (mt->debug) + wrbuf_printf (wr, "\n", + entry_p, buf+entry_p); + /* check for digits in length info */ + while (--l >= 3) + if (!isdigit(*(const unsigned char *) (buf + entry_p+l))) + break; + if (l >= 3) + { + /* not all digits, so stop directory scan */ + wrbuf_printf (wr, "\n", entry_p); + break; + } + entry_p += 3 + length_data_entry + length_starting; } - if (mt->debug && base_address != entry_p+1) + end_of_directory = entry_p; + if (base_address != entry_p+1) { - wrbuf_printf (wr," \n", base_address, entry_p+1); + if (produce_warnings) + wrbuf_printf (wr,"\n", base_address, entry_p+1); } - base_address = entry_p+1; - if (mt->xml == YAZ_MARC_ISO2709) { - WRBUF wr_head = wrbuf_alloc(); - WRBUF wr_dir = wrbuf_alloc(); - WRBUF wr_tmp = wrbuf_alloc(); - - int data_p = 0; - /* second pass. create directory for ISO2709 output */ - for (entry_p = 24; buf[entry_p] != ISO2709_FS; ) - { - int data_length, data_offset, end_offset; - int i, sz1, sz2; - - wrbuf_write(wr_dir, buf+entry_p, 3); - entry_p += 3; - - data_length = atoi_n (buf+entry_p, length_data_entry); - entry_p += length_data_entry; - data_offset = atoi_n (buf+entry_p, length_starting); - entry_p += length_starting; - i = data_offset + base_address; - end_offset = i+data_length-1; - - while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS && - i < end_offset) - i++; - sz1 = 1+i - (data_offset + base_address); - if (mt->iconv_cd) - { - sz2 = wrbuf_iconv_write(wr_tmp, mt->iconv_cd, - buf + data_offset+base_address, sz1); - wrbuf_rewind(wr_tmp); - } - else - sz2 = sz1; - wrbuf_printf(wr_dir, "%0*d", length_data_entry, sz2); - wrbuf_printf(wr_dir, "%0*d", length_starting, data_p); - data_p += sz2; - } - wrbuf_putc(wr_dir, ISO2709_FS); - wrbuf_printf(wr_head, "%05d", data_p+1 + base_address); - wrbuf_write(wr_head, buf+5, 7); - wrbuf_printf(wr_head, "%05d", base_address); - wrbuf_write(wr_head, buf+17, 7); - - wrbuf_write(wr, wrbuf_buf(wr_head), 24); - wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir)); - wrbuf_free(wr_head, 1); - wrbuf_free(wr_dir, 1); - wrbuf_free(wr_tmp, 1); + WRBUF wr_head = wrbuf_alloc(); + WRBUF wr_dir = wrbuf_alloc(); + WRBUF wr_tmp = wrbuf_alloc(); + + int data_p = 0; + /* second pass. create directory for ISO2709 output */ + for (entry_p = 24; entry_p != end_of_directory; ) + { + int data_length, data_offset, end_offset; + int i, sz1, sz2; + + wrbuf_write(wr_dir, buf+entry_p, 3); + entry_p += 3; + + data_length = atoi_n (buf+entry_p, length_data_entry); + entry_p += length_data_entry; + data_offset = atoi_n (buf+entry_p, length_starting); + entry_p += length_starting; + i = data_offset + base_address; + end_offset = i+data_length-1; + + if (data_length <= 0 || data_offset < 0 || end_offset >= record_length) + return -1; + + while (i < end_offset && + buf[i] != ISO2709_RS && buf[i] != ISO2709_FS) + i++; + sz1 = 1+i - (data_offset + base_address); + if (mt->iconv_cd) + { + sz2 = wrbuf_iconv_write(wr_tmp, mt->iconv_cd, + buf + data_offset+base_address, sz1); + wrbuf_rewind(wr_tmp); + } + else + sz2 = sz1; + wrbuf_printf(wr_dir, "%0*d", length_data_entry, sz2); + wrbuf_printf(wr_dir, "%0*d", length_starting, data_p); + data_p += sz2; + } + wrbuf_putc(wr_dir, ISO2709_FS); + wrbuf_printf(wr_head, "%05d", data_p+1 + base_address); + wrbuf_write(wr_head, lead+5, 7); + wrbuf_printf(wr_head, "%05d", base_address); + wrbuf_write(wr_head, lead+17, 7); + + wrbuf_write(wr, wrbuf_buf(wr_head), 24); + wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir)); + wrbuf_free(wr_head, 1); + wrbuf_free(wr_dir, 1); + wrbuf_free(wr_tmp, 1); } /* third pass. create data output */ - for (entry_p = 24; buf[entry_p] != ISO2709_FS; ) + for (entry_p = 24; entry_p != end_of_directory; ) { int data_length; - int data_offset; - int end_offset; - int i, j; - char tag[4]; + int data_offset; + int end_offset; + int i, j; + char tag[4]; int identifier_flag = 0; - int entry_p0; + int entry_p0 = entry_p; memcpy (tag, buf+entry_p, 3); - entry_p += 3; - entry_p0 = entry_p; + entry_p += 3; tag[3] = '\0'; - data_length = atoi_n (buf+entry_p, length_data_entry); - entry_p += length_data_entry; - data_offset = atoi_n (buf+entry_p, length_starting); - entry_p += length_starting; - i = data_offset + base_address; - end_offset = i+data_length-1; - - if (mt->debug) - { - wrbuf_printf(wr, "\n", - entry_p0, data_length, data_offset); - } + data_length = atoi_n (buf+entry_p, length_data_entry); + entry_p += length_data_entry; + data_offset = atoi_n (buf+entry_p, length_starting); + entry_p += length_starting; + i = data_offset + base_address; + end_offset = i+data_length-1; + + if (data_length <= 0 || data_offset < 0) + break; - if (indicator_length < 4 && indicator_length > 0) + if (mt->debug) { - if (buf[i + indicator_length] == ISO2709_IDFS) - identifier_flag = 1; - else if (buf[i + indicator_length + 1] == ISO2709_IDFS) - identifier_flag = 2; + wrbuf_printf(wr, "\n", + entry_p0, data_length, data_offset); + } + if (end_offset >= record_length) + { + wrbuf_printf (wr,"\n", + entry_p0, end_offset, record_length); + break; } - else if (memcmp (tag, "00", 2)) - identifier_flag = 1; + if (memcmp (tag, "00", 2)) + identifier_flag = 1; /* if not 00X assume subfields */ + else if (indicator_length < 4 && indicator_length > 0) + { + /* Danmarc 00X have subfields */ + if (buf[i + indicator_length] == ISO2709_IDFS) + identifier_flag = 1; + else if (buf[i + indicator_length + 1] == ISO2709_IDFS) + identifier_flag = 2; + } + + if (mt->debug) + { + wrbuf_printf(wr, "\n", + identifier_flag); + } + switch(mt->xml) { case YAZ_MARC_LINE: - if (mt->debug) - wrbuf_puts (wr, "Tag: "); wrbuf_puts (wr, tag); wrbuf_puts (wr, " "); break; case YAZ_MARC_SIMPLEXML: wrbuf_printf (wr, "xml) { - case YAZ_MARC_ISO2709: - wrbuf_putc(wr, buf[i]); - break; + case YAZ_MARC_ISO2709: + wrbuf_putc(wr, buf[i]); + break; case YAZ_MARC_LINE: - if (mt->debug) - wrbuf_puts (wr, " Ind: "); wrbuf_putc(wr, buf[i]); break; case YAZ_MARC_SIMPLEXML: wrbuf_printf(wr, " Indicator%d=\"", j+1); - marc_cdata(mt, buf+i, 1, wr); + marc_cdata(mt, buf+i, 1, wr); wrbuf_printf(wr, "\""); break; case YAZ_MARC_OAIMARC: wrbuf_printf(wr, " i%d=\"", j+1); - marc_cdata(mt, buf+i, 1, wr); + marc_cdata(mt, buf+i, 1, wr); wrbuf_printf(wr, "\""); break; case YAZ_MARC_MARCXML: + case YAZ_MARC_XCHANGE: wrbuf_printf(wr, " ind%d=\"", j+1); - marc_cdata(mt, buf+i, 1, wr); + marc_cdata(mt, buf+i, 1, wr); wrbuf_printf(wr, "\""); } } - } + } if (mt->xml == YAZ_MARC_SIMPLEXML || mt->xml == YAZ_MARC_MARCXML - || mt->xml == YAZ_MARC_OAIMARC) + || mt->xml == YAZ_MARC_OAIMARC || mt->xml == YAZ_MARC_XCHANGE) { wrbuf_puts (wr, ">"); if (identifier_flag) wrbuf_puts (wr, "\n"); } - if (mt->xml == YAZ_MARC_LINE) - { - if (mt->debug) - wrbuf_puts (wr, " Fields: "); - } if (identifier_flag) { - while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS && i < end_offset) + while (i < end_offset && + buf[i] != ISO2709_RS && buf[i] != ISO2709_FS) { int i0; i++; switch(mt->xml) { - case YAZ_MARC_ISO2709: - --i; - wrbuf_iconv_write(wr, mt->iconv_cd, - buf+i, identifier_length); - i += identifier_length; - break; + case YAZ_MARC_ISO2709: + --i; + wrbuf_iconv_write(wr, mt->iconv_cd, + buf+i, identifier_length); + i += identifier_length; + break; case YAZ_MARC_LINE: wrbuf_puts (wr, mt->subfield_str); - marc_cdata(mt, buf+i, identifier_length-1, wr); - i = i+identifier_length-1; + marc_cdata(mt, buf+i, identifier_length-1, wr); + i = i+identifier_length-1; wrbuf_putc (wr, ' '); break; case YAZ_MARC_SIMPLEXML: wrbuf_puts (wr, " "); break; case YAZ_MARC_OAIMARC: wrbuf_puts (wr, " "); break; case YAZ_MARC_MARCXML: + case YAZ_MARC_XCHANGE: wrbuf_puts (wr, " "); break; } i0 = i; - while (buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS && - buf[i] != ISO2709_FS && i < end_offset) + while (i < end_offset && + buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS && + buf[i] != ISO2709_FS) i++; marc_cdata(mt, buf + i0, i - i0, wr); - if (mt->xml == YAZ_MARC_ISO2709 && buf[i] != ISO2709_IDFS) - marc_cdata(mt, buf + i, 1, wr); + if (mt->xml == YAZ_MARC_ISO2709 && buf[i] != ISO2709_IDFS) + marc_cdata(mt, buf + i, 1, wr); - if (mt->xml == YAZ_MARC_SIMPLEXML || - mt->xml == YAZ_MARC_MARCXML || - mt->xml == YAZ_MARC_OAIMARC) + if (mt->xml == YAZ_MARC_SIMPLEXML || + mt->xml == YAZ_MARC_MARCXML || + mt->xml == YAZ_MARC_XCHANGE || + mt->xml == YAZ_MARC_OAIMARC) wrbuf_puts (wr, "\n"); } } else { int i0 = i; - while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS && i < end_offset) + while (i < end_offset && + buf[i] != ISO2709_RS && buf[i] != ISO2709_FS) i++; - marc_cdata(mt, buf + i0, i - i0, wr); - if (mt->xml == YAZ_MARC_ISO2709) - marc_cdata(mt, buf + i, 1, wr); - } + marc_cdata(mt, buf + i0, i - i0, wr); + if (mt->xml == YAZ_MARC_ISO2709) + marc_cdata(mt, buf + i, 1, wr); + } if (mt->xml == YAZ_MARC_LINE) wrbuf_puts (wr, mt->endline_str); - if (i < end_offset) - wrbuf_printf(wr, " \n", data_length); - if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS) - wrbuf_printf(wr, " \n", data_length); + if (i < end_offset) + wrbuf_printf(wr, "\n", data_length); + if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS) + wrbuf_printf(wr, "\n", data_length); switch(mt->xml) { case YAZ_MARC_SIMPLEXML: @@ -458,6 +528,7 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) wrbuf_puts (wr, "\n"); break; case YAZ_MARC_MARCXML: + case YAZ_MARC_XCHANGE: if (identifier_flag) wrbuf_puts (wr, " \n"); else @@ -477,11 +548,12 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) wrbuf_puts (wr, "\n"); break; case YAZ_MARC_MARCXML: + case YAZ_MARC_XCHANGE: wrbuf_puts (wr, "\n"); break; case YAZ_MARC_ISO2709: - wrbuf_putc (wr, ISO2709_RS); - break; + wrbuf_putc (wr, ISO2709_RS); + break; } return record_length; } @@ -489,14 +561,14 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize, char **result, int *rsize) { - int r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr); - if (r > 0) - { - if (result) - *result = wrbuf_buf(mt->m_wr); - if (rsize) - *rsize = wrbuf_len(mt->m_wr); - } + int r; + + wrbuf_rewind(mt->m_wr); + r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr); + if (result) + *result = wrbuf_buf(mt->m_wr); + if (rsize) + *rsize = wrbuf_len(mt->m_wr); return r; } @@ -545,9 +617,9 @@ int marc_display_exl (const char *buf, FILE *outf, int debug, int bsize) mt->debug = debug; r = yaz_marc_decode_wrbuf (mt, buf, bsize, mt->m_wr); if (!outf) - outf = stdout; + outf = stdout; if (r > 0) - fwrite (wrbuf_buf(mt->m_wr), 1, wrbuf_len(mt->m_wr), outf); + fwrite (wrbuf_buf(mt->m_wr), 1, wrbuf_len(mt->m_wr), outf); yaz_marc_destroy(mt); return r; } @@ -564,3 +636,11 @@ int marc_display (const char *buf, FILE *outf) return marc_display_ex (buf, outf, 0); } +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ +