X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Fmarcdisp.c;h=4e4230bb62805801f0ad1106295a6317a9b75ebd;hp=1266d250baace991d7c9eb38e3fd7d535d146d18;hb=84d7b06c13daa609e93f353e655c4b02f936d65c;hpb=a19316337aa3e23b74dac29e5dcad06493ecf083 diff --git a/src/marcdisp.c b/src/marcdisp.c index 1266d25..4e4230b 100644 --- a/src/marcdisp.c +++ b/src/marcdisp.c @@ -1,8 +1,13 @@ /* - * Copyright (c) 1995-2003, Index Data + * Copyright (C) 1995-2005, Index Data ApS * See the file LICENSE for details. * - * $Id: marcdisp.c,v 1.3 2003-12-17 12:28:07 adam Exp $ + * $Id: marcdisp.c,v 1.21 2005-04-20 13:17:51 adam Exp $ + */ + +/** + * \file marcdisp.c + * \brief Implements MARC display - and conversion utilities */ #if HAVE_CONFIG_H @@ -21,6 +26,8 @@ struct yaz_marc_t_ { int xml; int debug; yaz_iconv_t iconv_cd; + char subfield_str[8]; + char endline_str[8]; }; yaz_marc_t yaz_marc_create(void) @@ -30,9 +37,23 @@ yaz_marc_t yaz_marc_create(void) mt->debug = 0; mt->m_wr = wrbuf_alloc(); mt->iconv_cd = 0; + strcpy(mt->subfield_str, " $"); + strcpy(mt->endline_str, "\n"); return mt; } +void yaz_marc_subfield_str(yaz_marc_t mt, const char *s) +{ + strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1); + mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0'; +} + +void yaz_marc_endline_str(yaz_marc_t mt, const char *s) +{ + strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1); + mt->endline_str[sizeof(mt->endline_str)-1] = '\0'; +} + void yaz_marc_destroy(yaz_marc_t mt) { if (!mt) @@ -43,56 +64,20 @@ void yaz_marc_destroy(yaz_marc_t mt) static void marc_cdata (yaz_marc_t mt, const char *buf, size_t len, WRBUF wr) { - size_t i; if (mt->xml == YAZ_MARC_ISO2709) - { wrbuf_iconv_write(wr, mt->iconv_cd, buf, len); - } else if (mt->xml == YAZ_MARC_LINE) - { wrbuf_iconv_write(wr, mt->iconv_cd, buf, len); - } else - { - int j = 0; - for (i = 0; i j) - wrbuf_iconv_write(wr, mt->iconv_cd, buf+j, i-j); - wrbuf_puts(wr, "<"); - j=i+1; - break; - case '>': - if (i > j) - wrbuf_iconv_write(wr, mt->iconv_cd, buf+j, i-j); - wrbuf_puts(wr, ">"); - j=i+1; - break; - case '&': - if (i > j) - wrbuf_iconv_write(wr, mt->iconv_cd, buf+j, i-j); - wrbuf_puts(wr, "&"); - j=i+1; - break; - case '"': - if (i > j) - wrbuf_iconv_write(wr, mt->iconv_cd, buf+j, i-j); - wrbuf_puts(wr, """); - j=i+1; - break; - case '\'': - if (i > j) - wrbuf_iconv_write(wr, mt->iconv_cd, buf+j, i-j); - wrbuf_puts(wr, "'"); - j=i+1; - break; - } - } - if (i > j) - wrbuf_iconv_write(wr, mt->iconv_cd, buf+j, i-j); - } + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, buf, len); +} + +static int atoi_n_check(const char *buf, int size, int *val) +{ + if (!isdigit(*(const unsigned char *) buf)) + return 0; + *val = atoi_n(buf, size); + return 1; } int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) @@ -101,41 +86,74 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) int record_length; int indicator_length; int identifier_length; + int end_of_directory; int base_address; int length_data_entry; int length_starting; int length_implementation; + char lead[24]; + int produce_warnings = 0; - wrbuf_rewind(wr); + if (mt->debug) + produce_warnings = 1; + if (mt->xml == YAZ_MARC_SIMPLEXML || mt->xml == YAZ_MARC_OAIMARC + || mt->xml == YAZ_MARC_MARCXML || mt->xml == YAZ_MARC_XCHANGE) + produce_warnings = 1; record_length = atoi_n (buf, 5); if (record_length < 25) { if (mt->debug) - { - char str[40]; - - sprintf (str, "Record length %d - aborting\n", record_length); - wrbuf_puts (wr, str); - } + wrbuf_printf(wr, "\n", + record_length); return -1; } + memcpy(lead, buf, 24); /* se can modify the header for output */ + /* ballout if bsize is known and record_length is less than that */ if (bsize != -1 && record_length > bsize) return -1; - if (isdigit(buf[10])) - indicator_length = atoi_n (buf+10, 1); - else - indicator_length = 2; - if (isdigit(buf[11])) - identifier_length = atoi_n (buf+11, 1); - else + if (!atoi_n_check(buf+10, 1, &indicator_length)) + { + if (produce_warnings) + wrbuf_printf(wr, "\n"); + lead[10] = '2'; + indicator_length = 2; + } + if (!atoi_n_check(buf+11, 1, &identifier_length)) + { + if (produce_warnings) + wrbuf_printf(wr, "\n"); + lead[11] = '2'; identifier_length = 2; - base_address = atoi_n (buf+12, 5); - - length_data_entry = atoi_n (buf+20, 1); - length_starting = atoi_n (buf+21, 1); - length_implementation = atoi_n (buf+22, 1); + } + if (!atoi_n_check(buf+12, 5, &base_address)) + { + if (produce_warnings) + wrbuf_printf(wr, "\n"); + base_address = 0; + } + if (!atoi_n_check(buf+20, 1, &length_data_entry)) + { + if (produce_warnings) + wrbuf_printf(wr, "\n"); + length_data_entry = 4; + lead[20] = '4'; + } + if (!atoi_n_check(buf+21, 1, &length_starting)) + { + if (produce_warnings) + wrbuf_printf(wr, "\n"); + length_starting = 5; + lead[21] = '5'; + } + if (!atoi_n_check(buf+22, 1, &length_implementation)) + { + if (produce_warnings) + wrbuf_printf(wr, "\n"); + length_implementation = 0; + lead[22] = '0'; + } if (mt->xml != YAZ_MARC_LINE) { @@ -177,7 +195,18 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) wrbuf_printf( wr, "\n" - " %.24s\n", buf); + " "); + lead[9] = 'a'; /* set leader to signal unicode */ + marc_cdata(mt, lead, 24, wr); + wrbuf_printf(wr, "\n"); + break; + case YAZ_MARC_XCHANGE: + wrbuf_printf( + wr, + "\n" + " "); + marc_cdata(mt, lead, 24, wr); + wrbuf_printf(wr, "\n"); break; } } @@ -185,8 +214,7 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) { char str[40]; - if (mt->xml) - wrbuf_puts (wr, "\n"); + wrbuf_puts (wr, "-->\n"); } /* first pass. determine length of directory & base of data */ for (entry_p = 24; buf[entry_p] != ISO2709_FS; ) { - entry_p += 3+length_data_entry+length_starting; - if (entry_p >= record_length) + /* length of directory entry */ + int l = 3 + length_data_entry + length_starting; + if (entry_p + l >= record_length) + { + wrbuf_printf (wr, "\n", entry_p); return -1; + } + if (mt->debug) + wrbuf_printf (wr, "\n", + entry_p, buf+entry_p); + /* check for digits in length info */ + while (--l >= 3) + if (!isdigit(*(const unsigned char *) (buf + entry_p+l))) + break; + if (l >= 3) + { + /* not all digits, so stop directory scan */ + wrbuf_printf (wr, "\n", entry_p); + break; + } + entry_p += 3 + length_data_entry + length_starting; } - if (mt->debug && base_address != entry_p+1) + end_of_directory = entry_p; + if (base_address != entry_p+1) { - wrbuf_printf (wr," \n", base_address, entry_p+1); + if (produce_warnings) + wrbuf_printf (wr,"\n", base_address, entry_p+1); } - base_address = entry_p+1; - if (mt->xml == YAZ_MARC_ISO2709) { WRBUF wr_head = wrbuf_alloc(); @@ -227,7 +274,7 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) int data_p = 0; /* second pass. create directory for ISO2709 output */ - for (entry_p = 24; buf[entry_p] != ISO2709_FS; ) + for (entry_p = 24; entry_p != end_of_directory; ) { int data_length, data_offset, end_offset; int i, sz1, sz2; @@ -242,8 +289,11 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) i = data_offset + base_address; end_offset = i+data_length-1; - while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS && - i < end_offset) + if (data_length <= 0 || data_offset < 0 || end_offset >= record_length) + return -1; + + while (i < end_offset && + buf[i] != ISO2709_RS && buf[i] != ISO2709_FS) i++; sz1 = 1+i - (data_offset + base_address); if (mt->iconv_cd) @@ -260,9 +310,9 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) } wrbuf_putc(wr_dir, ISO2709_FS); wrbuf_printf(wr_head, "%05d", data_p+1 + base_address); - wrbuf_write(wr_head, buf+5, 7); + wrbuf_write(wr_head, lead+5, 7); wrbuf_printf(wr_head, "%05d", base_address); - wrbuf_write(wr_head, buf+17, 7); + wrbuf_write(wr_head, lead+17, 7); wrbuf_write(wr, wrbuf_buf(wr_head), 24); wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir)); @@ -271,14 +321,15 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) wrbuf_free(wr_tmp, 1); } /* third pass. create data output */ - for (entry_p = 24; buf[entry_p] != ISO2709_FS; ) + for (entry_p = 24; entry_p != end_of_directory; ) { int data_length; int data_offset; int end_offset; int i, j; char tag[4]; - int identifier_flag = 1; + int identifier_flag = 0; + int entry_p0 = entry_p; memcpy (tag, buf+entry_p, 3); entry_p += 3; @@ -289,41 +340,73 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) entry_p += length_starting; i = data_offset + base_address; end_offset = i+data_length-1; + + if (data_length <= 0 || data_offset < 0) + break; - if (indicator_length < 4 && indicator_length > 0) + if (mt->debug) + { + wrbuf_printf(wr, "\n", + entry_p0, data_length, data_offset); + } + if (end_offset >= record_length) + { + wrbuf_printf (wr,"\n", + entry_p0, end_offset, record_length); + break; + } + + if (memcmp (tag, "00", 2)) + identifier_flag = 1; /* if not 00X assume subfields */ + else if (indicator_length < 4 && indicator_length > 0) { - if (buf[i + indicator_length] != ISO2709_IDFS) - identifier_flag = 0; + /* Danmarc 00X have subfields */ + if (buf[i + indicator_length] == ISO2709_IDFS) + identifier_flag = 1; + else if (buf[i + indicator_length + 1] == ISO2709_IDFS) + identifier_flag = 2; } - else if (!memcmp (tag, "00", 2)) - identifier_flag = 0; - + + if (mt->debug) + { + wrbuf_printf(wr, "\n", + identifier_flag); + } + switch(mt->xml) { case YAZ_MARC_LINE: - if (mt->debug) - wrbuf_puts (wr, "Tag: "); wrbuf_puts (wr, tag); wrbuf_puts (wr, " "); break; case YAZ_MARC_SIMPLEXML: - wrbuf_printf (wr, "xml) @@ -332,36 +415,37 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) wrbuf_putc(wr, buf[i]); break; case YAZ_MARC_LINE: - if (mt->debug) - wrbuf_puts (wr, " Ind: "); wrbuf_putc(wr, buf[i]); break; case YAZ_MARC_SIMPLEXML: - wrbuf_printf(wr, " Indicator%d=\"%c\"", j+1, buf[i]); + wrbuf_printf(wr, " Indicator%d=\"", j+1); + marc_cdata(mt, buf+i, 1, wr); + wrbuf_printf(wr, "\""); break; case YAZ_MARC_OAIMARC: - wrbuf_printf(wr, " i%d=\"%c\"", j+1, buf[i]); + wrbuf_printf(wr, " i%d=\"", j+1); + marc_cdata(mt, buf+i, 1, wr); + wrbuf_printf(wr, "\""); break; case YAZ_MARC_MARCXML: - wrbuf_printf(wr, " ind%d=\"%c\"", j+1, buf[i]); + case YAZ_MARC_XCHANGE: + wrbuf_printf(wr, " ind%d=\"", j+1); + marc_cdata(mt, buf+i, 1, wr); + wrbuf_printf(wr, "\""); } } } if (mt->xml == YAZ_MARC_SIMPLEXML || mt->xml == YAZ_MARC_MARCXML - || mt->xml == YAZ_MARC_OAIMARC) + || mt->xml == YAZ_MARC_OAIMARC || mt->xml == YAZ_MARC_XCHANGE) { wrbuf_puts (wr, ">"); if (identifier_flag) wrbuf_puts (wr, "\n"); } - if (mt->xml == YAZ_MARC_LINE) - { - if (mt->debug) - wrbuf_puts (wr, " Fields: "); - } if (identifier_flag) { - while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS && i < end_offset) + while (i < end_offset && + buf[i] != ISO2709_RS && buf[i] != ISO2709_FS) { int i0; i++; @@ -374,33 +458,35 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) i += identifier_length; break; case YAZ_MARC_LINE: - wrbuf_puts (wr, " $"); - for (j = 1; jsubfield_str); + marc_cdata(mt, buf+i, identifier_length-1, wr); + i = i+identifier_length-1; wrbuf_putc (wr, ' '); break; case YAZ_MARC_SIMPLEXML: wrbuf_puts (wr, " "); break; case YAZ_MARC_OAIMARC: wrbuf_puts (wr, " "); break; case YAZ_MARC_MARCXML: + case YAZ_MARC_XCHANGE: wrbuf_puts (wr, " "); break; } i0 = i; - while (buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS && - buf[i] != ISO2709_FS && i < end_offset) + while (i < end_offset && + buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS && + buf[i] != ISO2709_FS) i++; marc_cdata(mt, buf + i0, i - i0, wr); @@ -409,6 +495,7 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) if (mt->xml == YAZ_MARC_SIMPLEXML || mt->xml == YAZ_MARC_MARCXML || + mt->xml == YAZ_MARC_XCHANGE || mt->xml == YAZ_MARC_OAIMARC) wrbuf_puts (wr, "\n"); } @@ -416,18 +503,19 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) else { int i0 = i; - while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS && i < end_offset) + while (i < end_offset && + buf[i] != ISO2709_RS && buf[i] != ISO2709_FS) i++; marc_cdata(mt, buf + i0, i - i0, wr); if (mt->xml == YAZ_MARC_ISO2709) marc_cdata(mt, buf + i, 1, wr); } if (mt->xml == YAZ_MARC_LINE) - wrbuf_putc (wr, '\n'); + wrbuf_puts (wr, mt->endline_str); if (i < end_offset) - wrbuf_puts (wr, " \n"); + wrbuf_printf(wr, "\n", data_length); if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS) - wrbuf_puts (wr, " \n"); + wrbuf_printf(wr, "\n", data_length); switch(mt->xml) { case YAZ_MARC_SIMPLEXML: @@ -435,15 +523,16 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) break; case YAZ_MARC_OAIMARC: if (identifier_flag) - wrbuf_puts (wr, " \n"); + wrbuf_puts (wr, "\n"); else - wrbuf_puts (wr, " \n"); + wrbuf_puts (wr, "\n"); break; case YAZ_MARC_MARCXML: + case YAZ_MARC_XCHANGE: if (identifier_flag) wrbuf_puts (wr, " \n"); else - wrbuf_puts (wr, " \n"); + wrbuf_puts (wr, "\n"); break; } } @@ -459,6 +548,7 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) wrbuf_puts (wr, "\n"); break; case YAZ_MARC_MARCXML: + case YAZ_MARC_XCHANGE: wrbuf_puts (wr, "\n"); break; case YAZ_MARC_ISO2709: @@ -472,13 +562,10 @@ int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize, char **result, int *rsize) { int r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr); - if (r > 0) - { - if (result) - *result = wrbuf_buf(mt->m_wr); - if (rsize) - *rsize = wrbuf_len(mt->m_wr); - } + if (result) + *result = wrbuf_buf(mt->m_wr); + if (rsize) + *rsize = wrbuf_len(mt->m_wr); return r; }