From: Adam Dickmeiss Date: Fri, 20 Jan 2006 15:30:24 +0000 (+0000) Subject: Try to cope with subfields in multi-byte encoding - such as UTF-8. X-Git-Tag: YAZ.2.1.12~24 X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=commitdiff_plain;h=b3f2d89141871dddbee9fba4061ece9fb90abcf2 Try to cope with subfields in multi-byte encoding - such as UTF-8. --- diff --git a/src/marcdisp.c b/src/marcdisp.c index 514d8e4..c1bf00d 100644 --- a/src/marcdisp.c +++ b/src/marcdisp.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2005, Index Data ApS * See the file LICENSE for details. * - * $Id: marcdisp.c,v 1.23 2005-06-25 15:46:04 adam Exp $ + * $Id: marcdisp.c,v 1.24 2006-01-20 15:30:24 adam Exp $ */ /** @@ -72,6 +72,30 @@ static void marc_cdata (yaz_marc_t mt, const char *buf, size_t len, WRBUF wr) wrbuf_iconv_write_cdata(wr, mt->iconv_cd, buf, len); } +/* try to guess how many bytes the identifier really is! */ +static size_t cdata_one_character(yaz_marc_t mt, const char *buf) +{ + if (mt->iconv_cd) + { + size_t i; + for (i = 1; i<5; i++) + { + char outbuf[12]; + size_t outbytesleft = sizeof(outbuf); + char *outp = outbuf; + const char *inp = buf; + + size_t inbytesleft = i; + size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft, + &outp, &outbytesleft); + if (r != (size_t) (-1)) + return i; /* got a complete sequence */ + } + return 1; /* giving up */ + } + return 1; /* we don't know */ +} + static int atoi_n_check(const char *buf, int size, int *val) { if (!isdigit(*(const unsigned char *) buf)) @@ -449,6 +473,11 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) { int i0; i++; + + int sb_octet_length = identifier_length-1; + if (identifier_length == 2) + sb_octet_length = cdata_one_character(mt, buf+i); + switch(mt->xml) { case YAZ_MARC_ISO2709: @@ -459,27 +488,27 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) break; case YAZ_MARC_LINE: wrbuf_puts (wr, mt->subfield_str); - marc_cdata(mt, buf+i, identifier_length-1, wr); - i = i+identifier_length-1; + marc_cdata(mt, buf+i, sb_octet_length, wr); + i = i+sb_octet_length; wrbuf_putc (wr, ' '); break; case YAZ_MARC_SIMPLEXML: wrbuf_puts (wr, " "); break; case YAZ_MARC_OAIMARC: wrbuf_puts (wr, " "); break; case YAZ_MARC_MARCXML: case YAZ_MARC_XCHANGE: wrbuf_puts (wr, " "); break; }