X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Fsiconv.c;h=17e3d89ede55dcc1324fae860d85439a0eda16d3;hb=212ecdc7daf9764e6fdd0951281f61b9c1a94c2d;hp=945b6cc2bb796e33996ec0dcfcb676d589a59443;hpb=f66b53c033be535cc3fab8bcb2949fa2927b25d4;p=yaz-moved-to-github.git diff --git a/src/siconv.c b/src/siconv.c index 945b6cc..17e3d89 100644 --- a/src/siconv.c +++ b/src/siconv.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2007, Index Data ApS * See the file LICENSE for details. * - * $Id: siconv.c,v 1.47 2007-10-12 14:22:19 adam Exp $ + * $Id: siconv.c,v 1.50 2008-03-12 08:53:28 adam Exp $ */ /** * \file siconv.c @@ -100,7 +100,8 @@ struct yaz_iconv_struct { char **outbuf, size_t *outbytesleft); size_t (*flush_handle)(yaz_iconv_t cd, char **outbuf, size_t *outbytesleft); - int marc8_esc_mode; + int g0_mode; + int g1_mode; int comb_offset; int comb_size; @@ -113,10 +114,9 @@ struct yaz_iconv_struct { #endif unsigned long compose_char; - unsigned long write_marc8_comb_ch[8]; - size_t write_marc8_comb_no; unsigned write_marc8_second_half_char; unsigned long write_marc8_last; + const char *write_marc8_lpage; const char *write_marc8_g0; const char *write_marc8_g1; }; @@ -192,6 +192,10 @@ static struct { { 0, 0, 0} }; +static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd, + char **outbuf, size_t *outbytesleft, + const char *page_chr); + static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd, unsigned char *inp, size_t inbytesleft, size_t *no_read) { @@ -1165,22 +1169,36 @@ static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp, *no_read = 0; while(inbytesleft >= 1 && inp[0] == 27) { + int ch; size_t inbytesleft0 = inbytesleft; inp++; inbytesleft--; - while(inbytesleft > 0 && strchr("(,$!)-", *inp)) + if (inbytesleft > 0 && *inp == '$') { inbytesleft--; inp++; } - if (inbytesleft <= 0) + if (inbytesleft <= 1) { *no_read = 0; cd->my_errno = YAZ_ICONV_EINVAL; return 0; } - cd->marc8_esc_mode = *inp++; inbytesleft--; + ch = *inp++; + if (inbytesleft > 0 && (ch == '(' || ch == ',')) + { + inbytesleft--; + cd->g0_mode = *inp++; + } + else if (inbytesleft > 0 && (ch == ')' || ch == '-')) + { + inbytesleft--; + cd->g1_mode = *inp++; + } + else + cd->g0_mode = ch; + (*no_read) += inbytesleft0 - inbytesleft; } if (inbytesleft <= 0) @@ -1194,9 +1212,10 @@ static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp, { unsigned long x; size_t no_read_sub = 0; + int mode = *inp < 128 ? cd->g0_mode : cd->g1_mode; *comb = 0; - switch(cd->marc8_esc_mode) + switch(mode) { case 'B': /* Basic ASCII */ case 's': /* ASCII */ @@ -1465,18 +1484,7 @@ static unsigned long lookup_marc8(yaz_iconv_t cd, x = yaz_marc8r_45_conv(inp, inbytesleft, &no_read_sub, comb); if (x) { -#if 1 *page_chr = ESC "(B"; -#else - /* this possibly solves bug #1778 */ - *page_chr = ESC ")!E"; -#endif - return x; - } - x = yaz_marc8r_67_conv(inp, inbytesleft, &no_read_sub, comb); - if (x) - { - *page_chr = ESC "g"; return x; } x = yaz_marc8r_62_conv(inp, inbytesleft, &no_read_sub, comb); @@ -1543,12 +1551,21 @@ static size_t flush_combos(yaz_iconv_t cd, { unsigned long y = cd->write_marc8_last; unsigned char byte; - char out_buf[10]; - size_t i, out_no = 0; + char out_buf[4]; + size_t out_no = 0; if (!y) return 0; + assert(cd->write_marc8_lpage); + if (cd->write_marc8_lpage) + { + size_t r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, + cd->write_marc8_lpage); + if (r) + return r; + } + byte = (unsigned char )((y>>16) & 0xff); if (byte) out_buf[out_no++] = byte; @@ -1559,19 +1576,12 @@ static size_t flush_combos(yaz_iconv_t cd, if (byte) out_buf[out_no++] = byte; - if (out_no + cd->write_marc8_comb_no + 1 > *outbytesleft) + if (out_no + 2 >= *outbytesleft) { cd->my_errno = YAZ_ICONV_E2BIG; return (size_t) (-1); } - for (i = 0; i < cd->write_marc8_comb_no; i++) - { - /* all MARC-8 combined characters are simple bytes */ - byte = (unsigned char )(cd->write_marc8_comb_ch[i]); - *(*outbuf)++ = byte; - (*outbytesleft)--; - } memcpy(*outbuf, out_buf, out_no); *outbuf += out_no; (*outbytesleft) -= out_no; @@ -1582,7 +1592,7 @@ static size_t flush_combos(yaz_iconv_t cd, } cd->write_marc8_last = 0; - cd->write_marc8_comb_no = 0; + cd->write_marc8_lpage = 0; cd->write_marc8_second_half_char = 0; return 0; } @@ -1652,7 +1662,8 @@ static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x, { if (page_chr) { - size_t r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, page_chr); + size_t r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, + page_chr); if (r) return r; } @@ -1661,8 +1672,13 @@ static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x, else if (x == 0x0360) cd->write_marc8_second_half_char = 0xFB; - if (cd->write_marc8_comb_no < 6) - cd->write_marc8_comb_ch[cd->write_marc8_comb_no++] = y; + if (*outbytesleft <= 1) + { + cd->my_errno = YAZ_ICONV_E2BIG; + return (size_t) (-1); + } + *(*outbuf)++ = y; + (*outbytesleft)--; } else { @@ -1670,13 +1686,8 @@ static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x, if (r) return r; - if (page_chr) - { - r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, page_chr); - if (r) - return r; - } cd->write_marc8_last = y; + cd->write_marc8_lpage = page_chr; } return 0; } @@ -1704,6 +1715,7 @@ static size_t yaz_write_marc8(yaz_iconv_t cd, unsigned long x, char *outbuf0 = *outbuf; size_t outbytesleft0 = *outbytesleft; int last_ch = cd->write_marc8_last; + const char *lpage = cd->write_marc8_lpage; r = yaz_write_marc8_2(cd, latin1_comb[i].x1, outbuf, outbytesleft); @@ -1717,6 +1729,7 @@ static size_t yaz_write_marc8(yaz_iconv_t cd, unsigned long x, *outbuf = outbuf0; *outbytesleft = outbytesleft0; cd->write_marc8_last = last_ch; + cd->write_marc8_lpage = lpage; } return r; } @@ -1893,14 +1906,15 @@ size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft, if (cd->init_flag) { cd->my_errno = YAZ_ICONV_UNKNOWN; - cd->marc8_esc_mode = 'B'; + cd->g0_mode = 'B'; + cd->g1_mode = 'B'; cd->comb_offset = cd->comb_size = 0; cd->compose_char = 0; - cd->write_marc8_comb_no = 0; cd->write_marc8_second_half_char = 0; cd->write_marc8_last = 0; + cd->write_marc8_lpage = 0; cd->write_marc8_g0 = ESC "(B"; cd->write_marc8_g1 = 0;