X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Fsiconv.c;h=b04072931c0fd57cc3e61b2e3859954164d8bcd1;hb=951fce6496397a6d8c11e15ab5b60b46abc43467;hp=ba54b163b5e2fa698beec385c62011d8d7bedd7c;hpb=e87336d1ad9587d0a7fdc805e2b53c77d435d67c;p=yaz-moved-to-github.git diff --git a/src/siconv.c b/src/siconv.c index ba54b16..b040729 100644 --- a/src/siconv.c +++ b/src/siconv.c @@ -1,8 +1,8 @@ /* - * Copyright (C) 1995-2006, Index Data ApS + * Copyright (C) 1995-2007, Index Data ApS * See the file LICENSE for details. * - * $Id: siconv.c,v 1.24 2006-08-04 14:35:40 adam Exp $ + * $Id: siconv.c,v 1.32 2007-01-03 08:42:15 adam Exp $ */ /** * \file siconv.c @@ -12,12 +12,16 @@ * is used by YAZ to interface with iconv (if present). * For systems where iconv is not present, this layer * provides a few important conversions: UTF-8, MARC-8, Latin-1. + * + * MARC-8 reference: + * http://www.loc.gov/marc/specifications/speccharmarc8.html */ #if HAVE_CONFIG_H #include #endif +#include #include #include #include @@ -29,6 +33,7 @@ #include #endif + #include unsigned long yaz_marc8_1_conv(unsigned char *inp, size_t inbytesleft, @@ -95,6 +100,7 @@ struct yaz_iconv_struct { unsigned long write_marc8_comb_ch[8]; size_t write_marc8_comb_no; + unsigned write_marc8_second_half_char; unsigned long write_marc8_last; const char *write_marc8_page_chr; }; @@ -178,6 +184,7 @@ static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd, unsigned char *inp, return x; } + static size_t yaz_init_UTF8 (yaz_iconv_t cd, unsigned char *inp, size_t inbytesleft, size_t *no_read) { @@ -426,7 +433,7 @@ static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp, size_t inbytesleft0 = inbytesleft; inp++; inbytesleft--; - while(inbytesleft > 0 && strchr("(,$!", *inp)) + while(inbytesleft > 0 && strchr("(,$!)-", *inp)) { inbytesleft--; inp++; @@ -492,9 +499,16 @@ static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp, } } -static size_t yaz_write_UTF8 (yaz_iconv_t cd, unsigned long x, - char **outbuf, size_t *outbytesleft, - int last) +static size_t yaz_write_UTF8(yaz_iconv_t cd, unsigned long x, + char **outbuf, size_t *outbytesleft, + int last) +{ + return yaz_write_UTF8_char(x, outbuf, outbytesleft, &cd->my_errno); +} + +size_t yaz_write_UTF8_char(unsigned long x, + char **outbuf, size_t *outbytesleft, + int *error) { unsigned char *outp = (unsigned char *) *outbuf; @@ -545,7 +559,7 @@ static size_t yaz_write_UTF8 (yaz_iconv_t cd, unsigned long x, } else { - cd->my_errno = YAZ_ICONV_E2BIG; /* not room for output */ + *error = YAZ_ICONV_E2BIG; /* not room for output */ return (size_t)(-1); } *outbuf = (char *) outp; @@ -731,7 +745,7 @@ static unsigned long lookup_marc8(yaz_iconv_t cd, x = yaz_marc8r_9_conv(inp, inbytesleft, &no_read_sub, comb); if (x) { - *page_chr = "\033(1"; + *page_chr = "\033$1"; return x; } cd->my_errno = YAZ_ICONV_EILSEQ; @@ -743,7 +757,7 @@ static size_t flush_combos(yaz_iconv_t cd, char **outbuf, size_t *outbytesleft) { unsigned long y = cd->write_marc8_last; - unsigned char byte, second_half = 0; + unsigned char byte; char out_buf[10]; size_t i, out_no = 0; @@ -770,25 +784,21 @@ static size_t flush_combos(yaz_iconv_t cd, { /* all MARC-8 combined characters are simple bytes */ byte = (unsigned char )(cd->write_marc8_comb_ch[i]); - if (byte == 0xEB) - second_half = 0xEC; - else if (byte == 0xFA) - second_half = 0xFB; - *(*outbuf)++ = byte; (*outbytesleft)--; } memcpy(*outbuf, out_buf, out_no); *outbuf += out_no; (*outbytesleft) -= out_no; - if (second_half) + if (cd->write_marc8_second_half_char) { - *(*outbuf)++ = second_half; + *(*outbuf)++ = cd->write_marc8_second_half_char; (*outbytesleft)--; } cd->write_marc8_last = 0; cd->write_marc8_comb_no = 0; + cd->write_marc8_second_half_char = 0; return 0; } @@ -805,27 +815,53 @@ static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x, if (comb) { + if (x == 0x0361) + cd->write_marc8_second_half_char = 0xEC; + else if (x == 0x0360) + cd->write_marc8_second_half_char = 0xFB; + if (cd->write_marc8_comb_no < 6) cd->write_marc8_comb_ch[cd->write_marc8_comb_no++] = y; } else { size_t r = flush_combos(cd, outbuf, outbytesleft); + const char *old_page_chr = cd->write_marc8_page_chr; if (r) return r; - if (strcmp(page_chr, cd->write_marc8_page_chr)) + if (strcmp(page_chr, old_page_chr)) { - size_t plen = strlen(page_chr); + size_t plen = 0; + const char *page_out = page_chr; - if (*outbytesleft < plen) + if (*outbytesleft < 8) { cd->my_errno = YAZ_ICONV_E2BIG; + return (size_t) (-1); } - memcpy(*outbuf, page_chr, plen); + cd->write_marc8_page_chr = page_chr; + + if (!strcmp(old_page_chr, "\033p") + || !strcmp(old_page_chr, "\033g") + || !strcmp(old_page_chr, "\033b")) + { + /* Technique 1 leave */ + page_out = "\033s"; + if (strcmp(page_chr, "\033(B")) /* Not going ASCII page? */ + { + /* Must leave script + enter new page */ + plen = strlen(page_out); + memcpy(*outbuf, page_out, plen); + (*outbuf) += plen; + (*outbytesleft) -= plen; + page_out = page_chr; + } + } + plen = strlen(page_out); + memcpy(*outbuf, page_out, plen); (*outbuf) += plen; (*outbytesleft) -= plen; - cd->write_marc8_page_chr = page_chr; } cd->write_marc8_last = y; } @@ -916,13 +952,6 @@ yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode) cd->read_handle = 0; cd->init_handle = 0; cd->my_errno = YAZ_ICONV_UNKNOWN; - cd->marc8_esc_mode = 'B'; - cd->comb_offset = cd->comb_size = 0; - cd->compose_char = 0; - - cd->write_marc8_comb_no = 0; - cd->write_marc8_last = 0; - cd->write_marc8_page_chr = "\033(B"; /* a useful hack: if fromcode has leading @, the library not use YAZ's own conversions .. */ @@ -1032,7 +1061,7 @@ size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft, { if (cd->init_handle) { - size_t no_read; + size_t no_read = 0; size_t r = (cd->init_handle)(cd, (unsigned char *) *inbuf, *inbytesleft, &no_read); if (r) @@ -1045,6 +1074,16 @@ size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft, *inbytesleft -= no_read; *inbuf += no_read; } + cd->marc8_esc_mode = 'B'; + + cd->comb_offset = cd->comb_size = 0; + cd->compose_char = 0; + + cd->write_marc8_comb_no = 0; + cd->write_marc8_second_half_char = 0; + cd->write_marc8_last = 0; + cd->write_marc8_page_chr = "\033(B"; + cd->init_flag = 0; cd->unget_x = 0; cd->no_read_x = 0;