X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Fsiconv.c;h=8deb714f9a7f884b7e3dfd2b2c444fbe93b6ffb7;hb=6f17341a80d2c4558e4944c0731231994b3da8bc;hp=ba54b163b5e2fa698beec385c62011d8d7bedd7c;hpb=e87336d1ad9587d0a7fdc805e2b53c77d435d67c;p=yaz-moved-to-github.git diff --git a/src/siconv.c b/src/siconv.c index ba54b16..8deb714 100644 --- a/src/siconv.c +++ b/src/siconv.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2006, Index Data ApS * See the file LICENSE for details. * - * $Id: siconv.c,v 1.24 2006-08-04 14:35:40 adam Exp $ + * $Id: siconv.c,v 1.28 2006-08-30 20:14:51 adam Exp $ */ /** * \file siconv.c @@ -12,12 +12,16 @@ * is used by YAZ to interface with iconv (if present). * For systems where iconv is not present, this layer * provides a few important conversions: UTF-8, MARC-8, Latin-1. + * + * MARC-8 reference: + * http://www.loc.gov/marc/specifications/speccharmarc8.html */ #if HAVE_CONFIG_H #include #endif +#include #include #include #include @@ -29,6 +33,7 @@ #include #endif + #include unsigned long yaz_marc8_1_conv(unsigned char *inp, size_t inbytesleft, @@ -178,6 +183,21 @@ static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd, unsigned char *inp, return x; } +static size_t yaz_init_marc8(yaz_iconv_t cd, unsigned char *inp, + size_t inbytesleft, size_t *no_read) +{ + cd->marc8_esc_mode = 'B'; + + cd->comb_offset = cd->comb_size = 0; + cd->compose_char = 0; + + cd->write_marc8_comb_no = 0; + cd->write_marc8_last = 0; + cd->write_marc8_page_chr = "\033(B"; + + return 0; +} + static size_t yaz_init_UTF8 (yaz_iconv_t cd, unsigned char *inp, size_t inbytesleft, size_t *no_read) { @@ -492,9 +512,16 @@ static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp, } } -static size_t yaz_write_UTF8 (yaz_iconv_t cd, unsigned long x, - char **outbuf, size_t *outbytesleft, - int last) +static size_t yaz_write_UTF8(yaz_iconv_t cd, unsigned long x, + char **outbuf, size_t *outbytesleft, + int last) +{ + return yaz_write_UTF8_char(x, outbuf, outbytesleft, &cd->my_errno); +} + +size_t yaz_write_UTF8_char(unsigned long x, + char **outbuf, size_t *outbytesleft, + int *error) { unsigned char *outp = (unsigned char *) *outbuf; @@ -545,7 +572,7 @@ static size_t yaz_write_UTF8 (yaz_iconv_t cd, unsigned long x, } else { - cd->my_errno = YAZ_ICONV_E2BIG; /* not room for output */ + *error = YAZ_ICONV_E2BIG; /* not room for output */ return (size_t)(-1); } *outbuf = (char *) outp; @@ -731,7 +758,7 @@ static unsigned long lookup_marc8(yaz_iconv_t cd, x = yaz_marc8r_9_conv(inp, inbytesleft, &no_read_sub, comb); if (x) { - *page_chr = "\033(1"; + *page_chr = "\033$1"; return x; } cd->my_errno = YAZ_ICONV_EILSEQ; @@ -811,21 +838,42 @@ static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x, else { size_t r = flush_combos(cd, outbuf, outbytesleft); + const char *old_page_chr = cd->write_marc8_page_chr; if (r) return r; - if (strcmp(page_chr, cd->write_marc8_page_chr)) + if (strcmp(page_chr, old_page_chr)) { - size_t plen = strlen(page_chr); + size_t plen = 0; + const char *page_out = page_chr; - if (*outbytesleft < plen) + if (*outbytesleft < 8) { cd->my_errno = YAZ_ICONV_E2BIG; + return (size_t) (-1); } - memcpy(*outbuf, page_chr, plen); + cd->write_marc8_page_chr = page_chr; + + if (!strcmp(old_page_chr, "\033p") + || !strcmp(old_page_chr, "\033g") + || !strcmp(old_page_chr, "\033b")) + { + /* Technique 1 leave */ + page_out = "\033s"; + if (strcmp(page_chr, "\033(B")) /* Not going ASCII page? */ + { + /* Must leave script + enter new page */ + plen = strlen(page_out); + memcpy(*outbuf, page_out, plen); + (*outbuf) += plen; + (*outbytesleft) -= plen; + page_out = page_chr; + } + } + plen = strlen(page_out); + memcpy(*outbuf, page_out, plen); (*outbuf) += plen; (*outbytesleft) -= plen; - cd->write_marc8_page_chr = page_chr; } cd->write_marc8_last = y; } @@ -916,13 +964,6 @@ yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode) cd->read_handle = 0; cd->init_handle = 0; cd->my_errno = YAZ_ICONV_UNKNOWN; - cd->marc8_esc_mode = 'B'; - cd->comb_offset = cd->comb_size = 0; - cd->compose_char = 0; - - cd->write_marc8_comb_no = 0; - cd->write_marc8_last = 0; - cd->write_marc8_page_chr = "\033(B"; /* a useful hack: if fromcode has leading @, the library not use YAZ's own conversions .. */ @@ -942,9 +983,15 @@ yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode) else if (!yaz_matchstr(fromcode, "UCS4LE")) cd->read_handle = yaz_read_UCS4LE; else if (!yaz_matchstr(fromcode, "MARC8")) + { cd->read_handle = yaz_read_marc8; + cd->init_handle = yaz_init_marc8; + } else if (!yaz_matchstr(fromcode, "MARC8s")) + { cd->read_handle = yaz_read_marc8s; + cd->init_handle = yaz_init_marc8; + } #if HAVE_WCHAR_H else if (!yaz_matchstr(fromcode, "WCHAR_T")) cd->read_handle = yaz_read_wchar_t; @@ -959,9 +1006,15 @@ yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode) else if (!yaz_matchstr(tocode, "UCS4LE")) cd->write_handle = yaz_write_UCS4LE; else if (!yaz_matchstr(tocode, "MARC8")) + { cd->write_handle = yaz_write_marc8; + cd->init_handle = yaz_init_marc8; + } else if (!yaz_matchstr(tocode, "MARC8s")) + { cd->write_handle = yaz_write_marc8; + cd->init_handle = yaz_init_marc8; + } #if HAVE_WCHAR_H else if (!yaz_matchstr(tocode, "WCHAR_T")) cd->write_handle = yaz_write_wchar_t; @@ -1032,7 +1085,7 @@ size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft, { if (cd->init_handle) { - size_t no_read; + size_t no_read = 0; size_t r = (cd->init_handle)(cd, (unsigned char *) *inbuf, *inbytesleft, &no_read); if (r)