X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Fsiconv.c;h=2082781e19a6fcc58d937ea45916aaa55a55161a;hb=b925ea17d6f146a28d745b0d34e9eec6eafda21f;hp=00340067f6f34f4f15f982fa603c34a38ca76d73;hpb=2e40cc0b39b9e96695e70316c677f035f7bfd52e;p=yaz-moved-to-github.git diff --git a/src/siconv.c b/src/siconv.c index 0034006..2082781 100644 --- a/src/siconv.c +++ b/src/siconv.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2006, Index Data ApS * See the file LICENSE for details. * - * $Id: siconv.c,v 1.25 2006-08-24 10:01:03 adam Exp $ + * $Id: siconv.c,v 1.27 2006-08-28 12:34:41 adam Exp $ */ /** * \file siconv.c @@ -12,12 +12,16 @@ * is used by YAZ to interface with iconv (if present). * For systems where iconv is not present, this layer * provides a few important conversions: UTF-8, MARC-8, Latin-1. + * + * MARC-8 reference: + * http://www.loc.gov/marc/specifications/speccharmarc8.html */ #if HAVE_CONFIG_H #include #endif +#include #include #include #include @@ -29,6 +33,7 @@ #include #endif + #include unsigned long yaz_marc8_1_conv(unsigned char *inp, size_t inbytesleft, @@ -178,6 +183,21 @@ static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd, unsigned char *inp, return x; } +static size_t yaz_init_marc8(yaz_iconv_t cd, unsigned char *inp, + size_t inbytesleft, size_t *no_read) +{ + cd->marc8_esc_mode = 'B'; + + cd->comb_offset = cd->comb_size = 0; + cd->compose_char = 0; + + cd->write_marc8_comb_no = 0; + cd->write_marc8_last = 0; + cd->write_marc8_page_chr = "\033(B"; + + return 0; +} + static size_t yaz_init_UTF8 (yaz_iconv_t cd, unsigned char *inp, size_t inbytesleft, size_t *no_read) { @@ -818,21 +838,42 @@ static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x, else { size_t r = flush_combos(cd, outbuf, outbytesleft); + const char *old_page_chr = cd->write_marc8_page_chr; if (r) return r; - if (strcmp(page_chr, cd->write_marc8_page_chr)) + if (strcmp(page_chr, old_page_chr)) { - size_t plen = strlen(page_chr); + size_t plen = 0; + const char *page_out = page_chr; - if (*outbytesleft < plen) + if (*outbytesleft < 8) { cd->my_errno = YAZ_ICONV_E2BIG; + return (size_t) (-1); } - memcpy(*outbuf, page_chr, plen); + cd->write_marc8_page_chr = page_chr; + + if (!strcmp(old_page_chr, "\033p") + || !strcmp(old_page_chr, "\033g") + || !strcmp(old_page_chr, "\033b")) + { + /* Technique 1 leave */ + page_out = "\033s"; + if (strcmp(page_chr, "\033(B")) /* Not going ASCII page? */ + { + /* Must leave script + enter new page */ + plen = strlen(page_out); + memcpy(*outbuf, page_out, plen); + (*outbuf) += plen; + (*outbytesleft) -= plen; + page_out = page_chr; + } + } + plen = strlen(page_out); + memcpy(*outbuf, page_out, plen); (*outbuf) += plen; (*outbytesleft) -= plen; - cd->write_marc8_page_chr = page_chr; } cd->write_marc8_last = y; } @@ -923,13 +964,6 @@ yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode) cd->read_handle = 0; cd->init_handle = 0; cd->my_errno = YAZ_ICONV_UNKNOWN; - cd->marc8_esc_mode = 'B'; - cd->comb_offset = cd->comb_size = 0; - cd->compose_char = 0; - - cd->write_marc8_comb_no = 0; - cd->write_marc8_last = 0; - cd->write_marc8_page_chr = "\033(B"; /* a useful hack: if fromcode has leading @, the library not use YAZ's own conversions .. */ @@ -949,9 +983,15 @@ yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode) else if (!yaz_matchstr(fromcode, "UCS4LE")) cd->read_handle = yaz_read_UCS4LE; else if (!yaz_matchstr(fromcode, "MARC8")) + { cd->read_handle = yaz_read_marc8; + cd->init_handle = yaz_init_marc8; + } else if (!yaz_matchstr(fromcode, "MARC8s")) + { cd->read_handle = yaz_read_marc8s; + cd->init_handle = yaz_init_marc8; + } #if HAVE_WCHAR_H else if (!yaz_matchstr(fromcode, "WCHAR_T")) cd->read_handle = yaz_read_wchar_t; @@ -966,9 +1006,15 @@ yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode) else if (!yaz_matchstr(tocode, "UCS4LE")) cd->write_handle = yaz_write_UCS4LE; else if (!yaz_matchstr(tocode, "MARC8")) + { cd->write_handle = yaz_write_marc8; + cd->init_handle = yaz_init_marc8; + } else if (!yaz_matchstr(tocode, "MARC8s")) + { cd->write_handle = yaz_write_marc8; + cd->init_handle = yaz_init_marc8; + } #if HAVE_WCHAR_H else if (!yaz_matchstr(tocode, "WCHAR_T")) cd->write_handle = yaz_write_wchar_t; @@ -1039,7 +1085,7 @@ size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft, { if (cd->init_handle) { - size_t no_read; + size_t no_read = 0; size_t r = (cd->init_handle)(cd, (unsigned char *) *inbuf, *inbytesleft, &no_read); if (r)