From 9fb32b9ec35a83d916e0227c751d4a7925301d34 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Wed, 5 Mar 2008 21:21:22 +0000 Subject: [PATCH] For MARC-8 decoding avoid messing up G0 when G1 is selected. Use two flags for G0/G1 selection so that selection of G1 does not conflict with selection of G0. Bug #2115. Note that G1 is not properly handled. We must mask with &127 for G1 decoding since those characters have bit 7 set. A flag probably must be passed to the conversion routines. --- src/siconv.c | 31 ++++++++++++++++++++++++------- test/tsticonv.c | 10 ++++++++-- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/src/siconv.c b/src/siconv.c index 663d03a..e7a0de0 100644 --- a/src/siconv.c +++ b/src/siconv.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2007, Index Data ApS * See the file LICENSE for details. * - * $Id: siconv.c,v 1.48 2007-10-15 20:45:05 adam Exp $ + * $Id: siconv.c,v 1.49 2008-03-05 21:21:22 adam Exp $ */ /** * \file siconv.c @@ -100,7 +100,8 @@ struct yaz_iconv_struct { char **outbuf, size_t *outbytesleft); size_t (*flush_handle)(yaz_iconv_t cd, char **outbuf, size_t *outbytesleft); - int marc8_esc_mode; + int g0_mode; + int g1_mode; int comb_offset; int comb_size; @@ -1168,22 +1169,36 @@ static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp, *no_read = 0; while(inbytesleft >= 1 && inp[0] == 27) { + int ch; size_t inbytesleft0 = inbytesleft; inp++; inbytesleft--; - while(inbytesleft > 0 && strchr("(,$!)-", *inp)) + if (inbytesleft > 0 && *inp == '$') { inbytesleft--; inp++; } - if (inbytesleft <= 0) + if (inbytesleft <= 1) { *no_read = 0; cd->my_errno = YAZ_ICONV_EINVAL; return 0; } - cd->marc8_esc_mode = *inp++; inbytesleft--; + ch = *inp++; + if (inbytesleft > 0 && (ch == '(' || ch == ',')) + { + inbytesleft--; + cd->g0_mode = *inp++; + } + else if (inbytesleft > 0 && (ch == ')' || ch == '-')) + { + inbytesleft--; + cd->g1_mode = *inp++; + } + else + cd->g0_mode = ch; + (*no_read) += inbytesleft0 - inbytesleft; } if
(inbytesleft <= 0) @@ -1197,9 +1212,10 @@ static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp, { unsigned long x; size_t no_read_sub = 0; + int mode = *inp < 128 ? cd->g0_mode : cd->g1_mode; *comb = 0; - switch(cd->marc8_esc_mode) + switch(mode) { case 'B': /* Basic ASCII */ case 's': /* ASCII */ @@ -1896,7 +1912,8 @@ size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft, if (cd->init_flag) { cd->my_errno = YAZ_ICONV_UNKNOWN; - cd->marc8_esc_mode = 'B'; + cd->g0_mode = 'B'; + cd->g1_mode = 'B'; cd->comb_offset = cd->comb_size = 0; cd->compose_char = 0; diff --git a/test/tsticonv.c b/test/tsticonv.c index 843df37..b39923b 100644 --- a/test/tsticonv.c +++ b/test/tsticonv.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2007, Index Data ApS * See the file LICENSE for details. * - * $Id: tsticonv.c,v 1.33 2007-11-16 08:01:47 adam Exp $ + * $Id: tsticonv.c,v 1.34 2008-03-05 21:21:22 adam Exp $ */ #if HAVE_CONFIG_H @@ -17,6 +17,8 @@ #include #include +#define ESC "\x1b" + static int compare_buffers(char *msg, int no, int expect_len, const char *expect_buf, int got_len, const char *got_buf) @@ -386,16 +388,20 @@ static void tst_marc8_to_utf8(void) if (!cd) return; + /* bug #2115 */ + YAZ_CHECK(tst_convert(cd, ESC "(N" ESC ")Qp" ESC "(B", "\xd0\x9f")); + + YAZ_CHECK(tst_convert(cd, "Cours de math", "Cours de math")); /* COMBINING ACUTE ACCENT */ YAZ_CHECK(tst_convert(cd, "Cours de mathâe", "Cours de mathe\xcc\x81")); - YAZ_CHECK(tst_convert(cd, "a\xea\x1e", "a\x1e\xcc\x8a")); YAZ_CHECK(tst_convert(cd, "a\xea", "a")); + yaz_iconv_close(cd); } -- 1.7.10.4