From d38ee71c31b49ad13164039140ed47d18e9432cb Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Tue, 20 Mar 2007 21:37:31 +0000 Subject: [PATCH] Attempted fix of bug #976: Segfault in yaz_iconv. The yaz_iconv function write handlers no longer carry a 'last' parameter. This will make yaz_iconv flush "less" characters. A flush is performed by a call to yaz_iconv(cd, 0, 0, &outbuf, &outbytesleft). --- NEWS | 5 +++ include/yaz/wrbuf.h | 4 +- src/marcdisp.c | 15 +++---- src/siconv.c | 107 +++++++++++++++++++++++++----------------------- src/wrbuf.c | 15 ++++++- src/zoom-c.c | 27 +++--------- test/tst_record_conv.c | 8 ++-- test/tsticonv.c | 27 +++++++++++- 8 files changed, 118 insertions(+), 90 deletions(-) diff --git a/NEWS b/NEWS index 200cb5b..f3bf309 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,8 @@ +Attempted fix of bug #976: Segfault in yaz_iconv. The yaz_iconv function +write handlers no longer carry a 'last' parameter. This will make +yaz_iconv flush "less" characters. A flush is performed by a call to +yaz_iconv(cd, 0, 0, &outbuf, &outbytesleft). + Definition of wrbuf_diags moved to querytowrbuf.h. Function wrbuf_put_zquery removed, because function yaz_query_to_wrbuf does the same. diff --git a/include/yaz/wrbuf.h b/include/yaz/wrbuf.h index e3c5de3..56a4023 100644 --- a/include/yaz/wrbuf.h +++ b/include/yaz/wrbuf.h @@ -24,7 +24,7 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -/* $Id: wrbuf.h,v 1.25 2007-03-19 14:40:06 adam Exp $ */ +/* $Id: wrbuf.h,v 1.26 2007-03-20 21:37:31 adam Exp $ */ /** * \file wrbuf.h @@ -78,6 +78,8 @@ YAZ_EXPORT int wrbuf_iconv_puts(WRBUF b, yaz_iconv_t cd, const char *strz); YAZ_EXPORT int wrbuf_iconv_putchar(WRBUF b, yaz_iconv_t cd, int ch); +YAZ_EXPORT void wrbuf_iconv_reset(WRBUF b, yaz_iconv_t cd); + YAZ_EXPORT void wrbuf_chop_right(WRBUF b); /** \brief cut size of WRBUF */ diff --git a/src/marcdisp.c b/src/marcdisp.c index 6544559..33ec6f9 100644 --- a/src/marcdisp.c +++ b/src/marcdisp.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2007, Index Data ApS * See the file LICENSE for details. * - * $Id: marcdisp.c,v 1.48 2007-03-19 14:40:07 adam Exp $ + * $Id: marcdisp.c,v 1.49 2007-03-20 21:37:32 adam Exp $ */ /** @@ -128,15 +128,7 @@ NMEM yaz_marc_get_nmem(yaz_marc_t mt) static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr) { - if (mt->iconv_cd) - { - char outbuf[12]; - size_t outbytesleft = sizeof(outbuf); - char *outp = outbuf; - size_t r = yaz_iconv(mt->iconv_cd, 0, 0, &outp, &outbytesleft); - if (r != (size_t) (-1)) - wrbuf_write(wr, outbuf, outp - outbuf); - } + wrbuf_iconv_reset(wr, mt->iconv_cd); } static int marc_exec_leader(const char *leader_spec, char *leader, @@ -491,6 +483,7 @@ int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr) wrbuf_puts(wr, "("); wrbuf_iconv_write(wr, mt->iconv_cd, n->u.comment, strlen(n->u.comment)); + marc_iconv_reset(mt, wr); wrbuf_puts(wr, ")\n"); break; case YAZ_MARC_LEADER: @@ -847,6 +840,7 @@ int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr) } /* write dummy FS (makes MARC-8 to become ASCII) */ wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' '); + marc_iconv_reset(mt, wr_data_tmp); data_length += wrbuf_len(wr_data_tmp); break; case YAZ_MARC_CONTROLFIELD: @@ -857,6 +851,7 @@ int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr) n->u.controlfield.data); marc_iconv_reset(mt, wr_data_tmp); wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */ + marc_iconv_reset(mt, 
wr_data_tmp); data_length += wrbuf_len(wr_data_tmp); break; case YAZ_MARC_COMMENT: diff --git a/src/siconv.c b/src/siconv.c index 8f61f4a..26f3678 100644 --- a/src/siconv.c +++ b/src/siconv.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2007, Index Data ApS * See the file LICENSE for details. * - * $Id: siconv.c,v 1.36 2007-03-17 00:10:40 adam Exp $ + * $Id: siconv.c,v 1.37 2007-03-20 21:37:32 adam Exp $ */ /** * \file siconv.c @@ -83,8 +83,7 @@ struct yaz_iconv_struct { unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf, size_t inbytesleft, size_t *no_read); size_t (*write_handle)(yaz_iconv_t cd, unsigned long x, - char **outbuf, size_t *outbytesleft, - int last); + char **outbuf, size_t *outbytesleft); size_t (*flush_handle)(yaz_iconv_t cd, char **outbuf, size_t *outbytesleft); int marc8_esc_mode; @@ -616,8 +615,7 @@ static unsigned long yaz_read_advancegreek(yaz_iconv_t cd, unsigned char *inp, } static size_t yaz_write_advancegreek(yaz_iconv_t cd, unsigned long x, - char **outbuf, size_t *outbytesleft, - int last) + char **outbuf, size_t *outbytesleft) { size_t k = 0; unsigned char *out = (unsigned char*) *outbuf; @@ -865,8 +863,7 @@ static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp, } static size_t yaz_write_UTF8(yaz_iconv_t cd, unsigned long x, - char **outbuf, size_t *outbytesleft, - int last) + char **outbuf, size_t *outbytesleft) { return yaz_write_UTF8_char(x, outbuf, outbytesleft, &cd->my_errno); } @@ -931,10 +928,8 @@ size_t yaz_write_UTF8_char(unsigned long x, return 0; } - static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x, - char **outbuf, size_t *outbytesleft, - int last) + char **outbuf, size_t *outbytesleft) { /* list of two char unicode sequence that, when combined, are equivalent to single unicode chars that can be represented in @@ -969,7 +964,7 @@ static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x, cd->compose_char = 0; } - if (!last && x > 32 && x < 127 && cd->compose_char == 0) 
+ if (x > 32 && x < 127 && cd->compose_char == 0) { cd->compose_char = x; return 0; @@ -990,10 +985,27 @@ static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x, return 0; } +static size_t yaz_flush_ISO8859_1(yaz_iconv_t cd, + char **outbuf, size_t *outbytesleft) +{ + if (cd->compose_char) + { + unsigned char *outp = (unsigned char *) *outbuf; + if (*outbytesleft < 1) + { + cd->my_errno = YAZ_ICONV_E2BIG; + return (size_t)(-1); + } + *outp++ = (unsigned char) cd->compose_char; + (*outbytesleft)--; + *outbuf = (char *) outp; + cd->compose_char = 0; + } + return 0; +} static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x, - char **outbuf, size_t *outbytesleft, - int last) + char **outbuf, size_t *outbytesleft) { unsigned char *outp = (unsigned char *) *outbuf; if (*outbytesleft >= 4) @@ -1014,8 +1026,7 @@ static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x, } static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x, - char **outbuf, size_t *outbytesleft, - int last) + char **outbuf, size_t *outbytesleft) { unsigned char *outp = (unsigned char *) *outbuf; if (*outbytesleft >= 4) @@ -1043,7 +1054,7 @@ static unsigned long lookup_marc8(yaz_iconv_t cd, char *utf8_outbuf = utf8_buf; size_t utf8_outbytesleft = sizeof(utf8_buf)-1, r; - r = yaz_write_UTF8(cd, x, &utf8_outbuf, &utf8_outbytesleft, 0); + r = yaz_write_UTF8(cd, x, &utf8_outbuf, &utf8_outbytesleft); if (r == (size_t)(-1)) { cd->my_errno = YAZ_ICONV_EILSEQ; @@ -1211,8 +1222,7 @@ static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd, static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x, - char **outbuf, size_t *outbytesleft, - int last) + char **outbuf, size_t *outbytesleft) { int comb = 0; const char *page_chr = 0; @@ -1242,18 +1252,6 @@ static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x, return r; cd->write_marc8_last = y; } - if (last) - { - size_t r = flush_combos(cd, outbuf, outbytesleft); - if (r) - { - if (comb) - cd->write_marc8_comb_no--; - else 
- cd->write_marc8_last = 0; - return r; - } - } return 0; } @@ -1267,8 +1265,7 @@ static size_t yaz_flush_marc8(yaz_iconv_t cd, } static size_t yaz_write_marc8(yaz_iconv_t cd, unsigned long x, - char **outbuf, size_t *outbytesleft, - int last) + char **outbuf, size_t *outbytesleft) { int i; for (i = 0; latin1_comb[i].x1; i++) @@ -1282,11 +1279,11 @@ static size_t yaz_write_marc8(yaz_iconv_t cd, unsigned long x, int last_ch = cd->write_marc8_last; r = yaz_write_marc8_2(cd, latin1_comb[i].x1, - outbuf, outbytesleft, 0); + outbuf, outbytesleft); if (r) return r; r = yaz_write_marc8_2(cd, latin1_comb[i].x2, - outbuf, outbytesleft, last); + outbuf, outbytesleft); if (r && cd->my_errno == YAZ_ICONV_E2BIG) { /* not enough room. reset output to original values */ @@ -1297,14 +1294,13 @@ static size_t yaz_write_marc8(yaz_iconv_t cd, unsigned long x, return r; } } - return yaz_write_marc8_2(cd, x, outbuf, outbytesleft, last); + return yaz_write_marc8_2(cd, x, outbuf, outbytesleft); } #if HAVE_WCHAR_H -static size_t yaz_write_wchar_t (yaz_iconv_t cd, unsigned long x, - char **outbuf, size_t *outbytesleft, - int last) +static size_t yaz_write_wchar_t(yaz_iconv_t cd, unsigned long x, + char **outbuf, size_t *outbytesleft) { unsigned char *outp = (unsigned char *) *outbuf; @@ -1371,7 +1367,10 @@ yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode) if (!yaz_matchstr(tocode, "UTF8")) cd->write_handle = yaz_write_UTF8; else if (!yaz_matchstr(tocode, "ISO88591")) + { cd->write_handle = yaz_write_ISO8859_1; + cd->flush_handle = yaz_flush_ISO8859_1; + } else if (!yaz_matchstr (tocode, "UCS4")) cd->write_handle = yaz_write_UCS4; else if (!yaz_matchstr(tocode, "UCS4LE")) @@ -1489,6 +1488,20 @@ size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft, } cd->init_flag = 0; + if (!inbuf || !*inbuf) + { + if (outbuf && *outbuf) + { + if (cd->unget_x) + r = (*cd->write_handle)(cd, cd->unget_x, outbuf, outbytesleft); + if (cd->flush_handle) + r = 
(*cd->flush_handle)(cd, outbuf, outbytesleft); + } + if (r == 0) + cd->init_flag = 1; + cd->unget_x = 0; + return r; + } while (1) { unsigned long x; @@ -1499,34 +1512,24 @@ size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft, x = cd->unget_x; no_read = cd->no_read_x; } - else if (inbuf && *inbuf) + else { if (*inbytesleft == 0) { r = *inbuf - inbuf0; break; } - x = (cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft, - &no_read); + x = (*cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft, + &no_read); if (no_read == 0) { r = (size_t)(-1); break; } } - else - { - r = 0; - if (cd->flush_handle && outbuf && *outbuf) - r = (*cd->flush_handle)(cd, outbuf, outbytesleft); - if (r == 0) - cd->init_flag = 1; - break; - } if (x) { - r = (cd->write_handle)(cd, x, outbuf, outbytesleft, - (*inbytesleft - no_read) == 0 ? 1 : 0); + r = (*cd->write_handle)(cd, x, outbuf, outbytesleft); if (r) { /* unable to write it. save it because read_handle cannot diff --git a/src/wrbuf.c b/src/wrbuf.c index ef2be4c..f90764a 100644 --- a/src/wrbuf.c +++ b/src/wrbuf.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2007, Index Data ApS * See the file LICENSE for details. 
* - * $Id: wrbuf.c,v 1.18 2007-03-19 14:40:07 adam Exp $ + * $Id: wrbuf.c,v 1.19 2007-03-20 21:37:32 adam Exp $ */ /** @@ -213,6 +213,19 @@ int wrbuf_iconv_write_cdata(WRBUF b, yaz_iconv_t cd, const char *buf, int size) return wrbuf_iconv_write_x(b, cd, buf, size, 1); } +void wrbuf_iconv_reset(WRBUF b, yaz_iconv_t cd) +{ + if (cd) + { + char outbuf[12]; + size_t outbytesleft = sizeof(outbuf); + char *outp = outbuf; + size_t r = yaz_iconv(cd, 0, 0, &outp, &outbytesleft); + if (r != (size_t) (-1)) + wrbuf_write(b, outbuf, outp - outbuf); + } +} + const char *wrbuf_cstr(WRBUF b) { wrbuf_putc(b, '\0'); /* add '\0' */ diff --git a/src/zoom-c.c b/src/zoom-c.c index 10cf790..7bed6d7 100644 --- a/src/zoom-c.c +++ b/src/zoom-c.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2007, Index Data ApS * See the file LICENSE for details. * - * $Id: zoom-c.c,v 1.118 2007-03-19 20:58:34 adam Exp $ + * $Id: zoom-c.c,v 1.119 2007-03-20 21:37:32 adam Exp $ */ /** * \file zoom-c.c @@ -1284,7 +1284,7 @@ static zoom_ret ZOOM_connection_send_init(ZOOM_connection c) odr_prepend(c->odr_out, "ZOOM-C", ireq->implementationName)); - version = odr_strdup(c->odr_out, "$Revision: 1.118 $"); + version = odr_strdup(c->odr_out, "$Revision: 1.119 $"); if (strlen(version) > 10) /* check for unexpanded CVS strings */ version[strlen(version)-2] = '\0'; ireq->implementationVersion = @@ -1789,6 +1789,7 @@ static const char *record_iconv_return(ZOOM_record rec, int *len, *from = '\0'; strcpy(to, "UTF-8"); + if (record_charset && *record_charset) { /* Use "from,to" or just "from" */ @@ -1810,30 +1811,14 @@ static const char *record_iconv_return(ZOOM_record rec, int *len, if (*from && *to && (cd = yaz_iconv_open(to, from))) { - char outbuf[12]; - size_t inbytesleft = sz; - const char *inp = buf; - if (!rec->wrbuf_iconv) rec->wrbuf_iconv = wrbuf_alloc(); wrbuf_rewind(rec->wrbuf_iconv); - while (inbytesleft) - { - size_t outbytesleft = sizeof(outbuf); - char *outp = outbuf; - size_t r = yaz_iconv(cd, (char**) &inp, - 
&inbytesleft, - &outp, &outbytesleft); - if (r == (size_t) (-1)) - { - int e = yaz_iconv_error(cd); - if (e != YAZ_ICONV_E2BIG) - break; - } - wrbuf_write(rec->wrbuf_iconv, outbuf, outp - outbuf); - } + wrbuf_iconv_write(rec->wrbuf_iconv, cd, buf, sz); + wrbuf_iconv_reset(rec->wrbuf_iconv, cd); + buf = wrbuf_cstr(rec->wrbuf_iconv); sz = wrbuf_len(rec->wrbuf_iconv); yaz_iconv_close(cd); diff --git a/test/tst_record_conv.c b/test/tst_record_conv.c index 8fab94e..609b61c 100644 --- a/test/tst_record_conv.c +++ b/test/tst_record_conv.c @@ -2,7 +2,7 @@ * Copyright (C) 2005-2007, Index Data ApS * See the file LICENSE for details. * - * $Id: tst_record_conv.c,v 1.15 2007-03-19 22:17:41 adam Exp $ + * $Id: tst_record_conv.c,v 1.16 2007-03-20 21:37:32 adam Exp $ * */ #include @@ -185,8 +185,10 @@ static int conv_convert_test(yaz_record_conv_t p, else if (strcmp(output_expect_record, wrbuf_cstr(output_record))) { ret = 0; - printf("got-output_record = %s\n", wrbuf_cstr(output_record)); - printf("output_expect_record = %s\n", output_expect_record); + printf("got-output_record len=%d: %s\n", + wrbuf_len(output_record),wrbuf_cstr(output_record)); + printf("output_expect_record len=%d %s\n", + strlen(output_expect_record), output_expect_record); } else { diff --git a/test/tsticonv.c b/test/tsticonv.c index 37d3bbf..33b9944 100644 --- a/test/tsticonv.c +++ b/test/tsticonv.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2007, Index Data ApS * See the file LICENSE for details. 
* - * $Id: tsticonv.c,v 1.28 2007-03-19 14:40:07 adam Exp $ + * $Id: tsticonv.c,v 1.29 2007-03-20 21:37:32 adam Exp $ */ #if HAVE_CONFIG_H @@ -76,8 +76,12 @@ static int tst_convert_l(yaz_iconv_t cd, size_t in_len, const char *in_buf, return 0; } else + { + yaz_iconv(cd, 0, 0, &outbuf, &outbytesleft); break; + } } + return compare_buffers("tsticonv 22", 0, expect_len, expect_buf, outbuf - outbuf0, outbuf0); @@ -104,6 +108,14 @@ static int tst_convert(yaz_iconv_t cd, const char *buf, const char *cmpbuf) if (e != YAZ_ICONV_E2BIG) break; } + else + { + size_t outbytesleft = sizeof(outbuf); + char *outp = outbuf; + r = yaz_iconv(cd, 0, 0, &outp, &outbytesleft); + wrbuf_write(b, outbuf, outp - outbuf); + break; + } } if (wrbuf_len(b) == strlen(cmpbuf) && !memcmp(cmpbuf, wrbuf_buf(b), wrbuf_len(b))) @@ -266,6 +278,9 @@ static void dconvert(int mandatory, const char *tmpcode) return; r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); YAZ_CHECK(r != (size_t) (-1)); + + r = yaz_iconv(cd, 0, 0, &outbuf, &outbytesleft); + YAZ_CHECK(r != (size_t) (-1)); yaz_iconv_close(cd); if (r == (size_t) (-1)) return; @@ -281,11 +296,19 @@ static void dconvert(int mandatory, const char *tmpcode) outbytesleft = sizeof(outbuf1); r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); YAZ_CHECK(r != (size_t) (-1)); + + r = yaz_iconv(cd, 0, 0, &outbuf, &outbytesleft); + if (r == (size_t)(-1)) + { + fprintf(stderr, "failed\n"); + } + YAZ_CHECK(r != (size_t) (-1)); + if (r != (size_t)(-1)) { ret = compare_buffers("dconvert", i, strlen(iso_8859_1_a[i]), iso_8859_1_a[i], - sizeof(outbuf1) - outbytesleft, outbuf1); + sizeof(outbuf1) - outbytesleft, outbuf1); YAZ_CHECK(ret); } yaz_iconv_close(cd); -- 1.7.10.4