Attempted fix of bug #976: Segfault in yaz_iconv. The yaz_iconv function
author Adam Dickmeiss <adam@indexdata.dk>
Tue, 20 Mar 2007 21:37:31 +0000 (21:37 +0000)
committer Adam Dickmeiss <adam@indexdata.dk>
Tue, 20 Mar 2007 21:37:31 +0000 (21:37 +0000)
write handlers no longer carry a 'last' parameter. This will make
yaz_iconv flush "less" characters. A flush is performed by calling
yaz_iconv(cd, 0, 0, &outbuf, &outbytesleft).

NEWS
include/yaz/wrbuf.h
src/marcdisp.c
src/siconv.c
src/wrbuf.c
src/zoom-c.c
test/tst_record_conv.c
test/tsticonv.c

diff --git a/NEWS b/NEWS
index 200cb5b..f3bf309 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,8 @@
+Attempted fix of bug #976: Segfault in yaz_iconv. The yaz_iconv function
+write handlers no longer carry a 'last' parameter. This will make
+yaz_iconv flush "less" characters. A flush is performed by calling
+yaz_iconv(cd, 0, 0, &outbuf, &outbytesleft).
+
 Definition of wrbuf_diags moved to querytowrbuf.h. Function wrbuf_put_zquery
 removed, because function yaz_query_to_wrbuf does the same.
 
index e3c5de3..56a4023 100644 (file)
@@ -24,7 +24,7 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
-/* $Id: wrbuf.h,v 1.25 2007-03-19 14:40:06 adam Exp $ */
+/* $Id: wrbuf.h,v 1.26 2007-03-20 21:37:31 adam Exp $ */
 
 /**
  * \file wrbuf.h
@@ -78,6 +78,8 @@ YAZ_EXPORT int wrbuf_iconv_puts(WRBUF b, yaz_iconv_t cd, const char *strz);
 
 YAZ_EXPORT int wrbuf_iconv_putchar(WRBUF b, yaz_iconv_t cd, int ch);
 
+YAZ_EXPORT void wrbuf_iconv_reset(WRBUF b, yaz_iconv_t cd);
+
 YAZ_EXPORT void wrbuf_chop_right(WRBUF b);
 
 /** \brief cut size of WRBUF */
index 6544559..33ec6f9 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (C) 1995-2007, Index Data ApS
  * See the file LICENSE for details.
  *
- * $Id: marcdisp.c,v 1.48 2007-03-19 14:40:07 adam Exp $
+ * $Id: marcdisp.c,v 1.49 2007-03-20 21:37:32 adam Exp $
  */
 
 /**
@@ -128,15 +128,7 @@ NMEM yaz_marc_get_nmem(yaz_marc_t mt)
 
 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
 {
-    if (mt->iconv_cd)
-    {
-        char outbuf[12];
-        size_t outbytesleft = sizeof(outbuf);
-        char *outp = outbuf;
-        size_t r = yaz_iconv(mt->iconv_cd, 0, 0, &outp, &outbytesleft);
-        if (r != (size_t) (-1))
-            wrbuf_write(wr, outbuf, outp - outbuf);
-    }
+    wrbuf_iconv_reset(wr, mt->iconv_cd);
 }
 
 static int marc_exec_leader(const char *leader_spec, char *leader,
@@ -491,6 +483,7 @@ int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
             wrbuf_puts(wr, "(");
             wrbuf_iconv_write(wr, mt->iconv_cd, 
                               n->u.comment, strlen(n->u.comment));
+            marc_iconv_reset(mt, wr);
             wrbuf_puts(wr, ")\n");
             break;
         case YAZ_MARC_LEADER:
@@ -847,6 +840,7 @@ int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
             }
             /* write dummy FS (makes MARC-8 to become ASCII) */
             wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
+            marc_iconv_reset(mt, wr_data_tmp);
             data_length += wrbuf_len(wr_data_tmp);
             break;
         case YAZ_MARC_CONTROLFIELD:
@@ -857,6 +851,7 @@ int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
                              n->u.controlfield.data);
             marc_iconv_reset(mt, wr_data_tmp);
             wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
+            marc_iconv_reset(mt, wr_data_tmp);
             data_length += wrbuf_len(wr_data_tmp);
             break;
         case YAZ_MARC_COMMENT:
index 8f61f4a..26f3678 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (C) 1995-2007, Index Data ApS
  * See the file LICENSE for details.
  *
- * $Id: siconv.c,v 1.36 2007-03-17 00:10:40 adam Exp $
+ * $Id: siconv.c,v 1.37 2007-03-20 21:37:32 adam Exp $
  */
 /**
  * \file siconv.c
@@ -83,8 +83,7 @@ struct yaz_iconv_struct {
     unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf,
                                  size_t inbytesleft, size_t *no_read);
     size_t (*write_handle)(yaz_iconv_t cd, unsigned long x,
-                           char **outbuf, size_t *outbytesleft,
-                           int last);
+                           char **outbuf, size_t *outbytesleft);
     size_t (*flush_handle)(yaz_iconv_t cd,
                            char **outbuf, size_t *outbytesleft);
     int marc8_esc_mode;
@@ -616,8 +615,7 @@ static unsigned long yaz_read_advancegreek(yaz_iconv_t cd, unsigned char *inp,
 }
 
 static size_t yaz_write_advancegreek(yaz_iconv_t cd, unsigned long x,
-                                     char **outbuf, size_t *outbytesleft,
-                                     int last)
+                                     char **outbuf, size_t *outbytesleft)
 {
     size_t k = 0;
     unsigned char *out = (unsigned char*) *outbuf;
@@ -865,8 +863,7 @@ static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp,
 }
 
 static size_t yaz_write_UTF8(yaz_iconv_t cd, unsigned long x,
-                             char **outbuf, size_t *outbytesleft,
-                             int last)
+                             char **outbuf, size_t *outbytesleft)
 {
     return yaz_write_UTF8_char(x, outbuf, outbytesleft, &cd->my_errno);
 }
@@ -931,10 +928,8 @@ size_t yaz_write_UTF8_char(unsigned long x,
     return 0;
 }
 
-
 static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
-                                   char **outbuf, size_t *outbytesleft,
-                                   int last)
+                                   char **outbuf, size_t *outbytesleft)
 {
     /* list of two char unicode sequence that, when combined, are
        equivalent to single unicode chars that can be represented in
@@ -969,7 +964,7 @@ static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
         cd->compose_char = 0;
     }
 
-    if (!last && x > 32 && x < 127 && cd->compose_char == 0)
+    if (x > 32 && x < 127 && cd->compose_char == 0)
     {
         cd->compose_char = x;
         return 0;
@@ -990,10 +985,27 @@ static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
     return 0;
 }
 
+static size_t yaz_flush_ISO8859_1(yaz_iconv_t cd,
+                                  char **outbuf, size_t *outbytesleft)
+{
+    if (cd->compose_char)
+    {
+        unsigned char *outp = (unsigned char *) *outbuf;
+        if (*outbytesleft < 1)
+        {
+            cd->my_errno = YAZ_ICONV_E2BIG;
+            return (size_t)(-1);
+        }
+        *outp++ = (unsigned char) cd->compose_char;
+        (*outbytesleft)--;
+        *outbuf = (char *) outp;
+        cd->compose_char = 0;
+    }
+    return 0;
+}
 
 static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x,
-                              char **outbuf, size_t *outbytesleft,
-                              int last)
+                              char **outbuf, size_t *outbytesleft)
 {
     unsigned char *outp = (unsigned char *) *outbuf;
     if (*outbytesleft >= 4)
@@ -1014,8 +1026,7 @@ static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x,
 }
 
 static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x,
-                                char **outbuf, size_t *outbytesleft,
-                                int last)
+                                char **outbuf, size_t *outbytesleft)
 {
     unsigned char *outp = (unsigned char *) *outbuf;
     if (*outbytesleft >= 4)
@@ -1043,7 +1054,7 @@ static unsigned long lookup_marc8(yaz_iconv_t cd,
     char *utf8_outbuf = utf8_buf;
     size_t utf8_outbytesleft = sizeof(utf8_buf)-1, r;
 
-    r = yaz_write_UTF8(cd, x, &utf8_outbuf, &utf8_outbytesleft, 0);
+    r = yaz_write_UTF8(cd, x, &utf8_outbuf, &utf8_outbytesleft);
     if (r == (size_t)(-1))
     {
         cd->my_errno = YAZ_ICONV_EILSEQ;
@@ -1211,8 +1222,7 @@ static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
 
 
 static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x,
-                                char **outbuf, size_t *outbytesleft,
-                                int last)
+                                char **outbuf, size_t *outbytesleft)
 {
     int comb = 0;
     const char *page_chr = 0;
@@ -1242,18 +1252,6 @@ static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x,
             return r;
         cd->write_marc8_last = y;
     }
-    if (last)
-    {
-        size_t r = flush_combos(cd, outbuf, outbytesleft);
-        if (r)
-        {
-            if (comb)
-                cd->write_marc8_comb_no--;
-            else
-                cd->write_marc8_last = 0;
-            return r;
-        }
-    }
     return 0;
 }
 
@@ -1267,8 +1265,7 @@ static size_t yaz_flush_marc8(yaz_iconv_t cd,
 }
 
 static size_t yaz_write_marc8(yaz_iconv_t cd, unsigned long x,
-                              char **outbuf, size_t *outbytesleft,
-                              int last)
+                              char **outbuf, size_t *outbytesleft)
 {
     int i;
     for (i = 0; latin1_comb[i].x1; i++)
@@ -1282,11 +1279,11 @@ static size_t yaz_write_marc8(yaz_iconv_t cd, unsigned long x,
             int last_ch = cd->write_marc8_last;
 
             r = yaz_write_marc8_2(cd, latin1_comb[i].x1,
-                                  outbuf, outbytesleft, 0);
+                                  outbuf, outbytesleft);
             if (r)
                 return r;
             r = yaz_write_marc8_2(cd, latin1_comb[i].x2,
-                                  outbuf, outbytesleft, last);
+                                  outbuf, outbytesleft);
             if (r && cd->my_errno == YAZ_ICONV_E2BIG)
             {
                 /* not enough room. reset output to original values */
@@ -1297,14 +1294,13 @@ static size_t yaz_write_marc8(yaz_iconv_t cd, unsigned long x,
             return r;
         }
     }
-    return yaz_write_marc8_2(cd, x, outbuf, outbytesleft, last);
+    return yaz_write_marc8_2(cd, x, outbuf, outbytesleft);
 }
 
 
 #if HAVE_WCHAR_H
-static size_t yaz_write_wchar_t (yaz_iconv_t cd, unsigned long x,
-                                 char **outbuf, size_t *outbytesleft,
-                                 int last)
+static size_t yaz_write_wchar_t(yaz_iconv_t cd, unsigned long x,
+                                char **outbuf, size_t *outbytesleft)
 {
     unsigned char *outp = (unsigned char *) *outbuf;
 
@@ -1371,7 +1367,10 @@ yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
         if (!yaz_matchstr(tocode, "UTF8"))
             cd->write_handle = yaz_write_UTF8;
         else if (!yaz_matchstr(tocode, "ISO88591"))
+        {
             cd->write_handle = yaz_write_ISO8859_1;
+            cd->flush_handle = yaz_flush_ISO8859_1;
+        }
         else if (!yaz_matchstr (tocode, "UCS4"))
             cd->write_handle = yaz_write_UCS4;
         else if (!yaz_matchstr(tocode, "UCS4LE"))
@@ -1489,6 +1488,20 @@ size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
     }
     cd->init_flag = 0;
 
+    if (!inbuf || !*inbuf)
+    {
+        if (outbuf && *outbuf)
+        {
+            if (cd->unget_x)
+                r = (*cd->write_handle)(cd, cd->unget_x, outbuf, outbytesleft);
+            if (cd->flush_handle)
+                r = (*cd->flush_handle)(cd, outbuf, outbytesleft);
+        }
+        if (r == 0)
+            cd->init_flag = 1;
+        cd->unget_x = 0;
+        return r;
+    }
     while (1)
     {
         unsigned long x;
@@ -1499,34 +1512,24 @@ size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
             x = cd->unget_x;
             no_read = cd->no_read_x;
         }
-        else if (inbuf && *inbuf)
+        else
         {
             if (*inbytesleft == 0)
             {
                 r = *inbuf - inbuf0;
                 break;
             }
-            x = (cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft,
-                                  &no_read);
+            x = (*cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft,
+                                   &no_read);
             if (no_read == 0)
             {
                 r = (size_t)(-1);
                 break;
             }
         }
-        else
-        {
-            r = 0;
-            if (cd->flush_handle && outbuf && *outbuf)
-                r = (*cd->flush_handle)(cd, outbuf, outbytesleft);
-            if (r == 0)
-                cd->init_flag = 1;
-            break;
-        }
         if (x)
         {
-            r = (cd->write_handle)(cd, x, outbuf, outbytesleft,
-                                   (*inbytesleft - no_read) == 0 ? 1 : 0);
+            r = (*cd->write_handle)(cd, x, outbuf, outbytesleft);
             if (r)
             {
                 /* unable to write it. save it because read_handle cannot
index ef2be4c..f90764a 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (C) 1995-2007, Index Data ApS
  * See the file LICENSE for details.
  *
- * $Id: wrbuf.c,v 1.18 2007-03-19 14:40:07 adam Exp $
+ * $Id: wrbuf.c,v 1.19 2007-03-20 21:37:32 adam Exp $
  */
 
 /**
@@ -213,6 +213,19 @@ int wrbuf_iconv_write_cdata(WRBUF b, yaz_iconv_t cd, const char *buf, int size)
     return wrbuf_iconv_write_x(b, cd, buf, size, 1);
 }
 
+void wrbuf_iconv_reset(WRBUF b, yaz_iconv_t cd)
+{
+    if (cd)
+    {
+        char outbuf[12];
+        size_t outbytesleft = sizeof(outbuf);
+        char *outp = outbuf;
+        size_t r = yaz_iconv(cd, 0, 0, &outp, &outbytesleft);
+        if (r != (size_t) (-1))
+            wrbuf_write(b, outbuf, outp - outbuf);
+    }
+}
+
 const char *wrbuf_cstr(WRBUF b)
 {
     wrbuf_putc(b, '\0');   /* add '\0' */
index 10cf790..7bed6d7 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (C) 1995-2007, Index Data ApS
  * See the file LICENSE for details.
  *
- * $Id: zoom-c.c,v 1.118 2007-03-19 20:58:34 adam Exp $
+ * $Id: zoom-c.c,v 1.119 2007-03-20 21:37:32 adam Exp $
  */
 /**
  * \file zoom-c.c
@@ -1284,7 +1284,7 @@ static zoom_ret ZOOM_connection_send_init(ZOOM_connection c)
                     odr_prepend(c->odr_out, "ZOOM-C",
                                 ireq->implementationName));
     
-    version = odr_strdup(c->odr_out, "$Revision: 1.118 $");
+    version = odr_strdup(c->odr_out, "$Revision: 1.119 $");
     if (strlen(version) > 10)   /* check for unexpanded CVS strings */
         version[strlen(version)-2] = '\0';
     ireq->implementationVersion = 
@@ -1789,6 +1789,7 @@ static const char *record_iconv_return(ZOOM_record rec, int *len,
 
     *from = '\0';
     strcpy(to, "UTF-8");
+
     if (record_charset && *record_charset)
     {
         /* Use "from,to" or just "from" */
@@ -1810,30 +1811,14 @@ static const char *record_iconv_return(ZOOM_record rec, int *len,
 
     if (*from && *to && (cd = yaz_iconv_open(to, from)))
     {
-        char outbuf[12];
-        size_t inbytesleft = sz;
-        const char *inp = buf;
-        
         if (!rec->wrbuf_iconv)
             rec->wrbuf_iconv = wrbuf_alloc();
 
         wrbuf_rewind(rec->wrbuf_iconv);
 
-        while (inbytesleft)
-        {
-            size_t outbytesleft = sizeof(outbuf);
-            char *outp = outbuf;
-            size_t r = yaz_iconv(cd, (char**) &inp,
-                                 &inbytesleft, 
-                                 &outp, &outbytesleft);
-            if (r == (size_t) (-1))
-            {
-                int e = yaz_iconv_error(cd);
-                if (e != YAZ_ICONV_E2BIG)
-                    break;
-            }
-            wrbuf_write(rec->wrbuf_iconv, outbuf, outp - outbuf);
-        }
+        wrbuf_iconv_write(rec->wrbuf_iconv, cd, buf, sz);
+        wrbuf_iconv_reset(rec->wrbuf_iconv, cd);
+
         buf = wrbuf_cstr(rec->wrbuf_iconv);
         sz = wrbuf_len(rec->wrbuf_iconv);
         yaz_iconv_close(cd);
index 8fab94e..609b61c 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (C) 2005-2007, Index Data ApS
  * See the file LICENSE for details.
  *
- * $Id: tst_record_conv.c,v 1.15 2007-03-19 22:17:41 adam Exp $
+ * $Id: tst_record_conv.c,v 1.16 2007-03-20 21:37:32 adam Exp $
  *
  */
 #include <yaz/record_conv.h>
@@ -185,8 +185,10 @@ static int conv_convert_test(yaz_record_conv_t p,
             else if (strcmp(output_expect_record, wrbuf_cstr(output_record)))
             {
                 ret = 0;
-                printf("got-output_record = %s\n", wrbuf_cstr(output_record));
-                printf("output_expect_record = %s\n", output_expect_record);
+                printf("got-output_record len=%d: %s\n", 
+                       wrbuf_len(output_record),wrbuf_cstr(output_record));
+                printf("output_expect_record len=%d %s\n",
+                       strlen(output_expect_record), output_expect_record);
             }
             else
             {
index 37d3bbf..33b9944 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (C) 1995-2007, Index Data ApS
  * See the file LICENSE for details.
  *
- * $Id: tsticonv.c,v 1.28 2007-03-19 14:40:07 adam Exp $
+ * $Id: tsticonv.c,v 1.29 2007-03-20 21:37:32 adam Exp $
  */
 
 #if HAVE_CONFIG_H
@@ -76,8 +76,12 @@ static int tst_convert_l(yaz_iconv_t cd, size_t in_len, const char *in_buf,
                 return 0;
         }
         else
+        {
+            yaz_iconv(cd, 0, 0, &outbuf, &outbytesleft);
             break;
+        }
     }
+
     return compare_buffers("tsticonv 22", 0,
                            expect_len, expect_buf,
                            outbuf - outbuf0, outbuf0);
@@ -104,6 +108,14 @@ static int tst_convert(yaz_iconv_t cd, const char *buf, const char *cmpbuf)
             if (e != YAZ_ICONV_E2BIG)
                 break;
         }
+        else
+        {
+            size_t outbytesleft = sizeof(outbuf);
+            char *outp = outbuf;
+            r = yaz_iconv(cd, 0, 0, &outp, &outbytesleft);
+            wrbuf_write(b, outbuf, outp - outbuf);
+            break;
+        }
     }
     if (wrbuf_len(b) == strlen(cmpbuf) 
         && !memcmp(cmpbuf, wrbuf_buf(b), wrbuf_len(b)))
@@ -266,6 +278,9 @@ static void dconvert(int mandatory, const char *tmpcode)
             return;
         r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
         YAZ_CHECK(r != (size_t) (-1));
+
+        r = yaz_iconv(cd, 0, 0, &outbuf, &outbytesleft);
+        YAZ_CHECK(r != (size_t) (-1));
         yaz_iconv_close(cd);
         if (r == (size_t) (-1))
             return;
@@ -281,11 +296,19 @@ static void dconvert(int mandatory, const char *tmpcode)
         outbytesleft = sizeof(outbuf1);
         r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
         YAZ_CHECK(r != (size_t) (-1));
+
+        r = yaz_iconv(cd, 0, 0, &outbuf, &outbytesleft);
+        if (r == (size_t)(-1))
+        {
+            fprintf(stderr, "failed\n");
+        }
+        YAZ_CHECK(r != (size_t) (-1));
+
         if (r != (size_t)(-1)) 
         {
             ret = compare_buffers("dconvert", i,
                                   strlen(iso_8859_1_a[i]), iso_8859_1_a[i],
-                              sizeof(outbuf1) - outbytesleft, outbuf1);
+                                  sizeof(outbuf1) - outbytesleft, outbuf1);
             YAZ_CHECK(ret);
         }
         yaz_iconv_close(cd);