Better rule for "fuzzy"
[yaz-moved-to-github.git] / src / siconv.c
index 9e5393b..25af4cc 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (C) 1995-2005, Index Data ApS
  * See the file LICENSE for details.
  *
- * $Id: siconv.c,v 1.15 2005-11-06 01:28:09 adam Exp $
+ * $Id: siconv.c,v 1.18 2006-03-25 14:41:53 adam Exp $
  */
 /**
  * \file siconv.c
@@ -133,7 +133,7 @@ static unsigned long yaz_read_UTF8 (yaz_iconv_t cd, unsigned char *inp,
     else if (inp[0] <= 0xef && inbytesleft >= 3)
     {
         x = ((inp[0] & 0x0f) << 12) | ((inp[1] & 0x3f) << 6) |
-            (inp[1] & 0x3f);
+            (inp[2] & 0x3f);
         if (x >= 0x800)
             *no_read = 3;
         else
@@ -265,6 +265,13 @@ static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
            We'll increment the no_read counter by 1, since we want to skip over
            the processing of the closing ligature character
         */
+        /* This code is no longer necessary. Our handler code in
+           yaz_marc8_?_conv (generated by charconv.tcl) now returns
+           0 and no_read=1 when a sequence does not match the input.
+           The SECOND HALFs in codetables.xml produce a non-existent
+           entry in the conversion trie. Hence, when met, the input byte
+           is skipped as it should be (in yaz_iconv).
+        */
 #if 0
         if (x == 0x0361 || x == 0x0360)
             *no_read += 1;
@@ -369,6 +376,7 @@ static size_t yaz_write_UTF8 (yaz_iconv_t cd, unsigned long x,
                               int last)
 {
     unsigned char *outp = (unsigned char *) *outbuf;
+
     if (x <= 0x7f && *outbytesleft >= 1)
     {
         *outp++ = (unsigned char) x;
@@ -507,12 +515,7 @@ static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
     };
     unsigned char *outp = (unsigned char *) *outbuf;
 
-    if (!last && x > 32 && x < 127 && cd->compose_char == 0)
-    {
-        cd->compose_char = x;
-        return 0;
-    }
-    else if (cd->compose_char)
+    if (cd->compose_char)
     {
         int i;
         for (i = 0; comb[i].x1; i++)
@@ -521,43 +524,38 @@ static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
                 x = comb[i].y;
                 break;
             }
+        if (*outbytesleft < 1)
+        {  /* no room. Retain compose_char and bail out */
+            cd->my_errno = YAZ_ICONV_E2BIG;
+            return (size_t)(-1);
+        }
         if (!comb[i].x1) 
-        {   /* not found */
-            if (*outbytesleft >= 1)
-            {
-                *outp++ = (unsigned char) cd->compose_char;
-                (*outbytesleft)--;
-                *outbuf = (char *) outp;
-                if (!last && x > 32 && x < 127)
-                {
-                    cd->compose_char = x;
-                    return 0;
-                }
-            }
-            else
-            {
-                cd->my_errno = YAZ_ICONV_E2BIG;
-                return (size_t)(-1);
-            }
+        {   /* not found. Just write compose_char */
+            *outp++ = (unsigned char) cd->compose_char;
+            (*outbytesleft)--;
+            *outbuf = (char *) outp;
         }
-        /* compose_char and old x combined to one new char: x */
+        /* compose_char used so reset it. x now holds current char */
         cd->compose_char = 0;
     }
-    if (x > 255 || x < 1)
+
+    if (!last && x > 32 && x < 127 && cd->compose_char == 0)
     {
-        cd->my_errno = YAZ_ICONV_EILSEQ;
-        return (size_t) -1;
+        cd->compose_char = x;
+        return 0;
     }
-    else if (*outbytesleft >= 1)
+    else if (x > 255 || x < 1)
     {
-        *outp++ = (unsigned char) x;
-        (*outbytesleft)--;
+        cd->my_errno = YAZ_ICONV_EILSEQ;
+        return (size_t) -1;
     }
-    else 
+    else if (*outbytesleft < 1)
     {
         cd->my_errno = YAZ_ICONV_E2BIG;
         return (size_t)(-1);
     }
+    *outp++ = (unsigned char) x;
+    (*outbytesleft)--;
     *outbuf = (char *) outp;
     return 0;
 }
@@ -712,6 +710,7 @@ size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
 {
     char *inbuf0;
     size_t r = 0;
+
 #if HAVE_ICONV_H
     if (cd->iconv_cd)
     {
@@ -799,9 +798,12 @@ size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
             {
                 /* unable to write it. save it because read_handle cannot
                    rewind .. */
-                cd->unget_x = x;
-                cd->no_read_x = no_read;
-                break;
+                if (cd->my_errno == YAZ_ICONV_E2BIG)
+                {
+                    cd->unget_x = x;
+                    cd->no_read_x = no_read;
+                    break;
+                }
             }
             cd->unget_x = 0;
         }