Fix sample PQF
[yaz-moved-to-github.git] / util / siconv.c
index a40cc0c..73d7148 100644 (file)
@@ -1,8 +1,8 @@
 /*
- * Copyright (c) 1997-2002, Index Data
+ * Copyright (c) 1997-2003, Index Data
  * See the file LICENSE for details.
  *
- * $Id: siconv.c,v 1.3 2002-08-28 19:34:36 adam Exp $
+ * $Id: siconv.c,v 1.9 2003-01-06 08:20:28 adam Exp $
  */
 
 /* mini iconv and wrapper for system iconv library (if present) */
@@ -14,6 +14,9 @@
 #include <errno.h>
 #include <string.h>
 #include <ctype.h>
+#if HAVE_WCHAR_H
+#include <wchar.h>
+#endif
 
 #if HAVE_ICONV_H
 #include <iconv.h>
@@ -21,6 +24,9 @@
 
 #include <yaz/yaz-util.h>
 
+unsigned long yaz_marc8_conv (unsigned char *inp, size_t inbytesleft,
+                              size_t *no_read);
+    
 struct yaz_iconv_struct {
     int my_errno;
     int init_flag;
@@ -185,53 +191,81 @@ static unsigned long yaz_read_UCS4LE (yaz_iconv_t cd, unsigned char *inp,
     return x;
 }
 
+#if HAVE_WCHAR_H
+static unsigned long yaz_read_wchar_t (yaz_iconv_t cd, unsigned char *inp,
+                                       size_t inbytesleft, size_t *no_read)
+{
+    unsigned long x = 0;
+    
+    if (inbytesleft < sizeof(wchar_t))
+    {
+        cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
+        *no_read = 0;
+    }
+    else
+    {
+        wchar_t wch;
+        memcpy (&wch, inp, sizeof(wch));
+        x = wch;
+        *no_read = sizeof(wch);
+    }
+    return x;
+}
+#endif
+
+static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
+                                     size_t inbytesleft, size_t *no_read)
+{
+    return yaz_marc8_conv(inp, inbytesleft, no_read);
+}
+
 static size_t yaz_write_UTF8 (yaz_iconv_t cd, unsigned long x,
                               char **outbuf, size_t *outbytesleft)
 {
-    unsigned char *outp = *outbuf;
+    unsigned char *outp = (unsigned char *) *outbuf;
     if (x <= 0x7f && *outbytesleft >= 1)
     {
-        *outp++ = x;
+        *outp++ = (unsigned char) x;
         (*outbytesleft)--;
     } 
     else if (x <= 0x7ff && *outbytesleft >= 2)
     {
-        *outp++ = (x >> 6) | 0xc0;
-        *outp++ = (x & 0x3f) | 0x80;
+        *outp++ = (unsigned char) ((x >> 6) | 0xc0);
+        *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
         (*outbytesleft) -= 2;
     }
     else if (x <= 0xffff && *outbytesleft >= 3)
     {
-        *outp++ = (x >> 12) | 0xe0;
-        *outp++ = ((x >> 6) & 0x3f) | 0x80;
-        *outp++ = (x & 0x3f) | 0x80;
+        *outp++ = (unsigned char) ((x >> 12) | 0xe0);
+        *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
+        *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
         (*outbytesleft) -= 3;
     }
     else if (x <= 0x1fffff && *outbytesleft >= 4)
     {
-        *outp++ = (x >> 18) | 0xf0;
-        *outp++ = ((x >> 12) & 0x3f) | 0x80;
-        *outp++ = ((x >> 6)  & 0x3f) | 0x80;
-        *outp++ = (x & 0x3f) | 0x80;
+        *outp++ = (unsigned char) ((x >> 18) | 0xf0);
+        *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
+        *outp++ = (unsigned char) (((x >> 6)  & 0x3f) | 0x80);
+        *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
         (*outbytesleft) -= 4;
     }
     else if (x <= 0x3ffffff && *outbytesleft >= 5)
     {
-        *outp++ = (x >> 24) | 0xf8;
-        *outp++ = ((x >> 18) & 0x3f) | 0x80;
-        *outp++ = ((x >> 12) & 0x3f) | 0x80;
-        *outp++ = ((x >> 6)  & 0x3f) | 0x80;
-        *outp++ = (x & 0x3f) | 0x80;
+        *outp++ = (unsigned char) ((x >> 24) | 0xf8);
+        *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
+        *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
+        *outp++ = (unsigned char) (((x >> 6)  & 0x3f) | 0x80);
+        *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
         (*outbytesleft) -= 5;
     }
     else if (*outbytesleft >= 6)
     {
-        *outp++ = (x >> 30) | 0xfc;
-        *outp++ = ((x >> 24) & 0x3f) | 0x80;
-        *outp++ = ((x >> 18) & 0x3f) | 0x80;
-        *outp++ = ((x >> 12) & 0x3f) | 0x80;
-        *outp++ = ((x >> 6)  & 0x3f) | 0x80;
-        *outp++ = (x & 0x3f) | 0x80;
+        *outp++ = (unsigned char) ((x >> 30) | 0xfc);
+        *outp++ = (unsigned char) (((x >> 24) & 0x3f) | 0x80);
+        *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
+        *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
+        *outp++ = (unsigned char) (((x >> 6)  & 0x3f) | 0x80);
+        *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
         (*outbytesleft) -= 6;
     }
     else 
@@ -239,14 +273,14 @@ static size_t yaz_write_UTF8 (yaz_iconv_t cd, unsigned long x,
         cd->my_errno = YAZ_ICONV_E2BIG;  /* not room for output */
         return (size_t)(-1);
     }
-    *outbuf = outp;
+    *outbuf = (char *) outp;
     return 0;
 }
 
 static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
                                    char **outbuf, size_t *outbytesleft)
 {
-    unsigned char *outp = *outbuf;
+    unsigned char *outp = (unsigned char *) *outbuf;
     if (x > 255 || x < 1)
     {
         cd->my_errno = YAZ_ICONV_EILSEQ;
@@ -254,7 +288,7 @@ static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
     }
     else if (*outbytesleft >= 1)
     {
-        *outp++ = x;
+        *outp++ = (unsigned char) x;
         (*outbytesleft)--;
     }
     else 
@@ -262,7 +296,7 @@ static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
         cd->my_errno = YAZ_ICONV_E2BIG;
         return (size_t)(-1);
     }
-    *outbuf = outp;
+    *outbuf = (char *) outp;
     return 0;
 }
 
@@ -270,13 +304,13 @@ static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
 static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x,
                               char **outbuf, size_t *outbytesleft)
 {
-    unsigned char *outp = *outbuf;
+    unsigned char *outp = (unsigned char *) *outbuf;
     if (*outbytesleft >= 4)
     {
-        *outp++ = x<<24;
-        *outp++ = x<<16;
-        *outp++ = x<<8;
-        *outp++ = x;
+        *outp++ = (unsigned char) (x<<24);
+        *outp++ = (unsigned char) (x<<16);
+        *outp++ = (unsigned char) (x<<8);
+        *outp++ = (unsigned char) x;
         (*outbytesleft) -= 4;
     }
     else
@@ -284,20 +318,20 @@ static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x,
         cd->my_errno = YAZ_ICONV_E2BIG;
         return (size_t)(-1);
     }
-    *outbuf = outp;
+    *outbuf = (char *) outp;
     return 0;
 }
 
 static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x,
                                 char **outbuf, size_t *outbytesleft)
 {
-    unsigned char *outp = *outbuf;
+    unsigned char *outp = (unsigned char *) *outbuf;
     if (*outbytesleft >= 4)
     {
-        *outp++ = x;
-        *outp++ = x<<8;
-        *outp++ = x<<16;
-        *outp++ = x<<24;
+        *outp++ = (unsigned char) x;
+        *outp++ = (unsigned char) (x<<8);
+        *outp++ = (unsigned char) (x<<16);
+        *outp++ = (unsigned char) (x<<24);
         (*outbytesleft) -= 4;
     }
     else
@@ -305,40 +339,84 @@ static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x,
         cd->my_errno = YAZ_ICONV_E2BIG;
         return (size_t)(-1);
     }
-    *outbuf = outp;
+    *outbuf = (char *) outp;
     return 0;
 }
 
+#if HAVE_WCHAR_H
+static size_t yaz_write_wchar_t (yaz_iconv_t cd, unsigned long x,
+                                 char **outbuf, size_t *outbytesleft)
+{
+    unsigned char *outp = (unsigned char *) *outbuf;
+
+    if (*outbytesleft >= sizeof(wchar_t))
+    {
+        wchar_t wch = x;
+        memcpy(outp, &wch, sizeof(wch));
+        outp += sizeof(wch);
+        (*outbytesleft) -= sizeof(wch);
+    }
+    else
+    {
+        cd->my_errno = YAZ_ICONV_E2BIG;
+        return (size_t)(-1);
+    }
+    *outbuf = (char *) outp;
+    return 0;
+}
+#endif
+
+int yaz_iconv_isbuiltin(yaz_iconv_t cd)
+{
+    return cd->read_handle && cd->write_handle;
+}
+
 yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
 {
-    yaz_iconv_t cd = xmalloc (sizeof(*cd));
+    yaz_iconv_t cd = (yaz_iconv_t) xmalloc (sizeof(*cd));
 
     cd->write_handle = 0;
     cd->read_handle = 0;
     cd->init_handle = 0;
     cd->my_errno = YAZ_ICONV_UNKNOWN;
 
-    if (!yaz_matchstr(fromcode, "UTF8"))
+    /* a useful hack: if fromcode has a leading @, the library
+       will not use YAZ's own conversions .. */
+    if (fromcode[0] == '@')
+        fromcode++;
+    else
     {
-        cd->read_handle = yaz_read_UTF8;
-        cd->init_handle = yaz_init_UTF8;
+        if (!yaz_matchstr(fromcode, "UTF8"))
+        {
+            cd->read_handle = yaz_read_UTF8;
+            cd->init_handle = yaz_init_UTF8;
+        }
+        else if (!yaz_matchstr(fromcode, "ISO88591"))
+            cd->read_handle = yaz_read_ISO8859_1;
+        else if (!yaz_matchstr(fromcode, "UCS4"))
+            cd->read_handle = yaz_read_UCS4;
+        else if (!yaz_matchstr(fromcode, "UCS4LE"))
+            cd->read_handle = yaz_read_UCS4LE;
+        else if (!yaz_matchstr(fromcode, "MARC8"))
+            cd->read_handle = yaz_read_marc8;
+#if HAVE_WCHAR_H
+        else if (!yaz_matchstr(fromcode, "WCHAR_T"))
+            cd->read_handle = yaz_read_wchar_t;
+#endif
+        
+        if (!yaz_matchstr(tocode, "UTF8"))
+            cd->write_handle = yaz_write_UTF8;
+        else if (!yaz_matchstr(tocode, "ISO88591"))
+            cd->write_handle = yaz_write_ISO8859_1;
+        else if (!yaz_matchstr (tocode, "UCS4"))
+            cd->write_handle = yaz_write_UCS4;
+        else if (!yaz_matchstr(tocode, "UCS4LE"))
+            cd->write_handle = yaz_write_UCS4LE;
+#if HAVE_WCHAR_H
+        else if (!yaz_matchstr(tocode, "WCHAR_T"))
+            cd->write_handle = yaz_write_wchar_t;
+#endif
     }
-    else if (!yaz_matchstr(fromcode, "ISO88591"))
-        cd->read_handle = yaz_read_ISO8859_1;
-    else if (!yaz_matchstr(fromcode, "UCS4"))
-        cd->read_handle = yaz_read_UCS4;
-    else if (!yaz_matchstr(fromcode, "UCS4LE"))
-        cd->read_handle = yaz_read_UCS4LE;
-    
-    if (!yaz_matchstr(tocode, "UTF8"))
-        cd->write_handle = yaz_write_UTF8;
-    else if (!yaz_matchstr(tocode, "ISO88591"))
-        cd->write_handle = yaz_write_ISO8859_1;
-    else if (!yaz_matchstr (tocode, "UCS4"))
-        cd->write_handle = yaz_write_UCS4;
-    else if (!yaz_matchstr(tocode, "UCS4LE"))
-        cd->write_handle = yaz_write_UCS4LE;
-
 #if HAVE_ICONV_H
     cd->iconv_cd = 0;
     if (!cd->read_handle || !cd->write_handle)
@@ -373,7 +451,7 @@ size_t yaz_iconv (yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
             iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
         if (r == (size_t)(-1))
         {
-            switch (errno)
+            switch (yaz_errno())
             {
             case E2BIG:
                 cd->my_errno = YAZ_ICONV_E2BIG;
@@ -404,7 +482,8 @@ size_t yaz_iconv (yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
         if (cd->init_handle)
         {
             size_t no_read;
-            size_t r = (cd->init_handle)(cd, *inbuf, *inbytesleft, &no_read);
+            size_t r = (cd->init_handle)(cd, (unsigned char *) *inbuf,
+                                         *inbytesleft, &no_read);
             if (r)
             {
                 if (cd->my_errno == YAZ_ICONV_EINVAL)
@@ -428,7 +507,8 @@ size_t yaz_iconv (yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
             break;
         }
         
-        x = (cd->read_handle)(cd, *inbuf, *inbytesleft, &no_read);
+        x = (cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft,
+                              &no_read);
         if (no_read == 0)
         {
             r = (size_t)(-1);