Bump year
[yaz-moved-to-github.git] / src / siconv.c
index a10fd3d..3b5928d 100644 (file)
@@ -1,11 +1,18 @@
 /*
- * Copyright (c) 1997-2003, Index Data
+ * Copyright (C) 1995-2005, Index Data ApS
  * See the file LICENSE for details.
  *
- * $Id: siconv.c,v 1.2 2004-03-11 10:09:11 oleg Exp $
+ * $Id: siconv.c,v 1.8 2005-01-15 19:47:14 adam Exp $
+ */
+/**
+ * \file siconv.c
+ * \brief Implements simple ICONV
+ *
+ * This implements an interface similar to that of iconv and
+ * is used by YAZ to interface with iconv (if present).
+ * For systems where iconv is not present, this layer
+ * provides a few important conversions: UTF-8, MARC-8, Latin-1.
  */
-
-/* mini iconv and wrapper for system iconv library (if present) */
 
 #if HAVE_CONFIG_H
 #include <config.h>
 
 #include <yaz/yaz-util.h>
 
-unsigned long yaz_marc8_conv (unsigned char *inp, size_t inbytesleft,
-                              size_t *no_read);
+unsigned long yaz_marc8_1_conv (unsigned char *inp, size_t inbytesleft,
+                             size_t *no_read, int *combining); /* selected by yaz_read_marc8 for esc modes 'B', 'E', 's' (Basic ASCII, ANSEL, ASCII) */
+unsigned long yaz_marc8_2_conv (unsigned char *inp, size_t inbytesleft,
+                               size_t *no_read, int *combining); /* esc mode 'g' (Greek) */
+unsigned long yaz_marc8_3_conv (unsigned char *inp, size_t inbytesleft,
+                               size_t *no_read, int *combining); /* esc mode 'b' (Subscripts) */
+unsigned long yaz_marc8_4_conv (unsigned char *inp, size_t inbytesleft,
+                               size_t *no_read, int *combining); /* esc mode 'p' (Superscripts) */
+unsigned long yaz_marc8_5_conv (unsigned char *inp, size_t inbytesleft,
+                               size_t *no_read, int *combining); /* esc mode '2' (Basic Hebrew) */
+unsigned long yaz_marc8_6_conv (unsigned char *inp, size_t inbytesleft,
+                               size_t *no_read, int *combining); /* esc modes 'N', 'Q' (Basic and Extended Cyrillic) */
+unsigned long yaz_marc8_7_conv (unsigned char *inp, size_t inbytesleft,
+                               size_t *no_read, int *combining); /* esc modes '3', '4' (Basic and Extended Arabic) */
+unsigned long yaz_marc8_8_conv (unsigned char *inp, size_t inbytesleft,
+                               size_t *no_read, int *combining); /* esc mode 'S' (Greek) */
+unsigned long yaz_marc8_9_conv (unsigned char *inp, size_t inbytesleft,
+                               size_t *no_read, int *combining); /* esc mode '1' (Chinese, Japanese, Korean - EACC) */
     
 struct yaz_iconv_struct {
     int my_errno;
@@ -36,6 +59,9 @@ struct yaz_iconv_struct {
                                  size_t inbytesleft, size_t *no_read);
     size_t (*write_handle)(yaz_iconv_t cd, unsigned long x,
                            char **outbuf, size_t *outbytesleft);
+    int marc8_esc_mode;
+    int marc8_comb_x;
+    int marc8_comb_no_read;
 #if HAVE_ICONV_H
     iconv_t iconv_cd;
 #endif
@@ -216,7 +242,102 @@ static unsigned long yaz_read_wchar_t (yaz_iconv_t cd, unsigned char *inp,
 static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
                                      size_t inbytesleft, size_t *no_read) /* reads one MARC-8 character value from inp; *no_read receives the input bytes accounted for; returns 0 on error or when no character is available */
 {
-    return yaz_marc8_conv(inp, inbytesleft, no_read);
+    if (cd->marc8_comb_x) /* a character deferred by an earlier call is pending: hand it out now */
+    {
+       unsigned long x = cd->marc8_comb_x;
+       *no_read = cd->marc8_comb_no_read; /* byte count stored together with the pending character */
+       cd->marc8_comb_x = 0; /* clear the pending slot */
+       return x;
+    }
+    *no_read = 0;
+    while(inbytesleft >= 1 && inp[0] == 27) /* 27 is ASCII ESC: consume every leading escape sequence */
+    {
+       size_t inbytesleft0 = inbytesleft; /* remembered to measure the length of this escape sequence */
+       inp++;
+       inbytesleft--;
+       while(inbytesleft > 0 && strchr("(,$!", *inp)) /* skip intermediate bytes; NOTE(review): strchr also matches a 0 byte, so that is skipped too */
+       {
+           inbytesleft--;
+           inp++;
+       }
+       if (inbytesleft <= 0) /* input ended before the final byte of the escape sequence */
+       {
+           *no_read = 0;
+           cd->my_errno = YAZ_ICONV_EINVAL;
+           return 0;
+       }
+       cd->marc8_esc_mode = *inp++; /* final byte selects the converter used by the switch below; kept in cd across calls */
+       inbytesleft--;
+       (*no_read) += inbytesleft0 - inbytesleft; /* count the bytes of this escape sequence */
+    }
+    if (inbytesleft <= 0) /* nothing follows the escape sequences; *no_read holds only their length */
+       return 0;
+    else
+    {
+       unsigned long x;
+       int comb = 0; /* passed to the converter as its combining out-parameter */
+       size_t no_read_sub = 0; /* bytes consumed by the converter call below */
+
+       switch(cd->marc8_esc_mode)
+       {
+       case 'B':  /* Basic ASCII */
+       case 'E':  /* ANSEL */
+       case 's':  /* ASCII */
+           x = yaz_marc8_1_conv(inp, inbytesleft, &no_read_sub, &comb);
+           break;
+       case 'g':  /* Greek */
+           x = yaz_marc8_2_conv(inp, inbytesleft, &no_read_sub, &comb);
+           break;
+       case 'b':  /* Subscripts */
+           x = yaz_marc8_3_conv(inp, inbytesleft, &no_read_sub, &comb);
+           break;
+       case 'p':  /* Superscripts */
+           x = yaz_marc8_4_conv(inp, inbytesleft, &no_read_sub, &comb);
+           break;
+       case '2':  /* Basic Hebrew */
+           x = yaz_marc8_5_conv(inp, inbytesleft, &no_read_sub, &comb);
+           break;
+       case 'N':  /* Basic Cyrillic */
+       case 'Q':  /* Extended Cyrillic */
+           x = yaz_marc8_6_conv(inp, inbytesleft, &no_read_sub, &comb);
+           break;
+       case '3':  /* Basic Arabic */
+       case '4':  /* Extended Arabic */
+           x = yaz_marc8_7_conv(inp, inbytesleft, &no_read_sub, &comb);
+           break;
+       case 'S':  /* Greek */
+           x = yaz_marc8_8_conv(inp, inbytesleft, &no_read_sub, &comb);
+           break;
+       case '1':  /* Chinese, Japanese, Korean (EACC) */
+           x = yaz_marc8_9_conv(inp, inbytesleft, &no_read_sub, &comb);
+           break;
+       default: /* escape mode with no converter: report an illegal sequence */
+           *no_read = 0;
+           cd->my_errno = YAZ_ICONV_EILSEQ;
+           return 0;
+       }
+#if 0
+       printf ("esc mode=%c x=%04lX comb=%d\n", cd->marc8_esc_mode, x, comb);
+#endif
+       *no_read += no_read_sub; /* escape sequence bytes plus the bytes of this character */
+
+       if (comb && cd->marc8_comb_x == 0) /* converter flagged x as combining and nothing is pending yet */
+       {
+           size_t tmp_read = 0;
+           unsigned long next_x;
+
+           /* read next char .. NOTE(review): inp was already advanced past the escape bytes that *no_read also counts, so this offset (and the length) look wrong after an escape sequence - confirm */
+           next_x = yaz_read_marc8(cd, inp + *no_read,
+                                   inbytesleft - *no_read, &tmp_read);
+           /* save this x for later .. NOTE(review): overwrites whatever the nested call stored if it also deferred a character - confirm */
+           cd->marc8_comb_x = x;
+           /* save next read for later: this byte count is reported when the pending x is returned by the following call */
+           cd->marc8_comb_no_read = tmp_read;
+           /* return next x - thereby swap: the following character is delivered before the combining one */
+           x = next_x;
+       }
+       return x;
+    }
 }
 
 static size_t yaz_write_UTF8 (yaz_iconv_t cd, unsigned long x,
@@ -379,6 +500,8 @@ yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
     cd->read_handle = 0;
     cd->init_handle = 0;
     cd->my_errno = YAZ_ICONV_UNKNOWN;
+    cd->marc8_esc_mode = 'B';
+    cd->marc8_comb_x = 0;
 
     /* a useful hack: if fromcode has leading @,
        the library does not use YAZ's own conversions .. */
@@ -514,9 +637,12 @@ size_t yaz_iconv (yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
             r = (size_t)(-1);
             break;
         }
-        r = (cd->write_handle)(cd, x, outbuf, outbytesleft);
-        if (r)
-            break;
+       if (x)
+       {
+           r = (cd->write_handle)(cd, x, outbuf, outbytesleft);
+           if (r)
+               break;
+       }
         *inbytesleft -= no_read;
         (*inbuf) += no_read;
     }