Fixed bug #2120: Different greek symbols, UTF-8 to MARC-8.
[yaz-moved-to-github.git] / src / siconv.c
index bdb20ee..17e3d89 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (C) 1995-2007, Index Data ApS
  * See the file LICENSE for details.
  *
- * $Id: siconv.c,v 1.40 2007-05-03 22:20:45 adam Exp $
+ * $Id: siconv.c,v 1.50 2008-03-12 08:53:28 adam Exp $
  */
 /**
  * \file siconv.c
 
 #include <yaz/yaz-util.h>
 
-unsigned long yaz_marc8_1_conv(unsigned char *inp, size_t inbytesleft,
+unsigned long yaz_marc8_42_conv(unsigned char *inp, size_t inbytesleft,
                                size_t *no_read, int *combining);
-unsigned long yaz_marc8_2_conv(unsigned char *inp, size_t inbytesleft,
+unsigned long yaz_marc8_45_conv(unsigned char *inp, size_t inbytesleft,
                                size_t *no_read, int *combining);
-unsigned long yaz_marc8_3_conv(unsigned char *inp, size_t inbytesleft,
+unsigned long yaz_marc8_67_conv(unsigned char *inp, size_t inbytesleft,
                                size_t *no_read, int *combining);
-unsigned long yaz_marc8_4_conv(unsigned char *inp, size_t inbytesleft,
+unsigned long yaz_marc8_62_conv(unsigned char *inp, size_t inbytesleft,
                                size_t *no_read, int *combining);
-unsigned long yaz_marc8_5_conv(unsigned char *inp, size_t inbytesleft,
+unsigned long yaz_marc8_70_conv(unsigned char *inp, size_t inbytesleft,
                                size_t *no_read, int *combining);
-unsigned long yaz_marc8_6_conv(unsigned char *inp, size_t inbytesleft,
+unsigned long yaz_marc8_32_conv(unsigned char *inp, size_t inbytesleft,
                                size_t *no_read, int *combining);
-unsigned long yaz_marc8_7_conv(unsigned char *inp, size_t inbytesleft,
+unsigned long yaz_marc8_4E_conv(unsigned char *inp, size_t inbytesleft,
                                size_t *no_read, int *combining);
-unsigned long yaz_marc8_8_conv(unsigned char *inp, size_t inbytesleft,
+unsigned long yaz_marc8_51_conv(unsigned char *inp, size_t inbytesleft,
                                size_t *no_read, int *combining);
-unsigned long yaz_marc8_9_conv(unsigned char *inp, size_t inbytesleft,
+unsigned long yaz_marc8_33_conv(unsigned char *inp, size_t inbytesleft,
+                               size_t *no_read, int *combining);
+unsigned long yaz_marc8_34_conv(unsigned char *inp, size_t inbytesleft,
+                               size_t *no_read, int *combining);
+unsigned long yaz_marc8_53_conv(unsigned char *inp, size_t inbytesleft,
+                               size_t *no_read, int *combining);
+unsigned long yaz_marc8_31_conv(unsigned char *inp, size_t inbytesleft,
                                size_t *no_read, int *combining);
 
 
-unsigned long yaz_marc8r_1_conv(unsigned char *inp, size_t inbytesleft,
-                                size_t *no_read, int *combining);
-unsigned long yaz_marc8r_2_conv(unsigned char *inp, size_t inbytesleft,
-                                size_t *no_read, int *combining);
-unsigned long yaz_marc8r_3_conv(unsigned char *inp, size_t inbytesleft,
-                                size_t *no_read, int *combining);
-unsigned long yaz_marc8r_4_conv(unsigned char *inp, size_t inbytesleft,
-                                size_t *no_read, int *combining);
-unsigned long yaz_marc8r_5_conv(unsigned char *inp, size_t inbytesleft,
-                                size_t *no_read, int *combining);
-unsigned long yaz_marc8r_6_conv(unsigned char *inp, size_t inbytesleft,
-                                size_t *no_read, int *combining);
-unsigned long yaz_marc8r_7_conv(unsigned char *inp, size_t inbytesleft,
-                                size_t *no_read, int *combining);
-unsigned long yaz_marc8r_8_conv(unsigned char *inp, size_t inbytesleft,
-                                size_t *no_read, int *combining);
-unsigned long yaz_marc8r_9_conv(unsigned char *inp, size_t inbytesleft,
-                                size_t *no_read, int *combining);
+unsigned long yaz_marc8r_42_conv(unsigned char *inp, size_t inbytesleft,
+                                 size_t *no_read, int *combining);
+unsigned long yaz_marc8r_45_conv(unsigned char *inp, size_t inbytesleft,
+                                 size_t *no_read, int *combining);
+unsigned long yaz_marc8r_67_conv(unsigned char *inp, size_t inbytesleft,
+                                 size_t *no_read, int *combining);
+unsigned long yaz_marc8r_62_conv(unsigned char *inp, size_t inbytesleft,
+                                 size_t *no_read, int *combining);
+unsigned long yaz_marc8r_70_conv(unsigned char *inp, size_t inbytesleft,
+                                 size_t *no_read, int *combining);
+unsigned long yaz_marc8r_32_conv(unsigned char *inp, size_t inbytesleft,
+                                 size_t *no_read, int *combining);
+unsigned long yaz_marc8r_4E_conv(unsigned char *inp, size_t inbytesleft,
+                                 size_t *no_read, int *combining);
+unsigned long yaz_marc8r_51_conv(unsigned char *inp, size_t inbytesleft,
+                                 size_t *no_read, int *combining);
+unsigned long yaz_marc8r_33_conv(unsigned char *inp, size_t inbytesleft,
+                                 size_t *no_read, int *combining);
+unsigned long yaz_marc8r_34_conv(unsigned char *inp, size_t inbytesleft,
+                                 size_t *no_read, int *combining);
+unsigned long yaz_marc8r_53_conv(unsigned char *inp, size_t inbytesleft,
+                                 size_t *no_read, int *combining);
+unsigned long yaz_marc8r_31_conv(unsigned char *inp, size_t inbytesleft,
+                                 size_t *no_read, int *combining);
+
+#define ESC "\033"
 
 struct yaz_iconv_struct {
     int my_errno;
@@ -86,7 +100,8 @@ struct yaz_iconv_struct {
                            char **outbuf, size_t *outbytesleft);
     size_t (*flush_handle)(yaz_iconv_t cd,
                            char **outbuf, size_t *outbytesleft);
-    int marc8_esc_mode;
+    int g0_mode;
+    int g1_mode;
 
     int comb_offset;
     int comb_size;
@@ -99,11 +114,11 @@ struct yaz_iconv_struct {
 #endif
     unsigned long compose_char;
 
-    unsigned long write_marc8_comb_ch[8];
-    size_t write_marc8_comb_no;
     unsigned write_marc8_second_half_char;
     unsigned long write_marc8_last;
-    const char *write_marc8_page_chr;
+    const char *write_marc8_lpage;
+    const char *write_marc8_g0;
+    const char *write_marc8_g1;
 };
 
 static struct {
@@ -177,6 +192,10 @@ static struct {
     { 0, 0, 0}
 };
 
+static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd, 
+                                       char **outbuf, size_t *outbytesleft,
+                                       const char *page_chr);
+
 static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd, unsigned char *inp,
                                          size_t inbytesleft, size_t *no_read)
 {
@@ -212,6 +231,7 @@ unsigned long yaz_read_UTF8_char(unsigned char *inp,
 {
     unsigned long x = 0;
 
+    *no_read = 0; /* by default */
     if (inp[0] <= 0x7f)
     {
         x = inp[0];
@@ -219,75 +239,86 @@ unsigned long yaz_read_UTF8_char(unsigned char *inp,
     }
     else if (inp[0] <= 0xbf || inp[0] >= 0xfe)
     {
-        *no_read = 0;
         *error = YAZ_ICONV_EILSEQ;
     }
     else if (inp[0] <= 0xdf && inbytesleft >= 2)
     {
-        x = ((inp[0] & 0x1f) << 6) | (inp[1] & 0x3f);
-        if (x >= 0x80)
-            *no_read = 2;
-        else
+        if ((inp[1] & 0xc0) == 0x80)
         {
-            *no_read = 0;
-            *error = YAZ_ICONV_EILSEQ;
+            x = ((inp[0] & 0x1f) << 6) | (inp[1] & 0x3f);
+            if (x >= 0x80)
+                *no_read = 2;
+            else
+                *error = YAZ_ICONV_EILSEQ;
         }
+        else
+            *error = YAZ_ICONV_EILSEQ;
     }
     else if (inp[0] <= 0xef && inbytesleft >= 3)
     {
-        x = ((inp[0] & 0x0f) << 12) | ((inp[1] & 0x3f) << 6) |
-            (inp[2] & 0x3f);
-        if (x >= 0x800)
-            *no_read = 3;
-        else
+        if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80)
         {
-            *no_read = 0;
-            *error = YAZ_ICONV_EILSEQ;
+            x = ((inp[0] & 0x0f) << 12) | ((inp[1] & 0x3f) << 6) |
+                (inp[2] & 0x3f);
+            if (x >= 0x800)
+                *no_read = 3;
+            else
+                *error = YAZ_ICONV_EILSEQ;
         }
-    }
+        else
+            *error = YAZ_ICONV_EILSEQ;
+    }            
     else if (inp[0] <= 0xf7 && inbytesleft >= 4)
     {
-        x =  ((inp[0] & 0x07) << 18) | ((inp[1] & 0x3f) << 12) |
-            ((inp[2] & 0x3f) << 6) | (inp[3] & 0x3f);
-        if (x >= 0x10000)
-            *no_read = 4;
-        else
+        if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
+            && (inp[3] & 0xc0) == 0x80)
         {
-            *no_read = 0;
-            *error = YAZ_ICONV_EILSEQ;
+            x = ((inp[0] & 0x07) << 18) | ((inp[1] & 0x3f) << 12) |
+                ((inp[2] & 0x3f) << 6) | (inp[3] & 0x3f);
+            if (x >= 0x10000)
+                *no_read = 4;
+            else
+                *error = YAZ_ICONV_EILSEQ;
         }
+        else
+            *error = YAZ_ICONV_EILSEQ;
     }
     else if (inp[0] <= 0xfb && inbytesleft >= 5)
     {
-        x =  ((inp[0] & 0x03) << 24) | ((inp[1] & 0x3f) << 18) |
-            ((inp[2] & 0x3f) << 12) | ((inp[3] & 0x3f) << 6) |
-            (inp[4] & 0x3f);
-        if (x >= 0x200000)
-            *no_read = 5;
-        else
+        if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
+            && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80)
         {
-            *no_read = 0;
-            *error = YAZ_ICONV_EILSEQ;
+            x = ((inp[0] & 0x03) << 24) | ((inp[1] & 0x3f) << 18) |
+                ((inp[2] & 0x3f) << 12) | ((inp[3] & 0x3f) << 6) |
+                (inp[4] & 0x3f);
+            if (x >= 0x200000)
+                *no_read = 5;
+            else
+                *error = YAZ_ICONV_EILSEQ;
         }
+        else
+            *error = YAZ_ICONV_EILSEQ;
     }
     else if (inp[0] <= 0xfd && inbytesleft >= 6)
     {
-        x =  ((inp[0] & 0x01) << 30) | ((inp[1] & 0x3f) << 24) |
-            ((inp[2] & 0x3f) << 18) | ((inp[3] & 0x3f) << 12) |
-            ((inp[4] & 0x3f) << 6) | (inp[5] & 0x3f);
-        if (x >= 0x4000000)
-            *no_read = 6;
-        else
+        if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
+            && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80
+            && (inp[5] & 0xc0) == 0x80)
         {
-            *no_read = 0;
-            *error = YAZ_ICONV_EILSEQ;
+            x = ((inp[0] & 0x01) << 30) | ((inp[1] & 0x3f) << 24) |
+                ((inp[2] & 0x3f) << 18) | ((inp[3] & 0x3f) << 12) |
+                ((inp[4] & 0x3f) << 6) | (inp[5] & 0x3f);
+            if (x >= 0x4000000)
+                *no_read = 6;
+            else
+                *error = YAZ_ICONV_EILSEQ;
         }
+        else
+            *error = YAZ_ICONV_EILSEQ;
     }
     else
-    {
-        *no_read = 0;
-        *error = YAZ_ICONV_EINVAL;
-    }
+        *error = YAZ_ICONV_EINVAL;  /* incomplete sequence */
+
     return x;
 }
 
@@ -1138,64 +1169,93 @@ static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp,
     *no_read = 0;
     while(inbytesleft >= 1 && inp[0] == 27)
     {
+        int ch;
         size_t inbytesleft0 = inbytesleft;
         inp++;
         inbytesleft--;
-        while(inbytesleft > 0 && strchr("(,$!)-", *inp))
+        if (inbytesleft > 0 && *inp == '$')
         {
             inbytesleft--;
             inp++;
         }
-        if (inbytesleft <= 0)
+        if (inbytesleft <= 1)
         {
             *no_read = 0;
             cd->my_errno = YAZ_ICONV_EINVAL;
             return 0;
         }
-        cd->marc8_esc_mode = *inp++;
         inbytesleft--;
+        ch = *inp++;
+        if (inbytesleft > 0 && (ch == '(' || ch == ','))
+        {
+            inbytesleft--;
+            cd->g0_mode = *inp++;
+        }
+        else if (inbytesleft > 0 && (ch == ')' || ch == '-'))
+        {
+            inbytesleft--;
+            cd->g1_mode = *inp++;
+        }
+        else
+            cd->g0_mode = ch;
+
         (*no_read) += inbytesleft0 - inbytesleft;
     }
     if (inbytesleft <= 0)
         return 0;
+    else if (*inp == ' ')
+    {
+        *no_read += 1;
+        return ' ';
+    }
     else
     {
         unsigned long x;
         size_t no_read_sub = 0;
+        int mode = *inp < 128 ? cd->g0_mode : cd->g1_mode;
         *comb = 0;
 
-        switch(cd->marc8_esc_mode)
+        switch(mode)
         {
         case 'B':  /* Basic ASCII */
-        case 'E':  /* ANSEL */
         case 's':  /* ASCII */
-            x = yaz_marc8_1_conv(inp, inbytesleft, &no_read_sub, comb);
+        case 'E':  /* ANSEL */
+            x = yaz_marc8_42_conv(inp, inbytesleft, &no_read_sub, comb);
+            if (!x)
+            {
+                no_read_sub = 0;
+                x = yaz_marc8_45_conv(inp, inbytesleft, &no_read_sub, comb);
+            }
             break;
         case 'g':  /* Greek */
-            x = yaz_marc8_2_conv(inp, inbytesleft, &no_read_sub, comb);
+            x = yaz_marc8_67_conv(inp, inbytesleft, &no_read_sub, comb);
             break;
         case 'b':  /* Subscripts */
-            x = yaz_marc8_3_conv(inp, inbytesleft, &no_read_sub, comb);
+            x = yaz_marc8_62_conv(inp, inbytesleft, &no_read_sub, comb);
             break;
         case 'p':  /* Superscripts */
-            x = yaz_marc8_4_conv(inp, inbytesleft, &no_read_sub, comb);
+            x = yaz_marc8_70_conv(inp, inbytesleft, &no_read_sub, comb);
             break;
         case '2':  /* Basic Hebrew */
-            x = yaz_marc8_5_conv(inp, inbytesleft, &no_read_sub, comb);
+            x = yaz_marc8_32_conv(inp, inbytesleft, &no_read_sub, comb);
             break;
         case 'N':  /* Basic Cyrillic */
+            x = yaz_marc8_4E_conv(inp, inbytesleft, &no_read_sub, comb);
+            break;
         case 'Q':  /* Extended Cyrillic */
-            x = yaz_marc8_6_conv(inp, inbytesleft, &no_read_sub, comb);
+            x = yaz_marc8_51_conv(inp, inbytesleft, &no_read_sub, comb);
             break;
         case '3':  /* Basic Arabic */
+            x = yaz_marc8_33_conv(inp, inbytesleft, &no_read_sub, comb);
+            break;
         case '4':  /* Extended Arabic */
-            x = yaz_marc8_7_conv(inp, inbytesleft, &no_read_sub, comb);
+            x = yaz_marc8_34_conv(inp, inbytesleft, &no_read_sub, comb);
             break;
         case 'S':  /* Greek */
-            x = yaz_marc8_8_conv(inp, inbytesleft, &no_read_sub, comb);
+            x = yaz_marc8_53_conv(inp, inbytesleft, &no_read_sub, comb);
             break;
         case '1':  /* Chinese, Japanese, Korean (EACC) */
-            x = yaz_marc8_9_conv(inp, inbytesleft, &no_read_sub, comb);
+            x = yaz_marc8_31_conv(inp, inbytesleft, &no_read_sub, comb);
             break;
         default:
             *no_read = 0;
@@ -1414,59 +1474,71 @@ static unsigned long lookup_marc8(yaz_iconv_t cd,
         *utf8_outbuf = '\0';        
         inp = (unsigned char *) utf8_buf;
         inbytesleft = strlen(utf8_buf);
-        
-        x = yaz_marc8r_1_conv(inp, inbytesleft, &no_read_sub, comb);
+
+        x = yaz_marc8r_42_conv(inp, inbytesleft, &no_read_sub, comb);
         if (x)
         {
-            *page_chr = "\033(B";
+            *page_chr = ESC "(B";
             return x;
         }
-        x = yaz_marc8r_2_conv(inp, inbytesleft, &no_read_sub, comb);
+        x = yaz_marc8r_45_conv(inp, inbytesleft, &no_read_sub, comb);
         if (x)
         {
-            *page_chr = "\033g";
+            *page_chr = ESC "(B";
             return x;
         }
-        x = yaz_marc8r_3_conv(inp, inbytesleft, &no_read_sub, comb);
+        x = yaz_marc8r_62_conv(inp, inbytesleft, &no_read_sub, comb);
         if (x)
         {
-            *page_chr = "\033b";
+            *page_chr = ESC "b";
             return x;
         }
-        x = yaz_marc8r_4_conv(inp, inbytesleft, &no_read_sub, comb);
+        x = yaz_marc8r_70_conv(inp, inbytesleft, &no_read_sub, comb);
         if (x)
         {
-            *page_chr = "\033p";
+            *page_chr = ESC "p";
             return x;
         }
-        x = yaz_marc8r_5_conv(inp, inbytesleft, &no_read_sub, comb);
+        x = yaz_marc8r_32_conv(inp, inbytesleft, &no_read_sub, comb);
         if (x)
         {
-            *page_chr = "\033(2";
+            *page_chr = ESC "(2";
             return x;
         }
-        x = yaz_marc8r_6_conv(inp, inbytesleft, &no_read_sub, comb);
+        x = yaz_marc8r_4E_conv(inp, inbytesleft, &no_read_sub, comb);
         if (x)
         {
-            *page_chr = "\033(N";
+            *page_chr = ESC "(N";
             return x;
         }
-        x = yaz_marc8r_7_conv(inp, inbytesleft, &no_read_sub, comb);
+        x = yaz_marc8r_51_conv(inp, inbytesleft, &no_read_sub, comb);
         if (x)
         {
-            *page_chr = "\033(3";
+            *page_chr = ESC "(Q";
             return x;
         }
-        x = yaz_marc8r_8_conv(inp, inbytesleft, &no_read_sub, comb);
+        x = yaz_marc8r_33_conv(inp, inbytesleft, &no_read_sub, comb);
         if (x)
         {
-            *page_chr = "\033(S";
+            *page_chr = ESC "(3";
             return x;
         }
-        x = yaz_marc8r_9_conv(inp, inbytesleft, &no_read_sub, comb);
+        x = yaz_marc8r_34_conv(inp, inbytesleft, &no_read_sub, comb);
         if (x)
         {
-            *page_chr = "\033$1";
+            *page_chr = ESC "(4";
+            return x;
+        }
+        x = yaz_marc8r_53_conv(inp, inbytesleft, &no_read_sub, comb);
+        if (x)
+        {
+            *page_chr = ESC "(S";
+            return x;
+        }
+        x = yaz_marc8r_31_conv(inp, inbytesleft, &no_read_sub, comb);
+        if (x)
+        {
+            *page_chr = ESC "$1";
             return x;
         }
         cd->my_errno = YAZ_ICONV_EILSEQ;
@@ -1479,12 +1551,21 @@ static size_t flush_combos(yaz_iconv_t cd,
 {
     unsigned long y = cd->write_marc8_last;
     unsigned char byte;
-    char out_buf[10];
-    size_t i, out_no = 0;
+    char out_buf[4];
+    size_t out_no = 0;
 
     if (!y)
         return 0;
 
+    assert(cd->write_marc8_lpage);
+    if (cd->write_marc8_lpage)
+    {
+        size_t r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft,
+                                            cd->write_marc8_lpage);
+        if (r)
+            return r;
+    }
+
     byte = (unsigned char )((y>>16) & 0xff);
     if (byte)
         out_buf[out_no++] = byte;
@@ -1495,19 +1576,12 @@ static size_t flush_combos(yaz_iconv_t cd,
     if (byte)
         out_buf[out_no++] = byte;
 
-    if (out_no + cd->write_marc8_comb_no + 1 > *outbytesleft)
+    if (out_no + 2 >= *outbytesleft)
     {
         cd->my_errno = YAZ_ICONV_E2BIG;
         return (size_t) (-1);
     }
 
-    for (i = 0; i < cd->write_marc8_comb_no; i++)
-    {
-        /* all MARC-8 combined characters are simple bytes */
-        byte = (unsigned char )(cd->write_marc8_comb_ch[i]);
-        *(*outbuf)++ = byte;
-        (*outbytesleft)--;
-    }
     memcpy(*outbuf, out_buf, out_no);
     *outbuf += out_no;
     (*outbytesleft) -= out_no;
@@ -1518,7 +1592,7 @@ static size_t flush_combos(yaz_iconv_t cd,
     }        
 
     cd->write_marc8_last = 0;
-    cd->write_marc8_comb_no = 0;
+    cd->write_marc8_lpage = 0;
     cd->write_marc8_second_half_char = 0;
     return 0;
 }
@@ -1527,8 +1601,13 @@ static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
                                        char **outbuf, size_t *outbytesleft,
                                        const char *page_chr)
 {
-    const char *old_page_chr = cd->write_marc8_page_chr;
-    if (strcmp(page_chr, old_page_chr))
+    const char **old_page_chr = &cd->write_marc8_g0;
+
+    /* are we going to a G1-set (such as ESC ")!E")? */
+    if (page_chr && page_chr[1] == ')')
+        old_page_chr = &cd->write_marc8_g1;
+
+    if (!*old_page_chr || strcmp(page_chr, *old_page_chr))
     {
         size_t plen = 0;
         const char *page_out = page_chr;
@@ -1539,24 +1618,27 @@ static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
             
             return (size_t) (-1);
         }
-        cd->write_marc8_page_chr = page_chr;
-        
-        if (!strcmp(old_page_chr, "\033p") 
-            || !strcmp(old_page_chr, "\033g")
-            || !strcmp(old_page_chr, "\033b"))
+
+        if (*old_page_chr)
         {
-            /* Technique 1 leave */
-            page_out = "\033s";
-            if (strcmp(page_chr, "\033(B")) /* Not going ASCII page? */
+            if (!strcmp(*old_page_chr, ESC "p") 
+                || !strcmp(*old_page_chr, ESC "g")
+                || !strcmp(*old_page_chr, ESC "b"))
             {
-                /* Must leave script + enter new page */
-                plen = strlen(page_out);
-                memcpy(*outbuf, page_out, plen);
-                (*outbuf) += plen;
-                (*outbytesleft) -= plen;
-                page_out = page_chr;
+                page_out = ESC "s";
+                /* Technique 1 leave */
+                if (strcmp(page_chr, ESC "(B")) /* Not going ASCII page? */
+                {
+                    /* Must leave script + enter new page */
+                    plen = strlen(page_out);
+                    memcpy(*outbuf, page_out, plen);
+                    (*outbuf) += plen;
+                    (*outbytesleft) -= plen;
+                    page_out = ESC "(B";
+                }
             }
         }
+        *old_page_chr = page_chr;
         plen = strlen(page_out);
         memcpy(*outbuf, page_out, plen);
         (*outbuf) += plen;
@@ -1578,13 +1660,25 @@ static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x,
 
     if (comb)
     {
+        if (page_chr)
+        {
+            size_t r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft,
+                                                page_chr);
+            if (r)
+                return r;
+        }
         if (x == 0x0361)
             cd->write_marc8_second_half_char = 0xEC;
         else if (x == 0x0360)
             cd->write_marc8_second_half_char = 0xFB;
 
-        if (cd->write_marc8_comb_no < 6)
-            cd->write_marc8_comb_ch[cd->write_marc8_comb_no++] = y;
+        if (*outbytesleft <= 1)
+        {
+            cd->my_errno = YAZ_ICONV_E2BIG;
+            return (size_t) (-1);
+        }
+        *(*outbuf)++ = y;
+        (*outbytesleft)--;
     }
     else
     {
@@ -1592,10 +1686,8 @@ static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x,
         if (r)
             return r;
 
-        r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, page_chr);
-        if (r)
-            return r;
         cd->write_marc8_last = y;
+        cd->write_marc8_lpage = page_chr;
     }
     return 0;
 }
@@ -1606,7 +1698,8 @@ static size_t yaz_flush_marc8(yaz_iconv_t cd,
     size_t r = flush_combos(cd, outbuf, outbytesleft);
     if (r)
         return r;
-    return yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, "\033(B");
+    cd->write_marc8_g1 = 0;
+    return yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, ESC "(B");
 }
 
 static size_t yaz_write_marc8(yaz_iconv_t cd, unsigned long x,
@@ -1622,6 +1715,7 @@ static size_t yaz_write_marc8(yaz_iconv_t cd, unsigned long x,
             char *outbuf0 = *outbuf;
             size_t outbytesleft0 = *outbytesleft;
             int last_ch = cd->write_marc8_last;
+            const char *lpage = cd->write_marc8_lpage;
 
             r = yaz_write_marc8_2(cd, latin1_comb[i].x1,
                                   outbuf, outbytesleft);
@@ -1635,6 +1729,7 @@ static size_t yaz_write_marc8(yaz_iconv_t cd, unsigned long x,
                 *outbuf = outbuf0;
                 *outbytesleft = outbytesleft0;
                 cd->write_marc8_last = last_ch;
+                cd->write_marc8_lpage = lpage;
             }
             return r;
         }
@@ -1706,6 +1801,8 @@ yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
             cd->read_handle = yaz_read_advancegreek;
         else if (!yaz_matchstr(fromcode, "iso54281984"))
             cd->read_handle = yaz_read_iso5428_1984;
+        else if (!yaz_matchstr(fromcode, "iso5428:1984"))
+            cd->read_handle = yaz_read_iso5428_1984;
 #if HAVE_WCHAR_H
         else if (!yaz_matchstr(fromcode, "WCHAR_T"))
             cd->read_handle = yaz_read_wchar_t;
@@ -1740,6 +1837,10 @@ yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
         {
             cd->write_handle = yaz_write_iso5428_1984;
         }
+        else if (!yaz_matchstr(tocode, "iso5428:1984"))
+        {
+            cd->write_handle = yaz_write_iso5428_1984;
+        }
 #if HAVE_WCHAR_H
         else if (!yaz_matchstr(tocode, "WCHAR_T"))
             cd->write_handle = yaz_write_wchar_t;
@@ -1805,15 +1906,17 @@ size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
     if (cd->init_flag)
     {
         cd->my_errno = YAZ_ICONV_UNKNOWN;
-        cd->marc8_esc_mode = 'B';
+        cd->g0_mode = 'B';
+        cd->g1_mode = 'B';
         
         cd->comb_offset = cd->comb_size = 0;
         cd->compose_char = 0;
         
-        cd->write_marc8_comb_no = 0;
         cd->write_marc8_second_half_char = 0;
         cd->write_marc8_last = 0;
-        cd->write_marc8_page_chr = "\033(B";
+        cd->write_marc8_lpage = 0;
+        cd->write_marc8_g0 = ESC "(B";
+        cd->write_marc8_g1 = 0;
         
         cd->unget_x = 0;
         cd->no_read_x = 0;