Added character conversion support for "greekadvance"; based on patch from
author Adam Dickmeiss <adam@indexdata.dk>
Mon, 12 Mar 2007 10:59:58 +0000 (10:59 +0000)
committer Adam Dickmeiss <adam@indexdata.dk>
Mon, 12 Mar 2007 10:59:58 +0000 (10:59 +0000)
Giannis Kosmas.

NEWS
src/siconv.c
test/tsticonv.c

diff --git a/NEWS b/NEWS
index ba98058..fe0392f 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,6 @@
+Added character conversion support for "advancegreek"; based on patch from
+Giannis Kosmas.
+
 --- 2.1.52 2007/03/07
 
 ZOOM-C: Removed hardcoded limit of 1024 sockets in ZOOM_event_sys_poll().
index 8916561..75fafbf 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (C) 1995-2007, Index Data ApS
  * See the file LICENSE for details.
  *
- * $Id: siconv.c,v 1.34 2007-03-09 08:39:38 adam Exp $
+ * $Id: siconv.c,v 1.35 2007-03-12 10:59:59 adam Exp $
  */
 /**
  * \file siconv.c
@@ -356,6 +356,361 @@ static unsigned long yaz_read_wchar_t (yaz_iconv_t cd, unsigned char *inp,
 }
 #endif
 
+/* Reads one "advancegreek" character and returns its Unicode code point.
+ * Any run of prefix bytes 0x9d (tonos), 0x9e (dialitika) and 0x9f (shift,
+ * selecting the capital letter) modifies the letter byte 0x81-0x99 that
+ * follows; every other byte value is returned unchanged. *no_read receives
+ * the number of bytes consumed. If input ends before a non-prefix byte,
+ * my_errno is set to YAZ_ICONV_EINVAL, *no_read to 0, and 0 is returned. */
+static unsigned long yaz_read_advancegreek(yaz_iconv_t cd, unsigned char *inp,
+                                           size_t inbytesleft, size_t *no_read)
+{
+    unsigned long x = 0;
+    int shift = 0;
+    int tonos = 0;
+    int dialitika = 0;
+
+    *no_read = 0;
+    while (inbytesleft > 0)
+    {
+        if (*inp == 0x9d)
+            tonos = 1;
+        else if (*inp == 0x9e)
+            dialitika = 1;
+        else if (*inp == 0x9f)
+            shift = 1;
+        else
+            break;
+        inp++;
+        --inbytesleft;
+        (*no_read)++;
+    }
+    if (inbytesleft == 0)
+    {
+        cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
+        *no_read = 0;
+        return 0;
+    }
+    switch (*inp) {
+    case 0x81: /* alpha */
+        if (shift)
+            if (tonos)
+                x = 0x0386;
+            else
+                x = 0x0391;
+        else
+            if (tonos)
+                x = 0x03ac;
+            else
+                x = 0x03b1;
+        break;
+    case 0x82: /* beta */
+        if (shift)
+            x = 0x0392;
+        else
+            x = 0x03b2;
+
+        break;
+    case 0x83: /* gamma */
+        if (shift)
+            x = 0x0393;
+        else
+            x = 0x03b3;
+        break;
+    case 0x84: /* delta */
+        if (shift)
+            x = 0x0394;
+        else
+            x = 0x03b4;
+        break;
+    case 0x85: /* epsilon */
+        if (shift)
+            if (tonos)
+                x = 0x0388;
+            else
+                x = 0x0395;
+        else
+            if (tonos)
+                x = 0x03ad;
+            else
+                x = 0x03b5;
+        break;
+    case 0x86: /* zeta */
+        if (shift)
+            x = 0x0396;
+        else
+            x = 0x03b6;
+        break;
+    case 0x87: /* eta */
+        if (shift)
+            if (tonos)
+                x = 0x0389;
+            else
+                x = 0x0397;
+        else
+            if (tonos)
+                x = 0x03ae;
+            else
+                x = 0x03b7;
+        break;
+    case 0x88: /* theta */
+        if (shift)
+            x = 0x0398;
+        else
+            x = 0x03b8;
+        break;
+    case 0x89: /* iota */
+        if (shift)
+            if (tonos)
+                x = 0x038a;
+            else
+                if (dialitika)
+                    x = 0x03aa; /* capital iota with dialytika */
+                else
+                    x = 0x0399;
+        else
+            if (tonos)
+                if (dialitika)
+                    x = 0x0390;
+                else
+                    x = 0x03af;
+
+            else
+                if (dialitika)
+                    x = 0x03ca;
+                else
+                    x = 0x03b9;
+        break;
+    case 0x8a: /* kappa */
+        if (shift)
+            x = 0x039a;
+        else
+            x = 0x03ba;
+
+        break;
+    case 0x8b: /* lambda */
+        if (shift)
+            x = 0x039b;
+        else
+            x = 0x03bb;
+        break;
+    case 0x8c: /* mu */
+        if (shift)
+            x = 0x039c;
+        else
+            x = 0x03bc;
+
+        break;
+    case 0x8d: /* nu */
+        if (shift)
+            x = 0x039d;
+        else
+            x = 0x03bd;
+        break;
+    case 0x8e: /* xi */
+        if (shift)
+            x = 0x039e;
+        else
+            x = 0x03be;
+        break;
+    case 0x8f: /* omicron */
+        if (shift)
+            if (tonos)
+                x = 0x038c;
+            else
+                x = 0x039f;
+        else
+            if (tonos)
+                x = 0x03cc;
+            else
+                x = 0x03bf;
+        break;
+    case 0x90: /* pi */
+        if (shift)
+            x = 0x03a0;
+        else
+            x = 0x03c0;
+        break;
+    case 0x91: /* rho */
+        if (shift)
+            x = 0x03a1;
+        else
+            x = 0x03c1;
+        break;
+    case 0x92: /* final sigma; prefixes are ignored */
+        x = 0x03c2;
+        break;
+    case 0x93: /* sigma */
+        if (shift)
+            x = 0x03a3;
+        else
+            x = 0x03c3;
+        break;
+    case 0x94: /* tau */
+        if (shift)
+            x = 0x03a4;
+        else
+            x = 0x03c4;
+        break;
+    case 0x95: /* upsilon */
+        if (shift)
+            if (tonos)
+                x = 0x038e;
+            else
+                if (dialitika)
+                    x = 0x03ab;
+                else
+                    x = 0x03a5;
+        else
+            if (tonos)
+                if (dialitika)
+                    x = 0x03b0;
+                else
+                    x = 0x03cd;
+
+            else
+                if (dialitika)
+                    x = 0x03cb;
+                else
+                    x = 0x03c5;
+        break;
+    case 0x96: /* phi */
+        if (shift)
+            x = 0x03a6;
+        else
+            x = 0x03c6;
+        break;
+    case 0x97: /* chi */
+        if (shift)
+            x = 0x03a7;
+        else
+            x = 0x03c7;
+        break;
+    case 0x98: /* psi */
+        if (shift)
+            x = 0x03a8;
+        else
+            x = 0x03c8;
+
+        break;
+
+    case 0x99: /* omega */
+        if (shift)
+            if (tonos)
+                x = 0x038f;
+            else
+                x = 0x03a9;
+        else
+            if (tonos)
+                x = 0x03ce;
+            else
+                x = 0x03c9;
+        break;
+    default: /* not a Greek letter byte: pass the value through */
+        x = *inp;
+        break;
+    }
+    (*no_read)++;
+
+    return x;
+}
+
+/* Encodes code point x as 1-3 "advancegreek" bytes at *outbuf and advances
+ * *outbuf / *outbytesleft; returns 0, or (size_t)-1 with my_errno set. */
+static size_t yaz_write_advancegreek(yaz_iconv_t cd, unsigned long x,
+                                     char **outbuf, size_t *outbytesleft,
+                                     int last /* not used here */)
+{
+    size_t k = 0; /* number of bytes produced for x */
+    unsigned char *out = (unsigned char*) *outbuf;
+    if (*outbytesleft < 3) /* always require room for the longest sequence */
+    {
+        cd->my_errno = YAZ_ICONV_E2BIG;  /* not room for output */
+        return (size_t)(-1);
+    }
+    switch (x) { /* prefixes: 0x9d tonos, 0x9e dialitika, 0x9f capital */
+    case 0x03ac : out[k++]=0x9d; out[k++]=0x81; break;
+    case 0x03ad : out[k++]=0x9d; out[k++]=0x85; break;
+    case 0x03ae : out[k++]=0x9d; out[k++]=0x87; break;
+    case 0x03af : out[k++]=0x9d; out[k++]=0x89; break;
+    case 0x03cc : out[k++]=0x9d; out[k++]=0x8f; break;
+    case 0x03cd : out[k++]=0x9d; out[k++]=0x95; break;
+    case 0x03ce : out[k++]=0x9d; out[k++]=0x99; break;
+    case 0x0390 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x89; break;
+    case 0x03b0 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x95; break;
+    case 0x0386 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x81; break;
+    case 0x0388 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x85; break;
+    case 0x0389 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x87; break;
+    case 0x038a : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x89; break;
+    case 0x038c : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x8f; break;
+    case 0x038e : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x95; break;
+    case 0x038f : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x99; break;
+    case 0x03ca : out[k++]=0x9e; out[k++]=0x89; break;
+    case 0x03cb : out[k++]=0x9e; out[k++]=0x95; break;
+    case 0x03aa : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x89; break;
+    case 0x03ab : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x95; break;
+    case 0x0391 : out[k++]=0x9f; out[k++]=0x81; break;
+    case 0x0392 : out[k++]=0x9f; out[k++]=0x82; break;
+    case 0x0393 : out[k++]=0x9f; out[k++]=0x83; break;
+    case 0x0394 : out[k++]=0x9f; out[k++]=0x84; break;
+    case 0x0395 : out[k++]=0x9f; out[k++]=0x85; break;
+    case 0x0396 : out[k++]=0x9f; out[k++]=0x86; break;
+    case 0x0397 : out[k++]=0x9f; out[k++]=0x87; break;
+    case 0x0398 : out[k++]=0x9f; out[k++]=0x88; break;
+    case 0x0399 : out[k++]=0x9f; out[k++]=0x89; break;
+    case 0x039a : out[k++]=0x9f; out[k++]=0x8a; break;
+    case 0x039b : out[k++]=0x9f; out[k++]=0x8b; break;
+    case 0x039c : out[k++]=0x9f; out[k++]=0x8c; break;
+    case 0x039d : out[k++]=0x9f; out[k++]=0x8d; break;
+    case 0x039e : out[k++]=0x9f; out[k++]=0x8e; break;
+    case 0x039f : out[k++]=0x9f; out[k++]=0x8f; break;
+    case 0x03a0 : out[k++]=0x9f; out[k++]=0x90; break;
+    case 0x03a1 : out[k++]=0x9f; out[k++]=0x91; break;
+    case 0x03a3 : out[k++]=0x9f; out[k++]=0x93; break;
+    case 0x03a4 : out[k++]=0x9f; out[k++]=0x94; break;
+    case 0x03a5 : out[k++]=0x9f; out[k++]=0x95; break;
+    case 0x03a6 : out[k++]=0x9f; out[k++]=0x96; break;
+    case 0x03a7 : out[k++]=0x9f; out[k++]=0x97; break;
+    case 0x03a8 : out[k++]=0x9f; out[k++]=0x98; break;
+    case 0x03a9 : out[k++]=0x9f; out[k++]=0x99; break;
+    case 0x03b1 : out[k++]=0x81; break;
+    case 0x03b2 : out[k++]=0x82; break;
+    case 0x03b3 : out[k++]=0x83; break;
+    case 0x03b4 : out[k++]=0x84; break;
+    case 0x03b5 : out[k++]=0x85; break;
+    case 0x03b6 : out[k++]=0x86; break;
+    case 0x03b7 : out[k++]=0x87; break;
+    case 0x03b8 : out[k++]=0x88; break;
+    case 0x03b9 : out[k++]=0x89; break;
+    case 0x03ba : out[k++]=0x8a; break;
+    case 0x03bb : out[k++]=0x8b; break;
+    case 0x03bc : out[k++]=0x8c; break;
+    case 0x03bd : out[k++]=0x8d; break;
+    case 0x03be : out[k++]=0x8e; break;
+    case 0x03bf : out[k++]=0x8f; break;
+    case 0x03c0 : out[k++]=0x90; break;
+    case 0x03c1 : out[k++]=0x91; break;
+    case 0x03c2 : out[k++]=0x92; break;
+    case 0x03c3 : out[k++]=0x93; break;
+    case 0x03c4 : out[k++]=0x94; break;
+    case 0x03c5 : out[k++]=0x95; break;
+    case 0x03c6 : out[k++]=0x96; break;
+    case 0x03c7 : out[k++]=0x97; break;
+    case 0x03c8 : out[k++]=0x98; break;
+    case 0x03c9 : out[k++]=0x99; break;
+    default:
+        if (x > 255)
+        {
+            cd->my_errno = YAZ_ICONV_EILSEQ; /* no single-byte form */
+            return (size_t) -1;
+        }
+        out[k++] = (unsigned char) x; /* values <= 255 are copied as one byte */
+        break;
+    }
+    *outbytesleft -= k;
+    (*outbuf) += k;
+    return 0;
+}
 
 static unsigned long yaz_read_marc8_comb (yaz_iconv_t cd, unsigned char *inp,
                                           size_t inbytesleft, size_t *no_read,
@@ -394,6 +749,7 @@ static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
     for (cd->comb_size = 0; cd->comb_size < 8; cd->comb_size++)
     {
         int comb = 0;
+
         if (inbytesleft == 0 && cd->comb_size)
         {
             cd->my_errno = YAZ_ICONV_EINVAL;
@@ -1001,6 +1357,8 @@ yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
             cd->read_handle = yaz_read_marc8;
         else if (!yaz_matchstr(fromcode, "MARC8s"))
             cd->read_handle = yaz_read_marc8s;
+        else if (!yaz_matchstr(fromcode, "advancegreek"))
+            cd->read_handle = yaz_read_advancegreek;
 #if HAVE_WCHAR_H
         else if (!yaz_matchstr(fromcode, "WCHAR_T"))
             cd->read_handle = yaz_read_wchar_t;
@@ -1024,6 +1382,10 @@ yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
             cd->write_handle = yaz_write_marc8;
             cd->flush_handle = yaz_flush_marc8;
         }
+        else if (!yaz_matchstr(tocode, "advancegreek"))
+        {
+            cd->write_handle = yaz_write_advancegreek;
+        }
 #if HAVE_WCHAR_H
         else if (!yaz_matchstr(tocode, "WCHAR_T"))
             cd->write_handle = yaz_write_wchar_t;
index 43455ad..7c69c0b 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (C) 1995-2007, Index Data ApS
  * See the file LICENSE for details.
  *
- * $Id: tsticonv.c,v 1.25 2007-03-09 08:39:38 adam Exp $
+ * $Id: tsticonv.c,v 1.26 2007-03-12 10:59:59 adam Exp $
  */
 
 #if HAVE_CONFIG_H
@@ -473,6 +473,29 @@ static void tst_utf8_to_marc8(void)
     yaz_iconv_close(cd);
 }
 
+/* advancegreek -> UTF-8: runs tst_convert on the ASCII string "Cours ". */
+static void tst_advance_to_utf8(void)
+{
+    yaz_iconv_t cd = yaz_iconv_open("utf-8", "advancegreek");
+    YAZ_CHECK(cd);
+    if (cd)
+    {
+        YAZ_CHECK(tst_convert(cd, "Cours ", "Cours "));
+        yaz_iconv_close(cd);
+    }
+}
+
+/* UTF-8 -> advancegreek: runs tst_convert on the ASCII string "Cours ". */
+static void tst_utf8_to_advance(void)
+{
+    yaz_iconv_t cd = yaz_iconv_open("advancegreek", "utf-8");
+    YAZ_CHECK(cd);
+    if (cd)
+    {
+        YAZ_CHECK(tst_convert(cd, "Cours ", "Cours "));
+        yaz_iconv_close(cd);
+    }
+}
 
 static void tst_latin1_to_marc8(void)
 {
@@ -539,6 +562,9 @@ int main (int argc, char **argv)
 
     tst_marc8_to_latin1();
 
+    tst_advance_to_utf8();
+    tst_utf8_to_advance();
+
     tst_utf8_to_marc8();
 
     tst_latin1_to_marc8();