Fixed yaz_iconv to return YAZ_ICONV_EINVAL if an incomplete MARC-8
[yaz-moved-to-github.git] / test / tsticonv.c
index 1799002..43455ad 100644 (file)
 /*
- * Copyright (c) 2002-2004, Index Data
+ * Copyright (C) 1995-2007, Index Data ApS
  * See the file LICENSE for details.
  *
- * $Id: tsticonv.c,v 1.2 2004-03-15 21:39:06 adam Exp $
+ * $Id: tsticonv.c,v 1.25 2007-03-09 08:39:38 adam Exp $
  */
 
 #if HAVE_CONFIG_H
 #include <config.h>
 #endif
 
+#include <stdlib.h>
 #include <errno.h>
 #include <string.h>
 #include <ctype.h>
 
 #include <yaz/yaz-util.h>
+#include <yaz/test.h>
 
-/* some test strings in ISO-8859-1 format */
-static const char *iso_8859_1_a[] = {
-    "ax" ,
-    "\330",
-    "eneb\346r",
-    "\xfc",
-    "\xfb",
-    "\xfbr",
-    0 };
-
-/* same test strings in MARC-8 format */
-static const char *marc8_a[] = {
-    "ax",   
-    "\xa2",          /* latin capital letter o with stroke */
-    "eneb\xb5r",     /* latin small letter ae */
-    "\xe8\x75",      /* latin small letter u with umlaut */
-    "\xe3\x75",      /* latin small letter u with circumflex */
-    "\xe3\x75r",     /* latin small letter u with circumflex */
-    0
-};
-
-static void marc8_tst_a()
+static int compare_buffers(char *msg, int no,
+                           int expect_len, const char *expect_buf,
+                           int got_len, const char *got_buf)
 {
-    int i;
-    yaz_iconv_t cd;
-
-    cd = yaz_iconv_open("ISO-8859-1", "MARC8");
-    if (!cd)
+    if (expect_len == got_len
+        && !memcmp(expect_buf, got_buf, expect_len))
+        return 1;
+    
+    if (0) /* use 1 to see how the buffers differ (for debug purposes) */
     {
-       printf("tsticonv 10 yaz_iconv_open failed\n");
-       exit(10);
+        int i;
+        printf("tsticonv test=%s i=%d failed\n", msg, no);
+        printf("off got exp\n");
+        for (i = 0; i<got_len || i<expect_len; i++)
+        {
+            char got_char[10];
+            char expect_char[10];
+            
+            if (i < got_len)
+                sprintf(got_char, "%02X", got_buf[i]);
+            else
+                sprintf(got_char, "?  ");
+            
+            if (i < expect_len)
+                sprintf(expect_char, "%02X", expect_buf[i]);
+            else
+                sprintf(expect_char, "?  ");
+            
+            printf("%02d  %s  %s %c\n",
+                   i, got_char, expect_char, got_buf[i] == expect_buf[i] ?
+                   ' ' : '*');
+            
+        }
     }
-    for (i = 0; iso_8859_1_a[i]; i++)
-    {
-        size_t r;
-        char *inbuf= (char*) marc8_a[i];
-        size_t inbytesleft = strlen(inbuf);
-        char outbuf0[24];
-        char *outbuf = outbuf0;
-        size_t outbytesleft = sizeof(outbuf0);
+    return 0;
+}
+
+static int tst_convert_l(yaz_iconv_t cd, size_t in_len, const char *in_buf,
+                         size_t expect_len, const char *expect_buf)
+{
+    size_t r;
+    char *inbuf= (char*) in_buf;
+    size_t inbytesleft = in_len > 0 ? in_len : strlen(in_buf);
+    char outbuf0[64];
+    char *outbuf = outbuf0;
 
+    while (inbytesleft)
+    {
+        size_t outbytesleft = outbuf0 + sizeof(outbuf0) - outbuf;
+        if (outbytesleft > 12)
+            outbytesleft = 12;
         r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
         if (r == (size_t) (-1))
         {
             int e = yaz_iconv_error(cd);
-
-            printf ("tsticonv 11 i=%d e=%d\n", i, e);
-           exit(11);
+            if (e != YAZ_ICONV_E2BIG)
+                return 0;
         }
-        if ((outbuf - outbuf0) != strlen(iso_8859_1_a[i]) 
-            || memcmp(outbuf0, iso_8859_1_a[i],
-                     strlen(iso_8859_1_a[i])))
+        else
+            break;
+    }
+    return compare_buffers("tsticonv 22", 0,
+                           expect_len, expect_buf,
+                           outbuf - outbuf0, outbuf0);
+}
+
+static int tst_convert(yaz_iconv_t cd, const char *buf, const char *cmpbuf)
+{
+    int ret = 0;
+    WRBUF b = wrbuf_alloc();
+    char outbuf[12];
+    size_t inbytesleft = strlen(buf);
+    const char *inp = buf;
+    int rounds = 0;
+    for (rounds = 0; inbytesleft && rounds < sizeof(outbuf); rounds++)
+    {
+        size_t outbytesleft = sizeof(outbuf);
+        char *outp = outbuf;
+        size_t r = yaz_iconv(cd, (char**) &inp,  &inbytesleft,
+                             &outp, &outbytesleft);
+        wrbuf_write(b, outbuf, outp - outbuf);
+        if (r == (size_t) (-1))
         {
-            printf ("tsticonv 12 i=%d\n", i);
-            printf ("buf=%s   out=%s\n", iso_8859_1_a[i], outbuf0);
-           exit(12);
+            int e = yaz_iconv_error(cd);
+            if (e != YAZ_ICONV_E2BIG)
+                break;
         }
     }
+    if (wrbuf_len(b) == strlen(cmpbuf) 
+        && !memcmp(cmpbuf, wrbuf_buf(b), wrbuf_len(b)))
+        ret = 1;
+    else
+        yaz_log(YLOG_LOG, "GOT (%.*s)", wrbuf_len(b), wrbuf_buf(b));
+    wrbuf_free(b, 1);
+    return ret;
+}
+
+
+/* some test strings in ISO-8859-1 format */
+static const char *iso_8859_1_a[] = {
+    "ax" ,
+    "\xd8",
+    "eneb\346r",
+    "\xe5" "\xd8",
+    "\xe5" "\xd8" "b",
+    "\xe5" "\xe5",
+    0 };
+
+static void tst_marc8_to_ucs4b(void)
+{
+    yaz_iconv_t cd = yaz_iconv_open("UCS4", "MARC8");
+    YAZ_CHECK(cd);
+    if (!cd)
+        return;
+    
+    YAZ_CHECK(tst_convert_l(
+                  cd,
+                  0,
+                  "\033$1" "\x21\x2B\x3B" /* FF1F */ "\033(B" "o",
+                  8, 
+                  "\x00\x00\xFF\x1F" "\x00\x00\x00o"));
+    YAZ_CHECK(tst_convert_l(
+                  cd,
+                  0,
+                  "\033$1" "\x6F\x77\x29" /* AE0E */
+                  "\x6F\x52\x7C" /* c0F4 */ "\033(B",
+                  8,
+                  "\x00\x00\xAE\x0E" "\x00\x00\xC0\xF4"));
+    YAZ_CHECK(tst_convert_l(
+                  cd,
+                  0,
+                  "\033$1"
+                  "\x21\x50\x6E"  /* UCS 7CFB */
+                  "\x21\x51\x31"  /* UCS 7D71 */
+                  "\x21\x3A\x67"  /* UCS 5B89 */
+                  "\x21\x33\x22"  /* UCS 5168 */
+                  "\x21\x33\x53"  /* UCS 5206 */
+                  "\x21\x44\x2B"  /* UCS 6790 */
+                  "\033(B",
+                  24, 
+                  "\x00\x00\x7C\xFB"
+                  "\x00\x00\x7D\x71"
+                  "\x00\x00\x5B\x89"
+                  "\x00\x00\x51\x68"
+                  "\x00\x00\x52\x06"
+                  "\x00\x00\x67\x90"));
+
+    YAZ_CHECK(tst_convert_l(
+                  cd,
+                  0,
+                  "\xB0\xB2",     /* AYN and oSLASH */
+                  8, 
+                  "\x00\x00\x02\xBB"  "\x00\x00\x00\xF8"));
+    YAZ_CHECK(tst_convert_l(
+                  cd,
+                  0,
+                  "\xF6\x61",     /* a underscore */
+                  8, 
+                  "\x00\x00\x00\x61"  "\x00\x00\x03\x32"));
+
+    YAZ_CHECK(tst_convert_l(
+                  cd,
+                  0,
+                  "\x61\xC2",     /* a, phonorecord mark */
+                  8,
+                  "\x00\x00\x00\x61"  "\x00\x00\x21\x17"));
+
+    /* bug #258 */
+    YAZ_CHECK(tst_convert_l(
+                  cd,
+                  0,
+                  "el" "\xe8" "am\xe8" "an", /* elaman where a is a" */
+                  32,
+                  "\x00\x00\x00" "e"
+                  "\x00\x00\x00" "l"
+                  "\x00\x00\x00" "a"
+                  "\x00\x00\x03\x08"
+                  "\x00\x00\x00" "m"
+                  "\x00\x00\x00" "a"
+                  "\x00\x00\x03\x08"
+                  "\x00\x00\x00" "n"));
+    /* bug #260 */
+    YAZ_CHECK(tst_convert_l(
+                  cd,
+                  0,
+                  "\xe5\xe8\x41",
+                  12, 
+                  "\x00\x00\x00\x41" "\x00\x00\x03\x04" "\x00\x00\x03\x08"));
+    /* bug #416 */
+    YAZ_CHECK(tst_convert_l(
+                  cd,
+                  0,
+                  "\xEB\x74\xEC\x73",
+                  12,
+                  "\x00\x00\x00\x74" "\x00\x00\x03\x61" "\x00\x00\x00\x73"));
+    /* bug #416 */
+    YAZ_CHECK(tst_convert_l(
+                  cd,
+                  0,
+                  "\xFA\x74\xFB\x73",
+                  12, 
+                  "\x00\x00\x00\x74" "\x00\x00\x03\x60" "\x00\x00\x00\x73"));
+
+    yaz_iconv_close(cd);
+}
+
+static void tst_ucs4b_to_utf8(void)
+{
+    yaz_iconv_t cd = yaz_iconv_open("UTF8", "UCS4");
+    YAZ_CHECK(cd);
+    if (!cd)
+        return;
+    YAZ_CHECK(tst_convert_l(
+                  cd,
+                  8,
+                  "\x00\x00\xFF\x1F\x00\x00\x00o",
+                  4,
+                  "\xEF\xBC\x9F\x6F"));
+
+    YAZ_CHECK(tst_convert_l(
+                  cd,
+                  8, 
+                  "\x00\x00\xAE\x0E\x00\x00\xC0\xF4",
+                  6,
+                  "\xEA\xB8\x8E\xEC\x83\xB4"));
     yaz_iconv_close(cd);
 }
 
-static void marc8_tst_b()
+static void dconvert(int mandatory, const char *tmpcode)
 {
-    static const char *marc8_b[] = {
-       "\033$1" "\x21\x2B\x3B" /* FF1F */ "\033(B" "o",
-       "\033$1" "\x6F\x77\x29" /* AE0E */ "\x6F\x52\x7C" /* c0F4 */ "\033(B",
-       "\033$1"
-       "\x21\x50\x6E"  /* 7CFB */
-       "\x21\x51\x31"  /* 7D71 */
-       "\x21\x3A\x67"  /* 5B89 */
-       "\x21\x33\x22"  /* 5168 */
-       "\x21\x33\x53"  /* 5206 */
-       "\x21\x44\x2B"  /* 6790 */
-       "\033(B",
-       0
-    };
-    static const char *ucs4_b[] = {
-       "\x00\x00\xFF\x1F" "\x00\x00\x00o",
-       "\x00\x00\xAE\x0E" "\x00\x00\xC0\xF4",
-       "\x00\x00\x7C\xFB"
-       "\x00\x00\x7D\x71"
-       "\x00\x00\x5B\x89"
-       "\x00\x00\x51\x68"
-       "\x00\x00\x52\x06"
-       "\x00\x00\x67\x90",
-       0
-    };
     int i;
+    int ret;
     yaz_iconv_t cd;
-
-    cd = yaz_iconv_open("UCS4", "MARC8");
-    if (!cd)
-    {
-       printf ("tsticonv 20 yaz_iconv_open failed\n");
-       exit(20);
-    }
-    for (i = 0; marc8_b[i]; i++)
+    for (i = 0; iso_8859_1_a[i]; i++)
     {
         size_t r;
-       size_t len;
-       size_t expect_len = (i == 2 ? 24 : 8);
-        char *inbuf= (char*) marc8_b[i];
+        char *inbuf = (char*) iso_8859_1_a[i];
         size_t inbytesleft = strlen(inbuf);
         char outbuf0[24];
+        char outbuf1[10];
         char *outbuf = outbuf0;
         size_t outbytesleft = sizeof(outbuf0);
 
+        cd = yaz_iconv_open(tmpcode, "ISO-8859-1");
+        YAZ_CHECK(cd || !mandatory);
+        if (!cd)
+            return;
         r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+        YAZ_CHECK(r != (size_t) (-1));
+        yaz_iconv_close(cd);
         if (r == (size_t) (-1))
-        {
-            int e = yaz_iconv_error(cd);
+            return;
+        
+        cd = yaz_iconv_open("ISO-8859-1", tmpcode);
+        YAZ_CHECK(cd || !mandatory);
+        if (!cd)
+            return;
+        inbuf = outbuf0;
+        inbytesleft = sizeof(outbuf0) - outbytesleft;
 
-            printf ("tsticonv 21 i=%d e=%d\n", i, e);
-           exit(21);
-        }
-       len = outbuf - outbuf0;
-       if (len != expect_len || memcmp(outbuf0, ucs4_b[i], len))
+        outbuf = outbuf1;
+        outbytesleft = sizeof(outbuf1);
+        r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+        YAZ_CHECK(r != (size_t) (-1));
+        if (r != (size_t)(-1)) 
         {
-            printf ("tsticonv 22 len=%d gotlen=%d i=%d\n", expect_len, len, i);
-           exit(22);
+            ret = compare_buffers("dconvert", i,
+                                  strlen(iso_8859_1_a[i]), iso_8859_1_a[i],
+                              sizeof(outbuf1) - outbytesleft, outbuf1);
+            YAZ_CHECK(ret);
         }
+        yaz_iconv_close(cd);
     }
-    yaz_iconv_close(cd);
 }
 
-static void marc8_tst_c()
+int utf8_check(unsigned c)
 {
-    static const char *ucs4_c[] = {
-       "\x00\x00\xFF\x1F\x00\x00\x00o",
-       "\x00\x00\xAE\x0E\x00\x00\xC0\xF4",
-       0
-    };
-    static const char *utf8_c[] = {
-       "\xEF\xBC\x9F\x6F",
-       "\xEA\xB8\x8E\xEC\x83\xB4",
-       0
-    };
-    
-    int i;
-    yaz_iconv_t cd;
-
-    cd = yaz_iconv_open("UTF8", "UCS4");
-    if (!cd)
-    {
-       printf ("tsticonv 30 yaz_iconv_open failed\n");
-       exit(30);
-    }
-    for (i = 0; ucs4_c[i]; i++)
+    if (sizeof(c) >= 4)
     {
         size_t r;
-       size_t len;
-        char *inbuf= (char*) ucs4_c[i];
-        size_t inbytesleft = 8;
-        char outbuf0[24];
-        char *outbuf = outbuf0;
-        size_t outbytesleft = sizeof(outbuf0);
+        char src[4];
+        char dst[4];
+        char utf8buf[6];
+        char *inbuf = src;
+        size_t inbytesleft = 4;
+        char *outbuf = utf8buf;
+        size_t outbytesleft = sizeof(utf8buf);
+        int i;
+        yaz_iconv_t cd = yaz_iconv_open("UTF-8", "UCS4LE");
+        if (!cd)
+            return 0;
+        for (i = 0; i<4; i++)
+            src[i] = c >> (i*8);
+        
+        r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+        yaz_iconv_close(cd);
+
+        if (r == (size_t)(-1))
+            return 0;
+
+        cd = yaz_iconv_open("UCS4LE", "UTF-8");
+        if (!cd)
+            return 0;
+        inbytesleft = sizeof(utf8buf) - outbytesleft;
+        inbuf = utf8buf;
+
+        outbuf = dst;
+        outbytesleft = 4;
 
         r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
-        if (r == (size_t) (-1))
-        {
-            int e = yaz_iconv_error(cd);
+        if (r == (size_t)(-1))
+            return 0;
 
-            printf ("tsticonv 31 i=%d e=%d\n", i, e);
-           exit(31);
-        }
-       len = outbuf - outbuf0;
-       if (len != strlen(utf8_c[i]) || memcmp(outbuf0, utf8_c[i], len))
-        {
-            printf ("tsticonv 32 len=%d gotlen=%d i=%d\n",
-                   strlen(utf8_c[i]), len, i);
-           exit(32);
-        }
+        yaz_iconv_close(cd);
+
+        if (memcmp(src, dst, 4))
+            return 0;
     }
+    return 1;
+}
+        
+static void tst_marc8_to_utf8(void)
+{
+    yaz_iconv_t cd = yaz_iconv_open("UTF-8", "MARC8");
+
+    YAZ_CHECK(cd);
+    if (!cd)
+        return;
+
+    YAZ_CHECK(tst_convert(cd, "Cours de math", 
+                          "Cours de math"));
+    /* COMBINING ACUTE ACCENT */
+    YAZ_CHECK(tst_convert(cd, "Cours de mathâe", 
+                          "Cours de mathe\xcc\x81"));
+
+
+    YAZ_CHECK(tst_convert(cd, "a\xea\x1e", "a\x1e\xcc\x8a"));
+
+    YAZ_CHECK(tst_convert(cd, "a\xea", "a"));
     yaz_iconv_close(cd);
 }
 
-static void dconvert(int mandatory, const char *tmpcode)
+static void tst_marc8s_to_utf8(void)
 {
-    int i;
-    yaz_iconv_t cd;
-    for (i = 0; iso_8859_1_a[i]; i++)
-    {
-        size_t r;
-       char *inbuf = (char*) iso_8859_1_a[i];
-       size_t inbytesleft = strlen(inbuf);
-       char outbuf0[24];
-       char outbuf1[10];
-       char *outbuf = outbuf0;
-       size_t outbytesleft = sizeof(outbuf0);
+    yaz_iconv_t cd = yaz_iconv_open("UTF-8", "MARC8s");
 
-        cd = yaz_iconv_open(tmpcode, "ISO-8859-1");
-       if (!cd)
-        {
-            if (!mandatory)
-                return;
-            printf ("tsticonv code=%s 1\n", tmpcode);
-           exit(1);
-        }
-       r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
-       if (r == (size_t)(-1))
-        {
-            int e = yaz_iconv_error(cd);
+    YAZ_CHECK(cd);
+    if (!cd)
+        return;
 
-            printf ("tsticonv code=%s 2 e=%d\n", tmpcode, e);
-           exit(2);
-        }
-       yaz_iconv_close(cd);
-        
-       cd = yaz_iconv_open("ISO-8859-1", tmpcode);
-       if (!cd)
-        {
-            if (!mandatory)
-                return;
-            printf ("tsticonv code=%s 3\n", tmpcode);
-           exit(3);
-        }
-       inbuf = outbuf0;
-       inbytesleft = sizeof(outbuf0) - outbytesleft;
+    YAZ_CHECK(tst_convert(cd, "Cours de math", 
+                          "Cours de math"));
+    /* E9: LATIN SMALL LETTER E WITH ACUTE */
+    YAZ_CHECK(tst_convert(cd, "Cours de mathâe", 
+                          "Cours de math\xc3\xa9"));
 
-       outbuf = outbuf1;
-       outbytesleft = sizeof(outbuf1);
-       r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
-       if (r == (size_t)(-1)) {
-            int e = yaz_iconv_error(cd);
+    yaz_iconv_close(cd);
+}
 
-            printf ("tsticonv code=%s 4 e=%d\n", tmpcode, e);
-           exit(4);
-       }
-       if (strlen(iso_8859_1_a[i]) == 
-           (sizeof(outbuf1) - outbytesleft) &&
-            memcmp(outbuf1, iso_8859_1_a[i],
-                  strlen(iso_8859_1_a[i])))
-        {
-            printf ("tsticonv code=%s 5\n", tmpcode);
-            exit(5);
-       }
-       yaz_iconv_close(cd);
-    }
+
+static void tst_marc8_to_latin1(void)
+{
+    yaz_iconv_t cd = yaz_iconv_open("ISO-8859-1", "MARC8");
+
+    YAZ_CHECK(cd);
+    if (!cd)
+        return;
+
+    YAZ_CHECK(tst_convert(cd, "ax", "ax"));
+
+    /* latin capital letter o with stroke */
+    YAZ_CHECK(tst_convert(cd, "\xa2", "\xd8"));
+
+    /* with latin small letter ae */
+    YAZ_CHECK(tst_convert(cd, "eneb\xb5r", "eneb\346r"));
+
+    YAZ_CHECK(tst_convert(cd, "\xea" "a\xa2", "\xe5" "\xd8"));
+
+    YAZ_CHECK(tst_convert(cd, "\xea" "a\xa2" "b", "\xe5" "\xd8" "b"));
+
+    YAZ_CHECK(tst_convert(cd, "\xea" "a"  "\xea" "a", "\xe5" "\xe5"));
+
+    YAZ_CHECK(tst_convert(cd, "Cours de math", 
+                          "Cours de math"));
+    YAZ_CHECK(tst_convert(cd, "Cours de mathâe", 
+                          "Cours de mathé"));
+    YAZ_CHECK(tst_convert(cd, "12345678âe", 
+                          "12345678é"));
+    YAZ_CHECK(tst_convert(cd, "123456789âe", 
+                          "123456789é"));
+    YAZ_CHECK(tst_convert(cd, "1234567890âe", 
+                          "1234567890é"));
+    YAZ_CHECK(tst_convert(cd, "12345678901âe", 
+                          "12345678901é"));
+    YAZ_CHECK(tst_convert(cd, "Cours de mathâem", 
+                          "Cours de mathém"));
+    YAZ_CHECK(tst_convert(cd, "Cours de mathâematiques", 
+                          "Cours de mathématiques"));
+
+    yaz_iconv_close(cd);
 }
-       
+
+static void tst_utf8_to_marc8(void)
+{
+    yaz_iconv_t cd = yaz_iconv_open("MARC8", "UTF-8");
+
+    YAZ_CHECK(cd);
+    if (!cd)
+        return;
+
+    YAZ_CHECK(tst_convert(cd, "Cours ", "Cours "));
+
+    /** Pure ASCII. 12 characters (sizeof(outbuf)) */
+    YAZ_CHECK(tst_convert(cd, "Cours de mat", "Cours de mat"));
+
+    /** Pure ASCII. 13 characters (sizeof(outbuf)+1) */
+    YAZ_CHECK(tst_convert(cd, "Cours de math", "Cours de math"));
+
+    /** Pure ASCII. 14 characters (sizeof(outbuf)+2) */
+    YAZ_CHECK(tst_convert(cd, "Cours de math.", "Cours de math."));
+
+    /** UPPERCASE SCANDINAVIAN O */
+    YAZ_CHECK(tst_convert(cd, "S\xc3\x98", "S\xa2"));
+
+    /** ARING */
+    YAZ_CHECK(tst_convert(cd, "A" "\xCC\x8A", "\xEA" "A"));
+
+    /** A MACRON + UMLAUT, DIAERESIS */
+    YAZ_CHECK(tst_convert(cd, "A" "\xCC\x84" "\xCC\x88",
+                          "\xE5\xE8\x41"));
+    
+    /* Ligature spanning two characters */
+    YAZ_CHECK(tst_convert(cd,
+                          "\x74" "\xCD\xA1" "\x73",  /* UTF-8 */
+                          "\xEB\x74\xEC\x73"));      /* MARC-8 */
+
+    /* Double tilde spanning two characters */
+    YAZ_CHECK(tst_convert(cd,
+                          "\x74" "\xCD\xA0" "\x73",  /* UTF-8 */
+                          "\xFA\x74\xFB\x73"));      /* MARC-8 */
+
+    /** Ideographic question mark (Unicode FF1F) */
+    YAZ_CHECK(tst_convert(cd,
+                          "\xEF\xBC\x9F" "o",        /* UTF-8 */
+                          "\033$1" "\x21\x2B\x3B" "\033(B" "o" ));
+
+
+    /** Superscript 0 . bug #642 */
+    YAZ_CHECK(tst_convert(cd,
+                          "(\xe2\x81\xb0)",        /* UTF-8 */
+                          "(\033p0\x1bs)"));
+    
+    yaz_iconv_close(cd);
+}
+
+
+static void tst_latin1_to_marc8(void)
+{
+    yaz_iconv_t cd = yaz_iconv_open("MARC8", "ISO-8859-1");
+
+    YAZ_CHECK(cd);
+    if (!cd)
+        return;
+
+    YAZ_CHECK(tst_convert(cd, "Cours ", "Cours "));
+
+    /** Pure ASCII. 12 characters (sizeof(outbuf)) */
+    YAZ_CHECK(tst_convert(cd, "Cours de mat", "Cours de mat"));
+
+    /** Pure ASCII. 13 characters (sizeof(outbuf)+1) */
+    YAZ_CHECK(tst_convert(cd, "Cours de math", "Cours de math"));
+
+    /** Pure ASCII. 14 characters (sizeof(outbuf)+2) */
+    YAZ_CHECK(tst_convert(cd, "Cours de math.", "Cours de math."));
+
+    /** D8: UPPERCASE SCANDINAVIAN O */
+    YAZ_CHECK(tst_convert(cd, "S\xd8", "S\xa2"));
+
+    /** E9: LATIN SMALL LETTER E WITH ACUTE */
+    YAZ_CHECK(tst_convert(cd, "Cours de math\xe9", "Cours de mathâe"));
+    YAZ_CHECK(tst_convert(cd, "Cours de math", "Cours de math"
+                  ));
+    YAZ_CHECK(tst_convert(cd, "Cours de mathé", "Cours de mathâe" ));
+    YAZ_CHECK(tst_convert(cd, "12345678é","12345678âe"));
+    YAZ_CHECK(tst_convert(cd, "123456789é", "123456789âe"));
+    YAZ_CHECK(tst_convert(cd, "1234567890é","1234567890âe"));
+    YAZ_CHECK(tst_convert(cd, "12345678901é", "12345678901âe"));
+    YAZ_CHECK(tst_convert(cd, "Cours de mathém", "Cours de mathâem"));
+    YAZ_CHECK(tst_convert(cd, "Cours de mathématiques",
+                          "Cours de mathâematiques"));
+    yaz_iconv_close(cd);
+}
+
+static void tst_utf8_codes(void)
+{
+    YAZ_CHECK(utf8_check(3));
+    YAZ_CHECK(utf8_check(127));
+    YAZ_CHECK(utf8_check(128));
+    YAZ_CHECK(utf8_check(255));
+    YAZ_CHECK(utf8_check(256));
+    YAZ_CHECK(utf8_check(900));
+    YAZ_CHECK(utf8_check(1000));
+    YAZ_CHECK(utf8_check(10000));
+    YAZ_CHECK(utf8_check(100000));
+    YAZ_CHECK(utf8_check(1000000));
+    YAZ_CHECK(utf8_check(10000000));
+    YAZ_CHECK(utf8_check(100000000));
+}
+
 int main (int argc, char **argv)
 {
+    YAZ_CHECK_INIT(argc, argv);
+
+    tst_utf8_codes();
+
+    tst_marc8_to_utf8();
+
+    tst_marc8s_to_utf8();
+
+    tst_marc8_to_latin1();
+
+    tst_utf8_to_marc8();
+
+    tst_latin1_to_marc8();
+
+    tst_marc8_to_ucs4b();
+    tst_ucs4b_to_utf8();
+
     dconvert(1, "UTF-8");
     dconvert(1, "ISO-8859-1");
     dconvert(1, "UCS4");
     dconvert(1, "UCS4LE");
     dconvert(0, "CP865");
-    marc8_tst_a();
-    marc8_tst_b();
-    marc8_tst_c();
-    exit (0);
+
+    YAZ_CHECK_TERM;
 }
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */