X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=test%2Ftsticonv.c;h=14129dbaeaba76e8d1292cd8a8dd6a26fd11d8c4;hp=b308a2c44565d485638f68590a17688b6b1df74c;hb=c7899589e137fd6848870b45f692ab0832b7069a;hpb=713dfee4f059554f5450ef5f914a8bc4d8648af3 diff --git a/test/tsticonv.c b/test/tsticonv.c index b308a2c..14129db 100644 --- a/test/tsticonv.c +++ b/test/tsticonv.c @@ -1,275 +1,697 @@ /* - * Copyright (c) 2002-2004, Index Data + * Copyright (C) 1995-2007, Index Data ApS * See the file LICENSE for details. * - * $Id: tsticonv.c,v 1.4 2004-11-16 22:51:52 adam Exp $ + * $Id: tsticonv.c,v 1.35 2008-03-12 08:53:28 adam Exp $ */ #if HAVE_CONFIG_H #include #endif +#include #include #include #include #include +#include -/* some test strings in ISO-8859-1 format */ -static const char *iso_8859_1_a[] = { - "ax" , - "\330", - "eneb\346r", - 0 }; - -/* same test strings in MARC-8 format */ -static const char *marc8_a[] = { - "ax", - "\xa2", /* latin capital letter o with stroke */ - "eneb\xb5r", /* latin small letter ae */ - 0 -}; +#define ESC "\x1b" -static void marc8_tst_a() +static int compare_buffers(char *msg, int no, + int expect_len, const char *expect_buf, + int got_len, const char *got_buf) { - int i; - yaz_iconv_t cd; - - cd = yaz_iconv_open("ISO-8859-1", "MARC8"); - if (!cd) + if (expect_len == got_len + && !memcmp(expect_buf, got_buf, expect_len)) + return 1; + + if (0) /* use 1 see how the buffers differ (for debug purposes) */ { - printf("tsticonv 10 yaz_iconv_open failed\n"); - exit(10); + int i; + printf("tsticonv test=%s i=%d failed\n", msg, no); + printf("off got exp\n"); + for (i = 0; i 0 ? in_len : strlen(in_buf); + char outbuf0[64]; + char *outbuf = outbuf0; + + while (inbytesleft) + { + size_t outbytesleft = outbuf0 + sizeof(outbuf0) - outbuf; + if (outbytesleft > 12) + outbytesleft = 12; r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); if (r == (size_t) (-1)) { int e = yaz_iconv_error(cd); - - printf ("tsticonv 11 i=%d e=%d\n", i, e); - exit(11); + if (e != YAZ_ICONV_E2BIG) + return 0; } - if ((outbuf - outbuf0) != strlen(iso_8859_1_a[i]) - || memcmp(outbuf0, iso_8859_1_a[i], - strlen(iso_8859_1_a[i]))) + else { - printf ("tsticonv 12 i=%d\n", i); - printf ("buf=%s out=%s\n", iso_8859_1_a[i], outbuf0); - exit(12); + yaz_iconv(cd, 0, 0, &outbuf, &outbytesleft); + break; } } - yaz_iconv_close(cd); + + return compare_buffers("tsticonv 22", 0, + expect_len, expect_buf, + outbuf - outbuf0, outbuf0); } -static void marc8_tst_b() +static int tst_convert(yaz_iconv_t cd, const char *buf, const char *cmpbuf) { - static const char *marc8_b[] = { - /* 0 */ - "\033$1" "\x21\x2B\x3B" /* FF1F */ "\033(B" "o", - /* 1 */ - "\033$1" "\x6F\x77\x29" /* AE0E */ "\x6F\x52\x7C" /* c0F4 */ "\033(B", - /* 2 */ - "\033$1" - "\x21\x50\x6E" /* UCS 7CFB */ - "\x21\x51\x31" /* UCS 7D71 */ - "\x21\x3A\x67" /* UCS 5B89 */ - "\x21\x33\x22" /* UCS 5168 */ - "\x21\x33\x53" /* UCS 5206 */ - "\x21\x44\x2B" /* UCS 6790 */ - "\033(B", - /* 3 */ - "\xB0\xB2", /* AYN and oSLASH */ - /* 4 */ - "\xF6\x61", /* a underscore */ - /* 5 */ - "\x61\xC2", /* a, phonorecord mark */ - 0 - }; - static const char *ucs4_b[] = { - "\x00\x00\xFF\x1F" "\x00\x00\x00o", - "\x00\x00\xAE\x0E" "\x00\x00\xC0\xF4", - "\x00\x00\x7C\xFB" - "\x00\x00\x7D\x71" - "\x00\x00\x5B\x89" - "\x00\x00\x51\x68" - "\x00\x00\x52\x06" - "\x00\x00\x67\x90", - "\x00\x00\x02\xBB" "\x00\x00\x00\xF8", - "\x00\x00\x00\x61" "\x00\x00\x03\x32", - "\x00\x00\x00\x61" "\x00\x00\x21\x17", - 0 - }; - int i; - yaz_iconv_t cd; - - cd = yaz_iconv_open("UCS4", "MARC8"); - if (!cd) + int ret = 0; + WRBUF b = wrbuf_alloc(); + char outbuf[12]; + size_t inbytesleft = strlen(buf); + const char *inp = buf; + int rounds = 0; + for (rounds = 0; inbytesleft && rounds < sizeof(outbuf); rounds++) { - printf ("tsticonv 20 yaz_iconv_open failed\n"); - exit(20); - } - for (i = 0; marc8_b[i]; i++) - { - size_t r; - size_t len; - size_t expect_len = i == 2 ? 24 : 8; - char *inbuf= (char*) marc8_b[i]; - size_t inbytesleft = strlen(inbuf); - char outbuf0[24]; - char *outbuf = outbuf0; - size_t outbytesleft = sizeof(outbuf0); - - r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + size_t outbytesleft = sizeof(outbuf); + char *outp = outbuf; + size_t r = yaz_iconv(cd, (char**) &inp, &inbytesleft, + &outp, &outbytesleft); + wrbuf_write(b, outbuf, outp - outbuf); if (r == (size_t) (-1)) { int e = yaz_iconv_error(cd); - - printf ("tsticonv 21 i=%d e=%d\n", i, e); - exit(21); + if (e != YAZ_ICONV_E2BIG) + break; } - len = outbuf - outbuf0; - if (len != expect_len || memcmp(outbuf0, ucs4_b[i], len)) + else { - printf ("tsticonv 22 len=%d gotlen=%d i=%d\n", expect_len, len, i); - exit(22); + size_t outbytesleft = sizeof(outbuf); + char *outp = outbuf; + r = yaz_iconv(cd, 0, 0, &outp, &outbytesleft); + wrbuf_write(b, outbuf, outp - outbuf); + break; } } - yaz_iconv_close(cd); + if (wrbuf_len(b) == strlen(cmpbuf) + && !memcmp(cmpbuf, wrbuf_buf(b), wrbuf_len(b))) + ret = 1; + else + { + WRBUF w = wrbuf_alloc(); + + wrbuf_rewind(w); + wrbuf_puts_escaped(w, buf); + yaz_log(YLOG_LOG, "input %s", wrbuf_cstr(w)); + + wrbuf_rewind(w); + wrbuf_write_escaped(w, wrbuf_buf(b), wrbuf_len(b)); + yaz_log(YLOG_LOG, "got %s", wrbuf_cstr(w)); + + wrbuf_rewind(w); + wrbuf_puts_escaped(w, cmpbuf); + yaz_log(YLOG_LOG, "exp %s", wrbuf_cstr(w)); + + wrbuf_destroy(w); + } + + wrbuf_destroy(b); + return ret; } -static void marc8_tst_c() + +/* some test strings in ISO-8859-1 format */ +static const char *iso_8859_1_a[] = { + "ax" , + "\xd8", + "eneb\346r", + "\xe5" "\xd8", + "\xe5" "\xd8" "b", + "\xe5" "\xe5", + 0 }; + +static void tst_marc8_to_ucs4b(void) { - static const char *ucs4_c[] = { - "\x00\x00\xFF\x1F\x00\x00\x00o", - "\x00\x00\xAE\x0E\x00\x00\xC0\xF4", - 0 - }; - static const char *utf8_c[] = { - "\xEF\xBC\x9F\x6F", - "\xEA\xB8\x8E\xEC\x83\xB4", - 0 - }; + yaz_iconv_t cd = yaz_iconv_open("UCS4", "MARC8"); + YAZ_CHECK(cd); + if (!cd) + return; - int i; - yaz_iconv_t cd; + YAZ_CHECK(tst_convert_l( + cd, + 0, + "\033$1" "\x21\x2B\x3B" /* FF1F */ "\033(B" "o", + 8, + "\x00\x00\xFF\x1F" "\x00\x00\x00o")); + YAZ_CHECK(tst_convert_l( + cd, + 0, + "\033$1" "\x6F\x77\x29" /* AE0E */ + "\x6F\x52\x7C" /* c0F4 */ "\033(B", + 8, + "\x00\x00\xAE\x0E" "\x00\x00\xC0\xF4")); + YAZ_CHECK(tst_convert_l( + cd, + 0, + "\033$1" + "\x21\x50\x6E" /* UCS 7CFB */ + "\x21\x51\x31" /* UCS 7D71 */ + "\x21\x3A\x67" /* UCS 5B89 */ + "\x21\x33\x22" /* UCS 5168 */ + "\x21\x33\x53" /* UCS 5206 */ + "\x21\x44\x2B" /* UCS 6790 */ + "\033(B", + 24, + "\x00\x00\x7C\xFB" + "\x00\x00\x7D\x71" + "\x00\x00\x5B\x89" + "\x00\x00\x51\x68" + "\x00\x00\x52\x06" + "\x00\x00\x67\x90")); + + YAZ_CHECK(tst_convert_l( + cd, + 0, + "\xB0\xB2", /* AYN and oSLASH */ + 8, + "\x00\x00\x02\xBB" "\x00\x00\x00\xF8")); + YAZ_CHECK(tst_convert_l( + cd, + 0, + "\xF6\x61", /* a underscore */ + 8, + "\x00\x00\x00\x61" "\x00\x00\x03\x32")); + + YAZ_CHECK(tst_convert_l( + cd, + 0, + "\x61\xC2", /* a, phonorecord mark */ + 8, + "\x00\x00\x00\x61" "\x00\x00\x21\x17")); - cd = yaz_iconv_open("UTF8", "UCS4"); + /* bug #258 */ + YAZ_CHECK(tst_convert_l( + cd, + 0, + "el" "\xe8" "am\xe8" "an", /* elaman where a is a" */ + 32, + "\x00\x00\x00" "e" + "\x00\x00\x00" "l" + "\x00\x00\x00" "a" + "\x00\x00\x03\x08" + "\x00\x00\x00" "m" + "\x00\x00\x00" "a" + "\x00\x00\x03\x08" + "\x00\x00\x00" "n")); + /* bug #260 */ + YAZ_CHECK(tst_convert_l( + cd, + 0, + "\xe5\xe8\x41", + 12, + "\x00\x00\x00\x41" "\x00\x00\x03\x04" "\x00\x00\x03\x08")); + /* bug #416 */ + YAZ_CHECK(tst_convert_l( + cd, + 0, + "\xEB\x74\xEC\x73", + 12, + "\x00\x00\x00\x74" "\x00\x00\x03\x61" "\x00\x00\x00\x73")); + /* bug #416 */ + YAZ_CHECK(tst_convert_l( + cd, + 0, + "\xFA\x74\xFB\x73", + 12, + "\x00\x00\x00\x74" "\x00\x00\x03\x60" "\x00\x00\x00\x73")); + + yaz_iconv_close(cd); +} + +static void tst_ucs4b_to_utf8(void) +{ + yaz_iconv_t cd = yaz_iconv_open("UTF8", "UCS4"); + YAZ_CHECK(cd); if (!cd) - { - printf ("tsticonv 30 yaz_iconv_open failed\n"); - exit(30); - } - for (i = 0; ucs4_c[i]; i++) + return; + YAZ_CHECK(tst_convert_l( + cd, + 8, + "\x00\x00\xFF\x1F\x00\x00\x00o", + 4, + "\xEF\xBC\x9F\x6F")); + + YAZ_CHECK(tst_convert_l( + cd, + 8, + "\x00\x00\xAE\x0E\x00\x00\xC0\xF4", + 6, + "\xEA\xB8\x8E\xEC\x83\xB4")); + yaz_iconv_close(cd); +} + +static void dconvert(int mandatory, const char *tmpcode) +{ + int i; + int ret; + yaz_iconv_t cd; + for (i = 0; iso_8859_1_a[i]; i++) { size_t r; - size_t len; - char *inbuf= (char*) ucs4_c[i]; - size_t inbytesleft = 8; + char *inbuf = (char*) iso_8859_1_a[i]; + size_t inbytesleft = strlen(inbuf); char outbuf0[24]; + char outbuf1[10]; char *outbuf = outbuf0; size_t outbytesleft = sizeof(outbuf0); + cd = yaz_iconv_open(tmpcode, "ISO-8859-1"); + YAZ_CHECK(cd || !mandatory); + if (!cd) + return; r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + YAZ_CHECK(r != (size_t) (-1)); + + r = yaz_iconv(cd, 0, 0, &outbuf, &outbytesleft); + YAZ_CHECK(r != (size_t) (-1)); + yaz_iconv_close(cd); if (r == (size_t) (-1)) - { - int e = yaz_iconv_error(cd); + return; + + cd = yaz_iconv_open("ISO-8859-1", tmpcode); + YAZ_CHECK(cd || !mandatory); + if (!cd) + return; + inbuf = outbuf0; + inbytesleft = sizeof(outbuf0) - outbytesleft; + + outbuf = outbuf1; + outbytesleft = sizeof(outbuf1); + r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + YAZ_CHECK(r != (size_t) (-1)); - printf ("tsticonv 31 i=%d e=%d\n", i, e); - exit(31); + r = yaz_iconv(cd, 0, 0, &outbuf, &outbytesleft); + if (r == (size_t)(-1)) + { + fprintf(stderr, "failed\n"); } - len = outbuf - outbuf0; - if (len != strlen(utf8_c[i]) || memcmp(outbuf0, utf8_c[i], len)) + YAZ_CHECK(r != (size_t) (-1)); + + if (r != (size_t)(-1)) { - printf ("tsticonv 32 len=%d gotlen=%d i=%d\n", - strlen(utf8_c[i]), len, i); - exit(32); + ret = compare_buffers("dconvert", i, + strlen(iso_8859_1_a[i]), iso_8859_1_a[i], + sizeof(outbuf1) - outbytesleft, outbuf1); + YAZ_CHECK(ret); } + yaz_iconv_close(cd); } +} + +int utf8_check(unsigned c) +{ + if (sizeof(c) >= 4) + { + size_t r; + char src[4]; + char dst[4]; + char utf8buf[6]; + char *inbuf = src; + size_t inbytesleft = 4; + char *outbuf = utf8buf; + size_t outbytesleft = sizeof(utf8buf); + int i; + yaz_iconv_t cd = yaz_iconv_open("UTF-8", "UCS4LE"); + if (!cd) + return 0; + for (i = 0; i<4; i++) + src[i] = c >> (i*8); + + r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + yaz_iconv_close(cd); + + if (r == (size_t)(-1)) + return 0; + + cd = yaz_iconv_open("UCS4LE", "UTF-8"); + if (!cd) + return 0; + inbytesleft = sizeof(utf8buf) - outbytesleft; + inbuf = utf8buf; + + outbuf = dst; + outbytesleft = 4; + + r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + if (r == (size_t)(-1)) + return 0; + + yaz_iconv_close(cd); + + if (memcmp(src, dst, 4)) + return 0; + } + return 1; +} + +static void tst_marc8_to_utf8(void) +{ + yaz_iconv_t cd = yaz_iconv_open("UTF-8", "MARC8"); + + YAZ_CHECK(cd); + if (!cd) + return; + + /* bug #2115 */ + YAZ_CHECK(tst_convert(cd, ESC "(N" ESC ")Qp" ESC "(B", "\xd0\x9f")); + + + YAZ_CHECK(tst_convert(cd, "Cours de math", + "Cours de math")); + /* COMBINING ACUTE ACCENT */ + YAZ_CHECK(tst_convert(cd, "Cours de mathâe", + "Cours de mathe\xcc\x81")); + + YAZ_CHECK(tst_convert(cd, "a\xea\x1e", "a\x1e\xcc\x8a")); + + YAZ_CHECK(tst_convert(cd, "a\xea", "a")); + yaz_iconv_close(cd); } -static void dconvert(int mandatory, const char *tmpcode) +static void tst_marc8s_to_utf8(void) { - int i; - yaz_iconv_t cd; - for (i = 0; iso_8859_1_a[i]; i++) + yaz_iconv_t cd = yaz_iconv_open("UTF-8", "MARC8s"); + + YAZ_CHECK(cd); + if (!cd) + return; + + YAZ_CHECK(tst_convert(cd, "Cours de math", + "Cours de math")); + /* E9: LATIN SMALL LETTER E WITH ACUTE */ + YAZ_CHECK(tst_convert(cd, "Cours de mathâe", + "Cours de math\xc3\xa9")); + + yaz_iconv_close(cd); +} + + +static void tst_marc8_to_latin1(void) +{ + yaz_iconv_t cd = yaz_iconv_open("ISO-8859-1", "MARC8"); + + YAZ_CHECK(cd); + if (!cd) + return; + + YAZ_CHECK(tst_convert(cd, "ax", "ax")); + + /* latin capital letter o with stroke */ + YAZ_CHECK(tst_convert(cd, "\xa2", "\xd8")); + + /* with latin small letter ae */ + YAZ_CHECK(tst_convert(cd, "eneb\xb5r", "eneb\346r")); + + YAZ_CHECK(tst_convert(cd, "\xea" "a\xa2", "\xe5" "\xd8")); + + YAZ_CHECK(tst_convert(cd, "\xea" "a\xa2" "b", "\xe5" "\xd8" "b")); + + YAZ_CHECK(tst_convert(cd, "\xea" "a" "\xea" "a", "\xe5" "\xe5")); + + YAZ_CHECK(tst_convert(cd, "Cours de math", + "Cours de math")); + YAZ_CHECK(tst_convert(cd, "Cours de mathâe", + "Cours de mathé")); + YAZ_CHECK(tst_convert(cd, "12345678âe", + "12345678é")); + YAZ_CHECK(tst_convert(cd, "123456789âe", + "123456789é")); + YAZ_CHECK(tst_convert(cd, "1234567890âe", + "1234567890é")); + YAZ_CHECK(tst_convert(cd, "12345678901âe", + "12345678901é")); + YAZ_CHECK(tst_convert(cd, "Cours de mathâem", + "Cours de mathém")); + YAZ_CHECK(tst_convert(cd, "Cours de mathâematiques", + "Cours de mathématiques")); + + yaz_iconv_close(cd); +} + +static void tst_utf8_to_marc8(void) +{ + yaz_iconv_t cd = yaz_iconv_open("MARC8", "UTF-8"); + + YAZ_CHECK(cd); + if (!cd) + return; + + YAZ_CHECK(tst_convert(cd, "Cours ", "Cours ")); + + /** Pure ASCII. 11 characters (sizeof(outbuf)-1) */ + YAZ_CHECK(tst_convert(cd, "Cours de mat", "Cours de mat")); + + /** Pure ASCII. 12 characters (sizeof(outbuf)) */ + YAZ_CHECK(tst_convert(cd, "Cours de math", "Cours de math")); + + /** Pure ASCII. 13 characters (sizeof(outbuf)+1) */ + YAZ_CHECK(tst_convert(cd, "Cours de math.", "Cours de math.")); + + /** UPPERCASE SCANDINAVIAN O */ + YAZ_CHECK(tst_convert(cd, "S\xc3\x98", "S\xa2")); + + /** ARING */ + YAZ_CHECK(tst_convert(cd, "A" "\xCC\x8A", "\xEA" "A")); + + /** A MACRON + UMLAUT, DIAERESIS */ + YAZ_CHECK(tst_convert(cd, "A" "\xCC\x84" "\xCC\x88", + "\xE5\xE8\x41")); + + /* Ligature spanning two characters */ + YAZ_CHECK(tst_convert(cd, + "\x74" "\xCD\xA1" "\x73", /* UTF-8 */ + "\xEB\x74\xEC\x73")); /* MARC-8 */ + + /* Double title spanning two characters */ + YAZ_CHECK(tst_convert(cd, + "\x74" "\xCD\xA0" "\x73", /* UTF-8 */ + "\xFA\x74\xFB\x73")); /* MARC-8 */ + + /** Ideographic question mark (Unicode FF1F) */ + YAZ_CHECK(tst_convert(cd, + "\xEF\xBC\x9F" "o", /* UTF-8 */ + "\033$1" "\x21\x2B\x3B" "\033(B" "o" )); + + + /** Ideographic space per ANSI Z39.64 */ + YAZ_CHECK(tst_convert(cd, + "\xe3\x80\x80" "o", /* UTF-8 */ + "\033$1" "\x21\x23\x21" "\033(B" "o" )); + + /** Superscript 0 . bug #642 */ + YAZ_CHECK(tst_convert(cd, + "(\xe2\x81\xb0)", /* UTF-8 */ + "(\033p0\x1bs)")); + + + /** bug #1778 */ + YAZ_CHECK(tst_convert(cd, + /* offset 0x530 in UTF-8 rec marccol4.u8.marc */ + "\xE3\x83\xB3" "\xE3\x82\xBF" + "\xCC\x84" "\xCC\x84" "\xE3\x83\xBC" /* UTF-8 */, + "\x1B\x24\x31" "\x69\x25\x73" + "\x1B\x28\x42" "\xE5\xE5" "\x1B\x24\x31" + "\x69\x25\x3F" + "\x69\x21\x3C" "\x1B\x28\x42")); + + + /** bug #2120 */ + YAZ_CHECK(tst_convert(cd, + "\xCE\x94\xCE\xB5\xCF\x84" + "\xCE\xBF\xCF\x81\xCE\xB1" + "\xCE\xBA\xCE\xB7\xCF\x82\x2C", + + "\x1B\x28\x53\x45\x66\x78\x72\x75" + "\x61\x6D\x6A\x77" + "\x1B\x28\x42\x2C" + )); + { + char *inbuf0 = "\xe2\x81\xb0"; + char *inbuf = inbuf0; + size_t inbytesleft = strlen(inbuf); + char outbuf0[64]; + char *outbuf = outbuf0; + size_t outbytesleft = sizeof(outbuf0)-1; size_t r; - char *inbuf = (char*) iso_8859_1_a[i]; - size_t inbytesleft = strlen(inbuf); - char outbuf0[24]; - char outbuf1[10]; - char *outbuf = outbuf0; - size_t outbytesleft = sizeof(outbuf0); +#if 0 + int i; +#endif + r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + YAZ_CHECK(r != (size_t) (-1)); - cd = yaz_iconv_open(tmpcode, "ISO-8859-1"); - if (!cd) +#if 0 + *outbuf = '\0'; /* so we know when to stop printing */ + for (i = 0; outbuf0[i]; i++) { - if (!mandatory) - return; - printf ("tsticonv code=%s 1\n", tmpcode); - exit(1); + int ch = outbuf0[i] & 0xff; + yaz_log(YLOG_LOG, "ch%d %02X %c", i, ch, ch >= ' ' ? ch : '?'); } - r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); - if (r == (size_t)(-1)) - { - int e = yaz_iconv_error(cd); +#endif - printf ("tsticonv code=%s 2 e=%d\n", tmpcode, e); - exit(2); - } - yaz_iconv_close(cd); - - cd = yaz_iconv_open("ISO-8859-1", tmpcode); - if (!cd) + r = yaz_iconv(cd, 0, 0, &outbuf, &outbytesleft); + YAZ_CHECK(r != (size_t) (-1)); + *outbuf = '\0'; /* for strcmp test below and printing */ +#if 0 + for (i = 0; outbuf0[i]; i++) { - if (!mandatory) - return; - printf ("tsticonv code=%s 3\n", tmpcode); - exit(3); + int ch = outbuf0[i] & 0xff; + yaz_log(YLOG_LOG, "ch%d %02X %c", i, ch, ch >= ' ' ? ch : '?'); } - inbuf = outbuf0; - inbytesleft = sizeof(outbuf0) - outbytesleft; +#endif + YAZ_CHECK(strcmp("\033p0\x1bs", outbuf0) == 0); + } + yaz_iconv_close(cd); +} - outbuf = outbuf1; - outbytesleft = sizeof(outbuf1); - r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); - if (r == (size_t)(-1)) { - int e = yaz_iconv_error(cd); +static void tst_advance_to_utf8(void) +{ + yaz_iconv_t cd = yaz_iconv_open("utf-8", "advancegreek"); - printf ("tsticonv code=%s 4 e=%d\n", tmpcode, e); - exit(4); - } - if (strlen(iso_8859_1_a[i]) == - (sizeof(outbuf1) - outbytesleft) && - memcmp(outbuf1, iso_8859_1_a[i], - strlen(iso_8859_1_a[i]))) - { - printf ("tsticonv code=%s 5\n", tmpcode); - exit(5); - } - yaz_iconv_close(cd); - } + YAZ_CHECK(cd); + if (!cd) + return; + + YAZ_CHECK(tst_convert(cd, "Cours ", "Cours ")); + yaz_iconv_close(cd); +} + +static void tst_utf8_to_advance(void) +{ + yaz_iconv_t cd = yaz_iconv_open("advancegreek", "utf-8"); + + YAZ_CHECK(cd); + if (!cd) + return; + + YAZ_CHECK(tst_convert(cd, "Cours ", "Cours ")); + yaz_iconv_close(cd); +} + +static void tst_latin1_to_marc8(void) +{ + yaz_iconv_t cd = yaz_iconv_open("MARC8", "ISO-8859-1"); + + YAZ_CHECK(cd); + if (!cd) + return; + + YAZ_CHECK(tst_convert(cd, "Cours ", "Cours ")); + + /** Pure ASCII. 11 characters (sizeof(outbuf)-1) */ + YAZ_CHECK(tst_convert(cd, "Cours de mat", "Cours de mat")); + + /** Pure ASCII. 12 characters (sizeof(outbuf)) */ + YAZ_CHECK(tst_convert(cd, "Cours de math", "Cours de math")); + + /** Pure ASCII. 13 characters (sizeof(outbuf)) */ + YAZ_CHECK(tst_convert(cd, "Cours de math.", "Cours de math.")); + + /** D8: UPPERCASE SCANDINAVIAN O */ + YAZ_CHECK(tst_convert(cd, "S\xd8", "S\xa2")); + + /** E9: LATIN SMALL LETTER E WITH ACUTE */ + YAZ_CHECK(tst_convert(cd, "Cours de math\xe9", "Cours de mathâe")); + YAZ_CHECK(tst_convert(cd, "Cours de math", "Cours de math" + )); + YAZ_CHECK(tst_convert(cd, "Cours de mathé", "Cours de mathâe" )); + YAZ_CHECK(tst_convert(cd, "12345678é","12345678âe")); + YAZ_CHECK(tst_convert(cd, "123456789é", "123456789âe")); + YAZ_CHECK(tst_convert(cd, "1234567890é","1234567890âe")); + YAZ_CHECK(tst_convert(cd, "12345678901é", "12345678901âe")); + YAZ_CHECK(tst_convert(cd, "Cours de mathém", "Cours de mathâem")); + YAZ_CHECK(tst_convert(cd, "Cours de mathématiques", + "Cours de mathâematiques")); + yaz_iconv_close(cd); +} + +static void tst_utf8_codes(void) +{ + YAZ_CHECK(utf8_check(3)); + YAZ_CHECK(utf8_check(127)); + YAZ_CHECK(utf8_check(128)); + YAZ_CHECK(utf8_check(255)); + YAZ_CHECK(utf8_check(256)); + YAZ_CHECK(utf8_check(900)); + YAZ_CHECK(utf8_check(1000)); + YAZ_CHECK(utf8_check(10000)); + YAZ_CHECK(utf8_check(100000)); + YAZ_CHECK(utf8_check(1000000)); + YAZ_CHECK(utf8_check(10000000)); + YAZ_CHECK(utf8_check(100000000)); } - + int main (int argc, char **argv) { + YAZ_CHECK_INIT(argc, argv); + + tst_utf8_codes(); + + tst_marc8_to_utf8(); + + tst_marc8s_to_utf8(); + + tst_marc8_to_latin1(); + + tst_advance_to_utf8(); + tst_utf8_to_advance(); + + tst_utf8_to_marc8(); + + tst_latin1_to_marc8(); + + tst_marc8_to_ucs4b(); + tst_ucs4b_to_utf8(); + dconvert(1, "UTF-8"); dconvert(1, "ISO-8859-1"); dconvert(1, "UCS4"); dconvert(1, "UCS4LE"); dconvert(0, "CP865"); - marc8_tst_a(); - marc8_tst_b(); - marc8_tst_c(); - exit (0); + + YAZ_CHECK_TERM; } +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */