X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=test%2Ftsticonv.c;h=fd8f2c8de651ca9b62d974083a60c13468de1209;hp=b308a2c44565d485638f68590a17688b6b1df74c;hb=8626b7019b5d3d9c9594f20025e97d06a2d590fc;hpb=713dfee4f059554f5450ef5f914a8bc4d8648af3 diff --git a/test/tsticonv.c b/test/tsticonv.c index b308a2c..fd8f2c8 100644 --- a/test/tsticonv.c +++ b/test/tsticonv.c @@ -1,25 +1,67 @@ /* - * Copyright (c) 2002-2004, Index Data + * Copyright (C) 1995-2005, Index Data ApS * See the file LICENSE for details. * - * $Id: tsticonv.c,v 1.4 2004-11-16 22:51:52 adam Exp $ + * $Id: tsticonv.c,v 1.17 2006-04-19 23:15:40 adam Exp $ */ #if HAVE_CONFIG_H #include #endif +#include #include #include #include #include +#include + +static int compare_buffers(char *msg, int no, + int expect_len, const char *expect_buf, + int got_len, const char *got_buf) +{ + if (expect_len == got_len + && !memcmp(expect_buf, got_buf, expect_len)) + return 1; + + if (0) /* use 1 see how the buffers differ (for debug purposes) */ + { + int i; + printf("tsticonv test=%s i=%d failed\n", msg, no); + printf("off got exp\n"); + for (i = 0; i 12) + outbytesleft = 12; + r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + if (r == (size_t) (-1)) + { + int e = yaz_iconv_error(cd); + YAZ_CHECK(e == YAZ_ICONV_E2BIG); + if (e != YAZ_ICONV_E2BIG) + return; + } + else + break; } + ret = compare_buffers("tsticonv 22", i, + expect_len, ar[i].ucs4_b, + outbuf - outbuf0, outbuf0); + YAZ_CHECK(ret); } yaz_iconv_close(cd); } -static void marc8_tst_c() +static void tst_ucs4b_to_utf8() { static const char *ucs4_c[] = { - "\x00\x00\xFF\x1F\x00\x00\x00o", - "\x00\x00\xAE\x0E\x00\x00\xC0\xF4", - 0 + "\x00\x00\xFF\x1F\x00\x00\x00o", + "\x00\x00\xAE\x0E\x00\x00\xC0\xF4", + 0 }; static const char *utf8_c[] = { - "\xEF\xBC\x9F\x6F", - "\xEA\xB8\x8E\xEC\x83\xB4", - 0 + "\xEF\xBC\x9F\x6F", + "\xEA\xB8\x8E\xEC\x83\xB4", + 0 }; int i; + int ret; yaz_iconv_t cd; cd = yaz_iconv_open("UTF8", "UCS4"); + YAZ_CHECK(cd); if (!cd) - { - printf ("tsticonv 30 yaz_iconv_open failed\n"); - exit(30); - } + return; for (i = 0; ucs4_c[i]; i++) { size_t r; - size_t len; char *inbuf= (char*) ucs4_c[i]; size_t inbytesleft = 8; char outbuf0[24]; @@ -179,20 +245,13 @@ static void marc8_tst_c() size_t outbytesleft = sizeof(outbuf0); r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + YAZ_CHECK(r != (size_t) (-1)); if (r == (size_t) (-1)) - { - int e = yaz_iconv_error(cd); - - printf ("tsticonv 31 i=%d e=%d\n", i, e); - exit(31); - } - len = outbuf - outbuf0; - if (len != strlen(utf8_c[i]) || memcmp(outbuf0, utf8_c[i], len)) - { - printf ("tsticonv 32 len=%d gotlen=%d i=%d\n", - strlen(utf8_c[i]), len, i); - exit(32); - } + return; + ret = compare_buffers("tsticonv 32", i, + strlen(utf8_c[i]), utf8_c[i], + outbuf - outbuf0, outbuf0); + YAZ_CHECK(ret); } yaz_iconv_close(cd); } @@ -200,76 +259,265 @@ static void marc8_tst_c() static void dconvert(int mandatory, const char *tmpcode) { int i; + int ret; yaz_iconv_t cd; for (i = 0; iso_8859_1_a[i]; i++) { size_t r; - char *inbuf = (char*) iso_8859_1_a[i]; - size_t inbytesleft = strlen(inbuf); - char outbuf0[24]; - char outbuf1[10]; - char *outbuf = outbuf0; - size_t outbytesleft = sizeof(outbuf0); + char *inbuf = (char*) iso_8859_1_a[i]; + size_t inbytesleft = strlen(inbuf); + char outbuf0[24]; + char outbuf1[10]; + char *outbuf = outbuf0; + size_t outbytesleft = sizeof(outbuf0); cd = yaz_iconv_open(tmpcode, "ISO-8859-1"); - if (!cd) + YAZ_CHECK(cd || !mandatory); + if (!cd) + return; + r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + YAZ_CHECK(r != (size_t) (-1)); + yaz_iconv_close(cd); + if (r == (size_t) (-1)) + return; + + cd = yaz_iconv_open("ISO-8859-1", tmpcode); + YAZ_CHECK(cd || !mandatory); + if (!cd) + return; + inbuf = outbuf0; + inbytesleft = sizeof(outbuf0) - outbytesleft; + + outbuf = outbuf1; + outbytesleft = sizeof(outbuf1); + r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + YAZ_CHECK(r != (size_t) (-1)); + if (r != (size_t)(-1)) { - if (!mandatory) - return; - printf ("tsticonv code=%s 1\n", tmpcode); - exit(1); + ret = compare_buffers("dconvert", i, + strlen(iso_8859_1_a[i]), iso_8859_1_a[i], + sizeof(outbuf1) - outbytesleft, outbuf1); + YAZ_CHECK(ret); } - r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); - if (r == (size_t)(-1)) - { - int e = yaz_iconv_error(cd); + yaz_iconv_close(cd); + } +} - printf ("tsticonv code=%s 2 e=%d\n", tmpcode, e); - exit(2); - } - yaz_iconv_close(cd); +int utf8_check(unsigned c) +{ + if (sizeof(c) >= 4) + { + size_t r; + char src[4]; + char dst[4]; + char utf8buf[6]; + char *inbuf = src; + size_t inbytesleft = 4; + char *outbuf = utf8buf; + size_t outbytesleft = sizeof(utf8buf); + int i; + yaz_iconv_t cd = yaz_iconv_open("UTF-8", "UCS4LE"); + if (!cd) + return 0; + for (i = 0; i<4; i++) + src[i] = c >> (i*8); - cd = yaz_iconv_open("ISO-8859-1", tmpcode); - if (!cd) - { - if (!mandatory) - return; - printf ("tsticonv code=%s 3\n", tmpcode); - exit(3); - } - inbuf = outbuf0; - inbytesleft = sizeof(outbuf0) - outbytesleft; + r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + yaz_iconv_close(cd); - outbuf = outbuf1; - outbytesleft = sizeof(outbuf1); - r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); - if (r == (size_t)(-1)) { - int e = yaz_iconv_error(cd); + if (r == (size_t)(-1)) + return 0; + + cd = yaz_iconv_open("UCS4LE", "UTF-8"); + if (!cd) + return 0; + inbytesleft = sizeof(utf8buf) - outbytesleft; + inbuf = utf8buf; - printf ("tsticonv code=%s 4 e=%d\n", tmpcode, e); - exit(4); - } - if (strlen(iso_8859_1_a[i]) == - (sizeof(outbuf1) - outbytesleft) && - memcmp(outbuf1, iso_8859_1_a[i], - strlen(iso_8859_1_a[i]))) + outbuf = dst; + outbytesleft = 4; + + r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + if (r == (size_t)(-1)) + return 0; + + yaz_iconv_close(cd); + + if (memcmp(src, dst, 4)) + return 0; + } + return 1; +} + +static int tst_convert(yaz_iconv_t cd, const char *buf, const char *cmpbuf) +{ + int ret = 0; + WRBUF b = wrbuf_alloc(); + char outbuf[12]; + size_t inbytesleft = strlen(buf); + const char *inp = buf; + while (inbytesleft) + { + size_t outbytesleft = sizeof(outbuf); + char *outp = outbuf; + size_t r = yaz_iconv(cd, (char**) &inp, &inbytesleft, + &outp, &outbytesleft); + if (r == (size_t) (-1)) { - printf ("tsticonv code=%s 5\n", tmpcode); - exit(5); - } - yaz_iconv_close(cd); + int e = yaz_iconv_error(cd); + if (e != YAZ_ICONV_E2BIG) + break; + } + wrbuf_write(b, outbuf, outp - outbuf); } + if (wrbuf_len(b) == strlen(cmpbuf) + && !memcmp(cmpbuf, wrbuf_buf(b), wrbuf_len(b))) + ret = 1; + else + yaz_log(YLOG_LOG, "GOT (%.*s)", wrbuf_len(b), wrbuf_buf(b)); + wrbuf_free(b, 1); + return ret; +} + +static void tst_conversion_marc8_to_latin1() +{ + yaz_iconv_t cd = yaz_iconv_open("ISO-8859-1", "MARC8"); + + YAZ_CHECK(cd); + if (!cd) + return; + + YAZ_CHECK(tst_convert(cd, "Cours de math", + "Cours de math")); + YAZ_CHECK(tst_convert(cd, "Cours de mathâe", + "Cours de mathé")); + YAZ_CHECK(tst_convert(cd, "12345678âe", + "12345678é")); + YAZ_CHECK(tst_convert(cd, "123456789âe", + "123456789é")); + YAZ_CHECK(tst_convert(cd, "1234567890âe", + "1234567890é")); + YAZ_CHECK(tst_convert(cd, "12345678901âe", + "12345678901é")); + YAZ_CHECK(tst_convert(cd, "Cours de mathâem", + "Cours de mathém")); + YAZ_CHECK(tst_convert(cd, "Cours de mathâematiques", + "Cours de mathématiques")); + + yaz_iconv_close(cd); } - + +static void tst_conversion_utf8_to_marc8() +{ + yaz_iconv_t cd = yaz_iconv_open("MARC8", "UTF-8"); + + YAZ_CHECK(cd); + if (!cd) + return; + + YAZ_CHECK(tst_convert(cd, "Cours ", "Cours ")); + + /** Pure ASCII. 11 characters (sizeof(outbuf)-1) */ + YAZ_CHECK(tst_convert(cd, "Cours de mat", "Cours de mat")); + + /** Pure ASCII. 12 characters (sizeof(outbuf)) */ + YAZ_CHECK(tst_convert(cd, "Cours de math", "Cours de math")); + + /** Pure ASCII. 13 characters (sizeof(outbuf)) */ + YAZ_CHECK(tst_convert(cd, "Cours de math.", "Cours de math.")); + + /** UPPERCASE SCANDINAVIAN O */ + YAZ_CHECK(tst_convert(cd, "S\xc3\x98", "S\xa2")); + + /** ARING */ + YAZ_CHECK(tst_convert(cd, "A" "\xCC\x8A", "\xEA" "A")); + + /** A MACRON + UMLAUT, DIAERESIS */ + YAZ_CHECK(tst_convert(cd, "A" "\xCC\x84" "\xCC\x88", + "\xE5\xE8\x41")); + + /* Ligature spanning two characters */ + YAZ_CHECK(tst_convert(cd, + "\x74" "\xCD\xA1" "\x73", /* UTF-8 */ + "\xEB\x74\xEC\x73")); /* MARC-8 */ + + /* Double title spanning two characters */ + YAZ_CHECK(tst_convert(cd, + "\x74" "\xCD\xA0" "\x73", /* UTF-8 */ + "\xFA\x74\xFB\x73")); /* MARC-8 */ + + /** Ideographic question mark (Unicode FF1F) */ + YAZ_CHECK(tst_convert(cd, + "\xEF\xBC\x9F" "o", /* UTF-8 */ + "\033(1" "\x21\x2B\x3B" "\033(B" "o" )); + + yaz_iconv_close(cd); +} + + +static void tst_conversion_latin1_to_marc8() +{ + yaz_iconv_t cd = yaz_iconv_open("MARC8", "ISO-8859-1"); + + YAZ_CHECK(cd); + if (!cd) + return; + + YAZ_CHECK(tst_convert(cd, "Cours ", "Cours ")); + + /** Pure ASCII. 11 characters (sizeof(outbuf)-1) */ + YAZ_CHECK(tst_convert(cd, "Cours de mat", "Cours de mat")); + + /** Pure ASCII. 12 characters (sizeof(outbuf)) */ + YAZ_CHECK(tst_convert(cd, "Cours de math", "Cours de math")); + + /** Pure ASCII. 13 characters (sizeof(outbuf)) */ + YAZ_CHECK(tst_convert(cd, "Cours de math.", "Cours de math.")); + + /** UPPERCASE SCANDINAVIAN O */ + YAZ_CHECK(tst_convert(cd, "SØ", "S\xa2")); + + yaz_iconv_close(cd); +} + int main (int argc, char **argv) { + YAZ_CHECK_INIT(argc, argv); + + tst_conversion_marc8_to_latin1(); + + tst_conversion_utf8_to_marc8(); + + tst_conversion_latin1_to_marc8(); + + YAZ_CHECK(utf8_check(3)); + YAZ_CHECK(utf8_check(127)); + YAZ_CHECK(utf8_check(128)); + YAZ_CHECK(utf8_check(255)); + YAZ_CHECK(utf8_check(256)); + YAZ_CHECK(utf8_check(900)); + YAZ_CHECK(utf8_check(1000)); + YAZ_CHECK(utf8_check(10000)); + YAZ_CHECK(utf8_check(100000)); + YAZ_CHECK(utf8_check(1000000)); + YAZ_CHECK(utf8_check(10000000)); + YAZ_CHECK(utf8_check(100000000)); + dconvert(1, "UTF-8"); dconvert(1, "ISO-8859-1"); dconvert(1, "UCS4"); dconvert(1, "UCS4LE"); dconvert(0, "CP865"); - marc8_tst_a(); - marc8_tst_b(); - marc8_tst_c(); - exit (0); + tst_marc8_to_iso_8859_1(); + tst_marc8_to_ucs4b(); + tst_ucs4b_to_utf8(); + + YAZ_CHECK_TERM; } +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */