X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=test%2Ftsticonv.c;h=7c69c0b4661b7004736a663bd52648d7e44e59e6;hb=b0aea56b51603b34d526bbfb94fed2ae0976fb0f;hp=d95d798aa4179f47a32677a42c018ef374d075be;hpb=711f37334de1dde9fee4dfdee2e9263f42373494;p=yaz-moved-to-github.git diff --git a/test/tsticonv.c b/test/tsticonv.c index d95d798..7c69c0b 100644 --- a/test/tsticonv.c +++ b/test/tsticonv.c @@ -1,8 +1,8 @@ /* - * Copyright (C) 1995-2005, Index Data ApS + * Copyright (C) 1995-2007, Index Data ApS * See the file LICENSE for details. * - * $Id: tsticonv.c,v 1.18 2006-04-24 23:21:26 adam Exp $ + * $Id: tsticonv.c,v 1.26 2007-03-12 10:59:59 adam Exp $ */ #if HAVE_CONFIG_H @@ -90,19 +90,20 @@ static int tst_convert(yaz_iconv_t cd, const char *buf, const char *cmpbuf) char outbuf[12]; size_t inbytesleft = strlen(buf); const char *inp = buf; - while (inbytesleft) + int rounds = 0; + for (rounds = 0; inbytesleft && rounds < sizeof(outbuf); rounds++) { size_t outbytesleft = sizeof(outbuf); char *outp = outbuf; size_t r = yaz_iconv(cd, (char**) &inp, &inbytesleft, &outp, &outbytesleft); + wrbuf_write(b, outbuf, outp - outbuf); if (r == (size_t) (-1)) { int e = yaz_iconv_error(cd); if (e != YAZ_ICONV_E2BIG) break; } - wrbuf_write(b, outbuf, outp - outbuf); } if (wrbuf_len(b) == strlen(cmpbuf) && !memcmp(cmpbuf, wrbuf_buf(b), wrbuf_len(b))) @@ -124,7 +125,7 @@ static const char *iso_8859_1_a[] = { "\xe5" "\xe5", 0 }; -static void tst_marc8_to_ucs4b() +static void tst_marc8_to_ucs4b(void) { yaz_iconv_t cd = yaz_iconv_open("UCS4", "MARC8"); YAZ_CHECK(cd); @@ -222,7 +223,7 @@ static void tst_marc8_to_ucs4b() yaz_iconv_close(cd); } -static void tst_ucs4b_to_utf8() +static void tst_ucs4b_to_utf8(void) { yaz_iconv_t cd = yaz_iconv_open("UTF8", "UCS4"); YAZ_CHECK(cd); @@ -337,7 +338,7 @@ int utf8_check(unsigned c) return 1; } -static void tst_marc8_to_utf8() +static void tst_marc8_to_utf8(void) { yaz_iconv_t cd = yaz_iconv_open("UTF-8", "MARC8"); @@ -350,10 +351,15 @@ static void tst_marc8_to_utf8() /* COMBINING ACUTE ACCENT */ YAZ_CHECK(tst_convert(cd, "Cours de mathâe", "Cours de mathe\xcc\x81")); + + + YAZ_CHECK(tst_convert(cd, "a\xea\x1e", "a\x1e\xcc\x8a")); + + YAZ_CHECK(tst_convert(cd, "a\xea", "a")); yaz_iconv_close(cd); } -static void tst_marc8s_to_utf8() +static void tst_marc8s_to_utf8(void) { yaz_iconv_t cd = yaz_iconv_open("UTF-8", "MARC8s"); @@ -371,7 +377,7 @@ static void tst_marc8s_to_utf8() } -static void tst_marc8_to_latin1() +static void tst_marc8_to_latin1(void) { yaz_iconv_t cd = yaz_iconv_open("ISO-8859-1", "MARC8"); @@ -413,7 +419,7 @@ static void tst_marc8_to_latin1() yaz_iconv_close(cd); } -static void tst_utf8_to_marc8() +static void tst_utf8_to_marc8(void) { yaz_iconv_t cd = yaz_iconv_open("MARC8", "UTF-8"); @@ -455,13 +461,43 @@ static void tst_utf8_to_marc8() /** Ideographic question mark (Unicode FF1F) */ YAZ_CHECK(tst_convert(cd, "\xEF\xBC\x9F" "o", /* UTF-8 */ - "\033(1" "\x21\x2B\x3B" "\033(B" "o" )); + "\033$1" "\x21\x2B\x3B" "\033(B" "o" )); + + /** Superscript 0 . bug #642 */ + YAZ_CHECK(tst_convert(cd, + "(\xe2\x81\xb0)", /* UTF-8 */ + "(\033p0\x1bs)")); + + yaz_iconv_close(cd); } +static void tst_advance_to_utf8(void) +{ + yaz_iconv_t cd = yaz_iconv_open("utf-8", "advancegreek"); + + YAZ_CHECK(cd); + if (!cd) + return; -static void tst_latin1_to_marc8() + YAZ_CHECK(tst_convert(cd, "Cours ", "Cours ")); + yaz_iconv_close(cd); +} + +static void tst_utf8_to_advance(void) +{ + yaz_iconv_t cd = yaz_iconv_open("advancegreek", "utf-8"); + + YAZ_CHECK(cd); + if (!cd) + return; + + YAZ_CHECK(tst_convert(cd, "Cours ", "Cours ")); + yaz_iconv_close(cd); +} + +static void tst_latin1_to_marc8(void) { yaz_iconv_t cd = yaz_iconv_open("MARC8", "ISO-8859-1"); @@ -480,13 +516,25 @@ static void tst_latin1_to_marc8() /** Pure ASCII. 13 characters (sizeof(outbuf)) */ YAZ_CHECK(tst_convert(cd, "Cours de math.", "Cours de math.")); - /** UPPERCASE SCANDINAVIAN O */ - YAZ_CHECK(tst_convert(cd, "SØ", "S\xa2")); - + /** D8: UPPERCASE SCANDINAVIAN O */ + YAZ_CHECK(tst_convert(cd, "S\xd8", "S\xa2")); + + /** E9: LATIN SMALL LETTER E WITH ACUTE */ + YAZ_CHECK(tst_convert(cd, "Cours de math\xe9", "Cours de mathâe")); + YAZ_CHECK(tst_convert(cd, "Cours de math", "Cours de math" + )); + YAZ_CHECK(tst_convert(cd, "Cours de mathé", "Cours de mathâe" )); + YAZ_CHECK(tst_convert(cd, "12345678é","12345678âe")); + YAZ_CHECK(tst_convert(cd, "123456789é", "123456789âe")); + YAZ_CHECK(tst_convert(cd, "1234567890é","1234567890âe")); + YAZ_CHECK(tst_convert(cd, "12345678901é", "12345678901âe")); + YAZ_CHECK(tst_convert(cd, "Cours de mathém", "Cours de mathâem")); + YAZ_CHECK(tst_convert(cd, "Cours de mathématiques", + "Cours de mathâematiques")); yaz_iconv_close(cd); } -static void tst_utf8_codes() +static void tst_utf8_codes(void) { YAZ_CHECK(utf8_check(3)); YAZ_CHECK(utf8_check(127)); @@ -514,6 +562,9 @@ int main (int argc, char **argv) tst_marc8_to_latin1(); + tst_advance_to_utf8(); + tst_utf8_to_advance(); + tst_utf8_to_marc8(); tst_latin1_to_marc8();