X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=test%2Ftsticonv.c;h=5bc649a4e0f54c7b0a0c68e0e767b24b69d0a318;hp=d1ce8ccfee6bc1dc7de3b399656ed50e4fc71562;hb=dda6b9f4ca21d43c68d4c667b4d8e6d1bdaba095;hpb=8d691989077a0addcbd840d769dce6700f3d9622 diff --git a/test/tsticonv.c b/test/tsticonv.c index d1ce8cc..5bc649a 100644 --- a/test/tsticonv.c +++ b/test/tsticonv.c @@ -1,8 +1,6 @@ -/* - * Copyright (C) 1995-2007, Index Data ApS +/* This file is part of the YAZ toolkit. + * Copyright (C) 1995-2008 Index Data * See the file LICENSE for details. - * - * $Id: tsticonv.c,v 1.24 2007-01-03 08:42:16 adam Exp $ */ #if HAVE_CONFIG_H @@ -17,6 +15,8 @@ #include #include +#define ESC "\x1b" + static int compare_buffers(char *msg, int no, int expect_len, const char *expect_buf, int got_len, const char *got_buf) @@ -76,43 +76,87 @@ static int tst_convert_l(yaz_iconv_t cd, size_t in_len, const char *in_buf, return 0; } else + { + yaz_iconv(cd, 0, 0, &outbuf, &outbytesleft); break; + } } + return compare_buffers("tsticonv 22", 0, expect_len, expect_buf, outbuf - outbuf0, outbuf0); } -static int tst_convert(yaz_iconv_t cd, const char *buf, const char *cmpbuf) +static int tst_convert_x(yaz_iconv_t cd, const char *buf, const char *cmpbuf, + int expect_error) { - int ret = 0; + int ret = 1; WRBUF b = wrbuf_alloc(); - char outbuf[12]; + char outbuf[16]; size_t inbytesleft = strlen(buf); const char *inp = buf; - while (inbytesleft) + int rounds = 0; + for (rounds = 0; inbytesleft && rounds < sizeof(outbuf); rounds++) { size_t outbytesleft = sizeof(outbuf); char *outp = outbuf; size_t r = yaz_iconv(cd, (char**) &inp, &inbytesleft, &outp, &outbytesleft); + wrbuf_write(b, outbuf, outp - outbuf); if (r == (size_t) (-1)) { int e = yaz_iconv_error(cd); if (e != YAZ_ICONV_E2BIG) + { + if (expect_error != -1) + if (e != expect_error) + ret = 0; break; + } + } + else + { + size_t outbytesleft = sizeof(outbuf); + char *outp = outbuf; + r = yaz_iconv(cd, 0, 0, &outp, &outbytesleft); + wrbuf_write(b, outbuf, outp - outbuf); + if (expect_error != -1) + if (expect_error) + ret = 0; + break; } - wrbuf_write(b, outbuf, outp - outbuf); } if (wrbuf_len(b) == strlen(cmpbuf) && !memcmp(cmpbuf, wrbuf_buf(b), wrbuf_len(b))) - ret = 1; + ; else - yaz_log(YLOG_LOG, "GOT (%.*s)", wrbuf_len(b), wrbuf_buf(b)); - wrbuf_free(b, 1); + { + WRBUF w = wrbuf_alloc(); + + ret = 0; + wrbuf_rewind(w); + wrbuf_puts_escaped(w, buf); + yaz_log(YLOG_LOG, "input %s", wrbuf_cstr(w)); + + wrbuf_rewind(w); + wrbuf_write_escaped(w, wrbuf_buf(b), wrbuf_len(b)); + yaz_log(YLOG_LOG, "got %s", wrbuf_cstr(w)); + + wrbuf_rewind(w); + wrbuf_puts_escaped(w, cmpbuf); + yaz_log(YLOG_LOG, "exp %s", wrbuf_cstr(w)); + + wrbuf_destroy(w); + } + + wrbuf_destroy(b); return ret; } +static int tst_convert(yaz_iconv_t cd, const char *buf, const char *cmpbuf) +{ + return tst_convert_x(cd, buf, cmpbuf, 0); +} /* some test strings in ISO-8859-1 format */ static const char *iso_8859_1_a[] = { @@ -265,6 +309,9 @@ static void dconvert(int mandatory, const char *tmpcode) return; r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); YAZ_CHECK(r != (size_t) (-1)); + + r = yaz_iconv(cd, 0, 0, &outbuf, &outbytesleft); + YAZ_CHECK(r != (size_t) (-1)); yaz_iconv_close(cd); if (r == (size_t) (-1)) return; @@ -280,11 +327,19 @@ static void dconvert(int mandatory, const char *tmpcode) outbytesleft = sizeof(outbuf1); r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); YAZ_CHECK(r != (size_t) (-1)); + + r = yaz_iconv(cd, 0, 0, &outbuf, &outbytesleft); + if (r == (size_t)(-1)) + { + fprintf(stderr, "failed\n"); + } + YAZ_CHECK(r != (size_t) (-1)); + if (r != (size_t)(-1)) { ret = compare_buffers("dconvert", i, strlen(iso_8859_1_a[i]), iso_8859_1_a[i], - sizeof(outbuf1) - outbytesleft, outbuf1); + sizeof(outbuf1) - outbytesleft, outbuf1); YAZ_CHECK(ret); } yaz_iconv_close(cd); @@ -350,6 +405,25 @@ static void tst_marc8_to_utf8(void) /* COMBINING ACUTE ACCENT */ YAZ_CHECK(tst_convert(cd, "Cours de mathâe", "Cours de mathe\xcc\x81")); + + YAZ_CHECK(tst_convert(cd, "\xea" "a", "a\xcc\x8a")); + YAZ_CHECK(tst_convert(cd, "a" "\xea" "\x1e", "a" "\x1e\xcc\x8a")); + YAZ_CHECK(tst_convert(cd, "a" "\xea" "p", "a" "p\xcc\x8a")); + + YAZ_CHECK(tst_convert_x(cd, "a\xea", "a", YAZ_ICONV_EINVAL)); + YAZ_CHECK(tst_convert(cd, "p", "\xcc\x8a")); /* note: missing p */ + yaz_iconv(cd, 0, 0, 0, 0); /* incomplete. so we have to reset */ + + /* bug #2115 */ + YAZ_CHECK(tst_convert(cd, ESC "(N" ESC ")Qp" ESC "(B", "\xd0\x9f")); + + YAZ_CHECK(tst_convert_x(cd, ESC , "", YAZ_ICONV_EINVAL)); + YAZ_CHECK(tst_convert_x(cd, ESC "(", "", YAZ_ICONV_EINVAL)); + YAZ_CHECK(tst_convert_x(cd, ESC "(B", "", 0)); + + YAZ_CHECK(tst_convert(cd, ESC "(B" "\x31", "1")); /* ASCII in G0 */ + YAZ_CHECK(tst_convert(cd, ESC ")B" "\xB1", "1")); /* ASCII in G1 */ + yaz_iconv_close(cd); } @@ -413,9 +487,9 @@ static void tst_marc8_to_latin1(void) yaz_iconv_close(cd); } -static void tst_utf8_to_marc8(void) +static void tst_utf8_to_marc8(const char *marc8_type) { - yaz_iconv_t cd = yaz_iconv_open("MARC8", "UTF-8"); + yaz_iconv_t cd = yaz_iconv_open(marc8_type, "UTF-8"); YAZ_CHECK(cd); if (!cd) @@ -429,7 +503,7 @@ static void tst_utf8_to_marc8(void) /** Pure ASCII. 12 characters (sizeof(outbuf)) */ YAZ_CHECK(tst_convert(cd, "Cours de math", "Cours de math")); - /** Pure ASCII. 13 characters (sizeof(outbuf)) */ + /** Pure ASCII. 13 characters (sizeof(outbuf)+1) */ YAZ_CHECK(tst_convert(cd, "Cours de math.", "Cours de math.")); /** UPPERCASE SCANDINAVIAN O */ @@ -458,15 +532,101 @@ static void tst_utf8_to_marc8(void) "\033$1" "\x21\x2B\x3B" "\033(B" "o" )); + /** Ideographic space per ANSI Z39.64 */ + YAZ_CHECK(tst_convert(cd, + "\xe3\x80\x80" "o", /* UTF-8 */ + "\033$1" "\x21\x23\x21" "\033(B" "o" )); + /** Superscript 0 . bug #642 */ YAZ_CHECK(tst_convert(cd, "(\xe2\x81\xb0)", /* UTF-8 */ "(\033p0\x1bs)")); + + /** bug #1778 */ + YAZ_CHECK(tst_convert(cd, + /* offset 0x530 in UTF-8 rec marccol4.u8.marc */ + "\xE3\x83\xB3" "\xE3\x82\xBF" + "\xCC\x84" "\xCC\x84" "\xE3\x83\xBC" /* UTF-8 */, + "\x1B\x24\x31" "\x69\x25\x73" + "\x1B\x28\x42" "\xE5\xE5" "\x1B\x24\x31" + "\x69\x25\x3F" + "\x69\x21\x3C" "\x1B\x28\x42")); + + + /** bug #2120 */ + YAZ_CHECK(tst_convert(cd, + "\xCE\x94\xCE\xB5\xCF\x84" + "\xCE\xBF\xCF\x81\xCE\xB1" + "\xCE\xBA\xCE\xB7\xCF\x82\x2C", + + "\x1B\x28\x53\x45\x66\x78\x72\x75" + "\x61\x6D\x6A\x77" + "\x1B\x28\x42\x2C" + )); + { + char *inbuf0 = "\xe2\x81\xb0"; + char *inbuf = inbuf0; + size_t inbytesleft = strlen(inbuf); + char outbuf0[64]; + char *outbuf = outbuf0; + size_t outbytesleft = sizeof(outbuf0)-1; + size_t r; +#if 0 + int i; +#endif + r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + YAZ_CHECK(r != (size_t) (-1)); + +#if 0 + *outbuf = '\0'; /* so we know when to stop printing */ + for (i = 0; outbuf0[i]; i++) + { + int ch = outbuf0[i] & 0xff; + yaz_log(YLOG_LOG, "ch%d %02X %c", i, ch, ch >= ' ' ? ch : '?'); + } +#endif + + r = yaz_iconv(cd, 0, 0, &outbuf, &outbytesleft); + YAZ_CHECK(r != (size_t) (-1)); + *outbuf = '\0'; /* for strcmp test below and printing */ +#if 0 + for (i = 0; outbuf0[i]; i++) + { + int ch = outbuf0[i] & 0xff; + yaz_log(YLOG_LOG, "ch%d %02X %c", i, ch, ch >= ' ' ? ch : '?'); + } +#endif + YAZ_CHECK(strcmp("\033p0\x1bs", outbuf0) == 0); + } + yaz_iconv(cd, 0, 0, 0, 0); + yaz_iconv_close(cd); +} + +static void tst_advance_to_utf8(void) +{ + yaz_iconv_t cd = yaz_iconv_open("utf-8", "advancegreek"); + + YAZ_CHECK(cd); + if (!cd) + return; + + YAZ_CHECK(tst_convert(cd, "Cours ", "Cours ")); yaz_iconv_close(cd); } +static void tst_utf8_to_advance(void) +{ + yaz_iconv_t cd = yaz_iconv_open("advancegreek", "utf-8"); + + YAZ_CHECK(cd); + if (!cd) + return; + + YAZ_CHECK(tst_convert(cd, "Cours ", "Cours ")); + yaz_iconv_close(cd); +} static void tst_latin1_to_marc8(void) { @@ -521,6 +681,27 @@ static void tst_utf8_codes(void) YAZ_CHECK(utf8_check(100000000)); } +static void tst_danmarc_to_latin1(void) +{ + yaz_iconv_t cd = yaz_iconv_open("iso-8859-1", "danmarc"); + + YAZ_CHECK(cd); + if (!cd) + return; + + YAZ_CHECK(tst_convert(cd, "ax", "ax")); + + YAZ_CHECK(tst_convert(cd, "a@@b", "a@b")); + YAZ_CHECK(tst_convert(cd, "a@@@@b", "a@@b")); + YAZ_CHECK(tst_convert(cd, "@000ab", "\nb")); + + YAZ_CHECK(tst_convert(cd, "@\xe5", "aa")); + YAZ_CHECK(tst_convert(cd, "@\xc5.", "Aa.")); + + yaz_iconv_close(cd); +} + + int main (int argc, char **argv) { YAZ_CHECK_INIT(argc, argv); @@ -533,7 +714,14 @@ int main (int argc, char **argv) tst_marc8_to_latin1(); - tst_utf8_to_marc8(); + tst_advance_to_utf8(); + tst_utf8_to_advance(); + + tst_utf8_to_marc8("marc8"); + tst_utf8_to_marc8("marc8lossy"); + tst_utf8_to_marc8("marc8lossless"); + + tst_danmarc_to_latin1(); tst_latin1_to_marc8();