From f8ff51d46ab411a8b4244becf67c9748ef5550a7 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Thu, 28 Nov 2013 14:27:14 +0100 Subject: [PATCH] danmarc iconv: fixes and more tests MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit 0xa733 is converted to @å , 0xa732 is converted to @Å instead of @a733, @a732 . --- src/iconv_decode_danmarc.c | 10 ++++------ src/iconv_encode_danmarc.c | 28 ++++++++++++++++++++++------ test/test_iconv.c | 29 +++++++++++++++++++++++------ 3 files changed, 49 insertions(+), 18 deletions(-) diff --git a/src/iconv_decode_danmarc.c b/src/iconv_decode_danmarc.c index 4105137..ee87057 100644 --- a/src/iconv_decode_danmarc.c +++ b/src/iconv_decode_danmarc.c @@ -57,14 +57,12 @@ static unsigned long read_danmarc(yaz_iconv_t cd, *no_read = 2; break; case 0xe5: /* LATIN SMALL LETTER A WITH RING ABOVE */ - x = 'a'; - data->x_back = 'a'; - *no_read = 1; + x = 0xa733; + *no_read = 2; break; case 0xc5: /* LATIN CAPITAL LETTER A WITH RING ABOVE */ - x = 'A'; - data->x_back = 'a'; - *no_read = 1; + x = 0xa732; + *no_read = 2; break; default: if (inbytesleft < 5) diff --git a/src/iconv_encode_danmarc.c b/src/iconv_encode_danmarc.c index 1afcf7c..467008a 100644 --- a/src/iconv_encode_danmarc.c +++ b/src/iconv_encode_danmarc.c @@ -25,14 +25,14 @@ static size_t write_danmarc(yaz_iconv_t cd, yaz_iconv_encoder_t en, { unsigned char *outp = (unsigned char *) *outbuf; - if (x == '@') + if (x == '@' || x == '*' || x == 0xa4) { if (*outbytesleft < 2) { yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG); return (size_t)(-1); } - *outp++ = x; + *outp++ = '@'; (*outbytesleft)--; *outp++ = x; (*outbytesleft)--; @@ -48,15 +48,31 @@ static size_t write_danmarc(yaz_iconv_t cd, yaz_iconv_encoder_t en, (*outbytesleft)--; } else - { /* full unicode, emit @XXXX */ + { if (*outbytesleft < 6) { yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG); return (size_t)(-1); } - sprintf(*outbuf, "@%04lX", x); - outp += 5; - (*outbytesleft) -= 5; + switch (x) + { + case 0xa733: + *outp++ = '@'; + *outp++ = 0xe5; + (*outbytesleft) -= 2; + break; + case 0xa732: + *outp++ = '@'; + *outp++ = 0xc5; + (*outbytesleft) -= 2; + break; + default: + /* full unicode, emit @XXXX */ + sprintf(*outbuf, "@%04lX", x); + outp += 5; + (*outbytesleft) -= 5; + break; + } } *outbuf = (char *) outp; return 0; diff --git a/test/test_iconv.c b/test/test_iconv.c index e341972..ab05cee 100644 --- a/test/test_iconv.c +++ b/test/test_iconv.c @@ -681,9 +681,9 @@ static void tst_utf8_codes(void) YAZ_CHECK(utf8_check(100000000)); } -static void tst_danmarc_to_latin1(void) +static void tst_danmarc_to_utf8(void) { - yaz_iconv_t cd = yaz_iconv_open("iso-8859-1", "danmarc"); + yaz_iconv_t cd = yaz_iconv_open("utf-8", "danmarc"); YAZ_CHECK(cd); if (!cd) @@ -693,10 +693,17 @@ static void tst_danmarc_to_latin1(void) YAZ_CHECK(tst_convert(cd, "a@@b", "a@b")); YAZ_CHECK(tst_convert(cd, "a@@@@b", "a@@b")); - YAZ_CHECK(tst_convert(cd, "@000ab", "\nb")); - YAZ_CHECK(tst_convert(cd, "@\xe5", "aa")); - YAZ_CHECK(tst_convert(cd, "@\xc5.", "Aa.")); + YAZ_CHECK(tst_convert(cd, "@*", "*")); + YAZ_CHECK(tst_convert(cd, "@@", "@")); + YAZ_CHECK(tst_convert(cd, "@\xa4", "\xC2\xA4")); + YAZ_CHECK(tst_convert(cd, "@\xe5", "\xEA\x9C\xB3")); + YAZ_CHECK(tst_convert(cd, "@\xc5.", "\xEA\x9C\xB2" ".")); + + YAZ_CHECK(tst_convert(cd, "@a733", "\xEA\x9C\xB3")); + YAZ_CHECK(tst_convert(cd, "@a732.", "\xEA\x9C\xB2" ".")); + + YAZ_CHECK(tst_convert(cd, "a@03BBb", "a\xce\xbb" "b")); /* lambda */ yaz_iconv_close(cd); } @@ -710,10 +717,20 @@ static void tst_utf8_to_danmarc(void) return; YAZ_CHECK(tst_convert(cd, "ax", "ax")); + + YAZ_CHECK(tst_convert(cd, "a@b", "a@@b")); + YAZ_CHECK(tst_convert(cd, "a@@b", "a@@@@b")); + + YAZ_CHECK(tst_convert(cd, "*", "@*")); YAZ_CHECK(tst_convert(cd, "@", "@@")); + YAZ_CHECK(tst_convert(cd, "\xC2\xA4", "@\xa4")); + YAZ_CHECK(tst_convert(cd, "a\xc3\xa5" "b", "a\xe5" "b")); /* aring */ YAZ_CHECK(tst_convert(cd, "a\xce\xbb" "b", "a@03BBb")); /* lambda */ + YAZ_CHECK(tst_convert(cd, "\xEA\x9C\xB2" ".", "@\xc5.")); + YAZ_CHECK(tst_convert(cd, "\xEA\x9C\xB3", "@\xe5")); + yaz_iconv_close(cd); } @@ -738,7 +755,7 @@ int main (int argc, char **argv) tst_utf8_to_marc8("marc8lossy"); tst_utf8_to_marc8("marc8lossless"); - tst_danmarc_to_latin1(); + tst_danmarc_to_utf8(); tst_utf8_to_danmarc(); tst_latin1_to_marc8(); -- 1.7.10.4