X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=test%2Ftest_icu.c;h=de3112150ca3af44f9b76fb83a5964f40a51d779;hp=abb746282057b93c8f9e8120880886d586284d93;hb=5242cb5a8634bfa38b9333ff7f903e718ac6e292;hpb=7b27a8f378d73a86e8ff5e4fa3285117362481c5 diff --git a/test/test_icu.c b/test/test_icu.c index abb7462..de31121 100644 --- a/test/test_icu.c +++ b/test/test_icu.c @@ -4,7 +4,7 @@ */ /* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */ - + #if HAVE_CONFIG_H #include "config.h" @@ -44,8 +44,8 @@ static int icu_termmap_cmp(const void *vp1, const void *vp2) struct icu_termmap *itmp2 = *(struct icu_termmap **) vp2; int cmp = 0; - - cmp = strcmp((const char *)itmp1->sort_key, + + cmp = strcmp((const char *)itmp1->sort_key, (const char *)itmp2->sort_key); return cmp; } @@ -71,12 +71,12 @@ static int test_icu_casemap(const char * locale, char action, /* perform case mapping */ icu_utf16_casemap(dest16, src16, locale, action, &status); - + /* converting to UTF8 */ icu_utf16_to_utf8(dest8, dest16, &status); /* determine success */ - if (dest8->utf8 + if (dest8->utf8 && (dest8->utf8_len == strlen(chk8cstr)) && !strcmp(chk8cstr, (const char *) dest8->utf8)) success = 1; @@ -89,18 +89,18 @@ static int test_icu_casemap(const char * locale, char action, yaz_log(YLOG_WARN, "test_icu_casemap failed"); yaz_log(YLOG_LOG, "Original string: '%s' (%d)", src8cstr, src8cstr_len); - yaz_log(YLOG_LOG, "icu_casemap '%s:%c' '%s' (%d)", + yaz_log(YLOG_LOG, "icu_casemap '%s:%c' '%s' (%d)", locale, action, dest8->utf8, dest8->utf8_len); yaz_log(YLOG_LOG, "expected string: '%s' (%d)", chk8cstr, chk8cstr_len); } - + /* clean the buffers */ icu_buf_utf8_destroy(src8); icu_buf_utf8_destroy(dest8); icu_buf_utf16_destroy(src16); icu_buf_utf16_destroy(dest16); - + return success; } @@ -110,38 +110,38 @@ static void check_icu_casemap(void) /* successful tests */ YAZ_CHECK(test_icu_casemap("en", 'l', - "A ReD fOx hunTS sQUirriLs", + "A ReD fOx hunTS sQUirriLs", "a red fox hunts squirrils")); - + YAZ_CHECK(test_icu_casemap("en", 'u', - "A ReD fOx hunTS sQUirriLs", + "A ReD fOx hunTS sQUirriLs", "A RED FOX HUNTS SQUIRRILS")); - + YAZ_CHECK(test_icu_casemap("en", 'f', - "A ReD fOx hunTS sQUirriLs", + "A ReD fOx hunTS sQUirriLs", "a red fox hunts squirrils")); - + YAZ_CHECK(test_icu_casemap("en", 't', - "A ReD fOx hunTS sQUirriLs", + "A ReD fOx hunTS sQUirriLs", "A Red Fox Hunts Squirrils")); - + /* Locale 'da' */ /* success expected */ YAZ_CHECK(test_icu_casemap("da", 'l', - "åh ÆbLE, øs fLØde i Åen efter bLåBærGRødeN", + "åh ÆbLE, øs fLØde i Åen efter bLåBærGRødeN", "åh æble, øs fløde i åen efter blåbærgrøden")); YAZ_CHECK(test_icu_casemap("da", 'u', - "åh ÆbLE, øs fLØde i Åen efter bLåBærGRødeN", + "åh ÆbLE, øs fLØde i Åen efter bLåBærGRødeN", "ÅH ÆBLE, ØS FLØDE I ÅEN EFTER BLÅBÆRGRØDEN")); YAZ_CHECK(test_icu_casemap("da", 'f', - "åh ÆbLE, øs fLØde i Åen efter bLåBærGRødeN", + "åh ÆbLE, øs fLØde i Åen efter bLåBærGRødeN", "åh æble, øs fløde i åen efter blåbærgrøden")); YAZ_CHECK(test_icu_casemap("da", 't', - "åh ÆbLE, øs fLØde i Åen efter bLåBærGRødeN", + "åh ÆbLE, øs fLØde i Åen efter bLåBærGRødeN", "Åh Æble, Øs Fløde I Åen Efter Blåbærgrøden")); /* Locale 'de' */ @@ -179,20 +179,20 @@ static int test_icu_sortmap(const char * locale, int src_list_len, struct icu_termmap * list[src_list_len]; - UCollator *coll = ucol_open(locale, &status); + UCollator *coll = ucol_open(locale, &status); icu_check_status(status); if (U_FAILURE(status)) return 0; /* assigning display terms and sort keys using buf 8 and buf16 */ - for (i = 0; i < src_list_len; i++) + for (i = 0; i < src_list_len; i++) { list[i] = (struct icu_termmap *) malloc(sizeof(struct icu_termmap)); /* copy display term */ - strcpy(list[i]->disp_term, src_list[i]); + strcpy(list[i]->disp_term, src_list[i]); /* transforming to UTF16 */ icu_utf16_from_utf8_cstr(buf16, list[i]->disp_term, &status); @@ -201,10 +201,10 @@ static int test_icu_sortmap(const char * locale, int src_list_len, /* computing sortkeys */ icu_sortkey8_from_utf16(coll, buf8, buf16, &status); icu_check_status(status); - + /* assigning sortkeys */ - memcpy(list[i]->sort_key, buf8->utf8, buf8->utf8_len); - } + memcpy(list[i]->sort_key, buf8->utf8, buf8->utf8_len); + } /* do the sorting */ qsort(list, src_list_len, sizeof(struct icu_termmap *), icu_termmap_cmp); @@ -219,30 +219,30 @@ static int test_icu_sortmap(const char * locale, int src_list_len, if (!success) { - yaz_log(YLOG_LOG, "ERROR"); - yaz_log(YLOG_LOG, "Input str:'%s':", locale); + yaz_log(YLOG_LOG, "ERROR"); + yaz_log(YLOG_LOG, "Input str:'%s':", locale); for (i = 0; i < src_list_len; i++) { - yaz_log(YLOG_LOG, " '%s'", list[i]->disp_term); + yaz_log(YLOG_LOG, " '%s'", list[i]->disp_term); } - yaz_log(YLOG_LOG, "ICU sort: '%s':", locale); + yaz_log(YLOG_LOG, "ICU sort: '%s':", locale); for (i = 0; i < src_list_len; i++) { - yaz_log(YLOG_LOG, " '%s'", list[i]->disp_term); + yaz_log(YLOG_LOG, " '%s'", list[i]->disp_term); } - yaz_log(YLOG_LOG, "Expected: '%s':", locale); + yaz_log(YLOG_LOG, "Expected: '%s':", locale); for (i = 0; i < src_list_len; i++) { - yaz_log(YLOG_LOG, " '%s'", chk_list[i]); + yaz_log(YLOG_LOG, " '%s'", chk_list[i]); } } - + for (i = 0; i < src_list_len; i++) free(list[i]); - + ucol_close(coll); icu_buf_utf8_destroy(buf8); icu_buf_utf16_destroy(buf16); - return success; + return success; } static void check_icu_sortmap(void) @@ -256,7 +256,7 @@ static void check_icu_sortmap(void) YAZ_CHECK(test_icu_sortmap("en_CA", en_1_len, en_1_src, en_1_cck)); YAZ_CHECK(test_icu_sortmap("en_GB", en_1_len, en_1_src, en_1_cck)); YAZ_CHECK(test_icu_sortmap("en_US", en_1_len, en_1_src, en_1_cck)); - + /* successful tests */ { size_t da_1_len = 6; @@ -281,7 +281,7 @@ static int test_icu_normalizer(const char * rules8cstr, const char * chk8cstr) { int success = 0; - + UErrorCode status = U_ZERO_ERROR; struct icu_buf_utf16 * src16 = icu_buf_utf16_create(0); @@ -290,7 +290,7 @@ static int test_icu_normalizer(const char * rules8cstr, struct icu_transform * transform = icu_transform_create(rules8cstr, 'f', 0, &status); icu_check_status(status); - + icu_utf16_from_utf8_cstr(src16, src8cstr, &status); icu_check_status(status); @@ -301,7 +301,7 @@ static int test_icu_normalizer(const char * rules8cstr, icu_check_status(status); - if (!strcmp((const char *) dest8->utf8, + if (!strcmp((const char *) dest8->utf8, (const char *) chk8cstr)) success = 1; else @@ -327,7 +327,7 @@ static void check_icu_normalizer(void) YAZ_CHECK(test_icu_normalizer("[:Punctuation:] Any-Remove", "Don't shoot!", "Dont shoot")); - + YAZ_CHECK(test_icu_normalizer("[:Control:] Any-Remove", "Don't\n shoot!", "Don't shoot!")); @@ -339,7 +339,7 @@ static void check_icu_normalizer(void) YAZ_CHECK(test_icu_normalizer("Lower; [:^Letter:] Remove", "Don't shoot!", "dontshoot")); - + YAZ_CHECK(test_icu_normalizer("[:^Number:] Remove", "Monday 15th of April", "15")); @@ -375,7 +375,7 @@ static int test_icu_tokenizer(const char * locale, char action, YAZ_CHECK(tokenizer); /* attach text buffer to tokenizer */ - icu_tokenizer_attach(tokenizer, src16, &status); + icu_tokenizer_attach(tokenizer, src16, &status); icu_check_status(status); /* perform work on tokens */ @@ -400,25 +400,25 @@ static int test_icu_tokenizer(const char * locale, char action, icu_buf_utf16_destroy(src16); icu_buf_utf16_destroy(tkn16); icu_buf_utf8_destroy(tkn8); - + return success; } static void check_icu_tokenizer(void) { - const char * en_str + const char * en_str = "O Romeo, Romeo! wherefore art thou Romeo?"; - + YAZ_CHECK(test_icu_tokenizer("en", 's', en_str, 2)); YAZ_CHECK(test_icu_tokenizer("en", 'l', en_str, 7)); YAZ_CHECK(test_icu_tokenizer("en", 'w', en_str, 16)); YAZ_CHECK(test_icu_tokenizer("en", 'c', en_str, 41)); { - const char * da_str + const char * da_str = "Blåbærtærte. Denne kage stammer fra Finland. " "Den er med blåbær, men alle sommerens forskellige bær kan bruges."; - + YAZ_CHECK(test_icu_tokenizer("da", 's', da_str, 3)); YAZ_CHECK(test_icu_tokenizer("dar", 'l', da_str, 17)); YAZ_CHECK(test_icu_tokenizer("da", 'w', da_str, 37)); @@ -428,12 +428,12 @@ static void check_icu_tokenizer(void) static void check_icu_chain(void) { - const char * en_str + const char * en_str = "O Romeo, Romeo! wherefore art thou\t Romeo?"; UErrorCode status = U_ZERO_ERROR; struct icu_chain * chain = 0; - + const char * xml_str = "" "" "" @@ -442,7 +442,7 @@ static void check_icu_chain(void) "" ""; - + xmlDoc *doc = xmlParseMemory(xml_str, strlen(xml_str)); xmlNode *xml_node = xmlDocGetRootElement(doc); YAZ_CHECK(xml_node); @@ -488,7 +488,7 @@ static void check_bug_1140(void) { UErrorCode status = U_ZERO_ERROR; struct icu_chain * chain = 0; - + const char * xml_str = "" /* if the first rule is normalize instead. Then it works */ @@ -501,7 +501,7 @@ static void check_bug_1140(void) "" ""; - + xmlDoc *doc = xmlParseMemory(xml_str, strlen(xml_str)); xmlNode *xml_node = xmlDocGetRootElement(doc); YAZ_CHECK(xml_node); @@ -512,7 +512,7 @@ static void check_bug_1140(void) YAZ_CHECK(chain); if (!chain) return; - + YAZ_CHECK(icu_chain_assign_cstr( chain, "O Romeo, Romeo! wherefore art thou\t Romeo?", &status)); @@ -525,7 +525,7 @@ static void check_bug_1140(void) icu_chain_token_norm(chain), icu_chain_token_display(chain)); */ } - + YAZ_CHECK_EQ(icu_chain_token_number(chain), 7); @@ -556,7 +556,7 @@ static void check_chain_empty_token(void) "" "" ""; - + xmlDoc *doc = xmlParseMemory(xml_str, strlen(xml_str)); xmlNode *xml_node = xmlDocGetRootElement(doc); YAZ_CHECK(xml_node); @@ -565,7 +565,7 @@ static void check_chain_empty_token(void) xmlFreeDoc(doc); YAZ_CHECK(chain); - + YAZ_CHECK(icu_chain_assign_cstr( chain, "a string with 15 tokenss and 8 displays", &status)); @@ -591,7 +591,7 @@ static void check_chain_empty_chain(void) const char * xml_str = "" ""; - + const char * src8 = "some 5487 weired !¤%&(/& sTuFf"; char * dest8 = 0; @@ -603,7 +603,7 @@ static void check_chain_empty_chain(void) xmlFreeDoc(doc); YAZ_CHECK(chain); - + YAZ_CHECK(icu_chain_assign_cstr( chain, src8, &status)); @@ -621,7 +621,7 @@ static void check_chain_empty_chain(void) dest8 = (char *) icu_chain_token_norm(chain); YAZ_CHECK_EQ(strcmp(src8, dest8), 0); - + icu_chain_destroy(chain); } @@ -636,7 +636,7 @@ static void check_icu_iter1(void) "" "" ""; - + xmlDoc *doc = xmlParseMemory(xml_str, strlen(xml_str)); YAZ_CHECK(doc); if (!doc) @@ -650,7 +650,7 @@ static void check_icu_iter1(void) xmlFreeDoc(doc); YAZ_CHECK(chain); - + iter = icu_iter_create(chain); icu_iter_first(iter, "a string with 15 tokens and 8 displays"); YAZ_CHECK(iter); @@ -739,7 +739,7 @@ static void *iter_thread(void *p) { struct icu_chain *chain = (struct icu_chain *) p; int i; - + for (i = 0; i < 1000; i++) { YAZ_CHECK(test_iter(chain, "Adobe Acrobat Reader, 1991-1999.", @@ -794,7 +794,7 @@ static void check_icu_iter2(void) YAZ_CHECK(chain); if (!chain) return; - + YAZ_CHECK(test_iter(chain, "Adobe Acrobat Reader, 1991-1999.", "[adobe][acrobat][reader][1991][][1999][]")); @@ -812,7 +812,7 @@ static void check_icu_iter3(void) struct icu_chain * chain = 0; xmlNode *xml_node; - const char * xml_str = + const char * xml_str = "\n" "\n" "\n" @@ -836,7 +836,7 @@ static void check_icu_iter3(void) YAZ_CHECK(chain); if (!chain) return; - + YAZ_CHECK(test_iter(chain, "Adobe Acrobat Reader, 1991-1999.", "[adobeacrobatreader19911999]")); @@ -850,7 +850,7 @@ static void check_icu_iter3(void) int main(int argc, char **argv) { - YAZ_CHECK_INIT(argc, argv); + YAZ_CHECK_INIT(argc, argv); YAZ_CHECK_LOG(); #if YAZ_HAVE_ICU @@ -862,10 +862,10 @@ int main(int argc, char **argv) check_icu_chain(); check_chain_empty_token(); check_chain_empty_chain(); - check_icu_iter1(); + check_icu_iter1(); check_icu_iter2(); check_icu_iter3(); - + check_bug_1140(); u_cleanup(); @@ -875,7 +875,7 @@ int main(int argc, char **argv) YAZ_CHECK(0 == 0); #endif /* YAZ_HAVE_ICU */ - + YAZ_CHECK_TERM; }