-/* $Id: tst_icu_I18N.c,v 1.1 2007-10-22 12:21:39 adam Exp $
+/* $Id: tst_icu_I18N.c,v 1.2 2007-10-22 17:32:07 adam Exp $
Copyright (c) 2006-2007, Index Data.
This file is part of Pazpar2.
02111-1307, USA.
*/
-// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8
+/* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
#if HAVE_CONFIG_H
-#include "cconfig.h"
+#include "config.h"
#endif
#define USE_TIMING 0
#include <yaz/test.h>
-
-
-#ifdef HAVE_ICU
+#if HAVE_ICU
#include <yaz/icu_I18N.h>
#include <string.h>
#include <stdlib.h>
-//#include <unicode/ustring.h>
-// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8
+/* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
#define MAX_KEY_SIZE 256
struct icu_termmap
{
- uint8_t sort_key[MAX_KEY_SIZE]; // standard C string '\0' terminated
- char disp_term[MAX_KEY_SIZE]; // standard C utf-8 string
+ uint8_t sort_key[MAX_KEY_SIZE]; /* standard C string '\0' terminated */
+ char disp_term[MAX_KEY_SIZE]; /* standard C utf-8 string */
};
int src8cstr_len = strlen(src8cstr);
int chk8cstr_len = strlen(chk8cstr);
- // converting to UTF16
+ /* converting to UTF16 */
icu_utf16_from_utf8_cstr(src16, src8cstr, &status);
- // perform case mapping
+ /* perform case mapping */
icu_utf16_casemap(dest16, src16, locale, action, &status);
- // converting to UTF8
+ /* converting to UTF8 */
icu_utf16_to_utf8(dest8, dest16, &status);
- // determine success
+ /* determine success */
if (dest8->utf8
&& (dest8->utf8_len == strlen(chk8cstr))
&& !strcmp(chk8cstr, (const char *) dest8->utf8))
else
success = 0;
- // report failures
+ /* report failures */
if (!success){
printf("\nERROR\n");
printf("original string: '%s' (%d)\n", src8cstr, src8cstr_len);
printf("expected string: '%s' (%d)\n", chk8cstr, chk8cstr_len);
}
- // clean the buffers
+ /* clean the buffers */
icu_buf_utf8_destroy(src8);
icu_buf_utf8_destroy(dest8);
icu_buf_utf16_destroy(src16);
-// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8
+/* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
void test_icu_I18N_casemap(int argc, char **argv)
{
- // Locale 'en'
+ /* Locale 'en' */
- // sucessful tests
+ /* successful tests */
YAZ_CHECK(test_icu_casemap("en", 'l',
"A ReD fOx hunTS sQUirriLs",
"a red fox hunts squirrils"));
"A Red Fox Hunts Squirrils"));
- // Locale 'da'
+ /* Locale 'da' */
- // sucess expected
+ /* success expected */
YAZ_CHECK(test_icu_casemap("da", 'l',
"åh ÆbLE, øs fLØde i Åen efter bLåBærGRødeN",
"åh æble, øs fløde i åen efter blåbærgrøden"));
"åh ÆbLE, øs fLØde i Åen efter bLåBærGRødeN",
"Åh Æble, Øs Fløde I Åen Efter Blåbærgrøden"));
- // Locale 'de'
+ /* Locale 'de' */
- // sucess expected
+ /* success expected */
YAZ_CHECK(test_icu_casemap("de", 'l',
"zWÖlf ärgerliche Würste rollen ÜBer die StRAße",
"zwölf ärgerliche würste rollen über die straße"));
}
-// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8
+/* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
int test_icu_sortmap(const char * locale, int src_list_len,
const char ** src_list, const char ** chk_list)
if(U_FAILURE(status))
return 0;
- // assigning display terms and sort keys using buf 8 and buf16
+ /* assigning display terms and sort keys using buf8 and buf16 */
for( i = 0; i < src_list_len; i++)
{
list[i] = (struct icu_termmap *) malloc(sizeof(struct icu_termmap));
- // copy display term
+ /* copy display term */
strcpy(list[i]->disp_term, src_list[i]);
- // transforming to UTF16
+ /* transforming to UTF16 */
icu_utf16_from_utf8_cstr(buf16, list[i]->disp_term, &status);
icu_check_status(status);
- // computing sortkeys
+ /* computing sortkeys */
icu_sortkey8_from_utf16(coll, buf8, buf16, &status);
icu_check_status(status);
- // assigning sortkeys
+ /* assigning sortkeys */
memcpy(list[i]->sort_key, buf8->utf8, buf8->utf8_len);
- //strncpy(list[i]->sort_key, buf8->utf8, buf8->utf8_len);
- //strcpy((char *) list[i]->sort_key, (const char *) buf8->utf8);
}
- // do the sorting
+ /* do the sorting */
qsort(list, src_list_len,
sizeof(struct icu_termmap *), icu_termmap_cmp);
- // checking correct sorting
+ /* checking correct sorting */
for (i = 0; i < src_list_len; i++){
if (0 != strcmp(list[i]->disp_term, chk_list[i])){
success = 0;
printf("ICU sort: '%s' : ", locale);
for (i = 0; i < src_list_len; i++) {
printf(" '%s'", list[i]->disp_term);
- //printf("(%d|%d)", list[i]->sort_key[0],list[i]->sort_key[1]);
}
printf("\n");
printf("Expected: '%s' : ", locale);
}
-// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8
+/* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
void test_icu_I18N_sortmap(int argc, char **argv)
{
- // sucessful tests
+ /* successful tests */
size_t en_1_len = 6;
const char * en_1_src[6] = {"z", "K", "a", "A", "Z", "k"};
const char * en_1_cck[6] = {"a", "A", "k", "K", "z", "Z"};
YAZ_CHECK(test_icu_sortmap("en_GB", en_1_len, en_1_src, en_1_cck));
YAZ_CHECK(test_icu_sortmap("en_US", en_1_len, en_1_src, en_1_cck));
- // sucessful tests
- size_t da_1_len = 6;
- const char * da_1_src[6] = {"z", "å", "o", "æ", "a", "ø"};
- const char * da_1_cck[6] = {"a", "o", "z", "æ", "ø", "å"};
- YAZ_CHECK(test_icu_sortmap("da", da_1_len, da_1_src, da_1_cck));
- YAZ_CHECK(test_icu_sortmap("da_DK", da_1_len, da_1_src, da_1_cck));
-
- // sucessful tests
- size_t de_1_len = 9;
- const char * de_1_src[9] = {"u", "ä", "o", "t", "s", "ß", "ü", "ö", "a"};
- const char * de_1_cck[9] = {"a","ä", "o", "ö", "s", "ß", "t", "u", "ü"};
- YAZ_CHECK(test_icu_sortmap("de", de_1_len, de_1_src, de_1_cck));
- YAZ_CHECK(test_icu_sortmap("de_AT", de_1_len, de_1_src, de_1_cck));
- YAZ_CHECK(test_icu_sortmap("de_DE", de_1_len, de_1_src, de_1_cck));
+ /* successful tests */
+ {
+ size_t da_1_len = 6;
+ const char * da_1_src[6] = {"z", "å", "o", "æ", "a", "ø"};
+ const char * da_1_cck[6] = {"a", "o", "z", "æ", "ø", "å"};
+ YAZ_CHECK(test_icu_sortmap("da", da_1_len, da_1_src, da_1_cck));
+ YAZ_CHECK(test_icu_sortmap("da_DK", da_1_len, da_1_src, da_1_cck));
+ }
+ /* successful tests */
+ {
+ size_t de_1_len = 9;
+ const char * de_1_src[9] = {"u", "ä", "o", "t", "s", "ß", "ü", "ö", "a"};
+ const char * de_1_cck[9] = {"a","ä", "o", "ö", "s", "ß", "t", "u", "ü"};
+ YAZ_CHECK(test_icu_sortmap("de", de_1_len, de_1_src, de_1_cck));
+ YAZ_CHECK(test_icu_sortmap("de_AT", de_1_len, de_1_src, de_1_cck));
+ YAZ_CHECK(test_icu_sortmap("de_DE", de_1_len, de_1_src, de_1_cck));
+ }
}
-// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8
+/* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
};
-// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8
+/* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
void test_icu_I18N_normalizer(int argc, char **argv)
{
}
-// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8
+/* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
int test_icu_tokenizer(const char * locale, char action,
const char * src8cstr, int count)
struct icu_buf_utf16 * src16 = icu_buf_utf16_create(0);
struct icu_buf_utf16 * tkn16 = icu_buf_utf16_create(0);
struct icu_buf_utf8 * tkn8 = icu_buf_utf8_create(0);
+ struct icu_tokenizer * tokenizer = 0;
- //printf("Input: '%s'\n", src8cstr);
-
- // transforming to UTF16
+ /* transforming to UTF16 */
icu_utf16_from_utf8_cstr(src16, src8cstr, &status);
icu_check_status(status);
- // set up tokenizer
- struct icu_tokenizer * tokenizer
- = icu_tokenizer_create(locale, action, &status);
+ /* set up tokenizer */
+ tokenizer = icu_tokenizer_create(locale, action, &status);
icu_check_status(status);
YAZ_CHECK(tokenizer);
- // attach text buffer to tokenizer
+ /* attach text buffer to tokenizer */
icu_tokenizer_attach(tokenizer, src16, &status);
icu_check_status(status);
YAZ_CHECK(tokenizer->bi);
- // perform work on tokens
- //printf("Tokens: ");
+ /* perform work on tokens */
while(icu_tokenizer_next_token(tokenizer, tkn16, &status)){
icu_check_status(status);
- // converting to UTF8
+ /* converting to UTF8 */
icu_utf16_to_utf8(tkn8, tkn16, &status);
-
- //printf("token %d %d %d %d '%s'\n",
- //
- // icu_tokenizer_token_start(tokenizer),
- // icu_tokenizer_token_end(tokenizer),
- // icu_tokenizer_token_length(tokenizer),
- // tkn8->utf8);
}
if (count != icu_tokenizer_token_count(tokenizer)){
}
-// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8
+/* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
void test_icu_I18N_tokenizer(int argc, char **argv)
{
- const char * da_str
- = "Blåbærtærte. Denne kage stammer fra Finland. "
- "Den er med blåbær, men alle sommerens forskellige bær kan bruges.";
-
- YAZ_CHECK(test_icu_tokenizer("da", 's', da_str, 3));
- YAZ_CHECK(test_icu_tokenizer("dar", 'l', da_str, 17));
- YAZ_CHECK(test_icu_tokenizer("da", 'w', da_str, 37));
- YAZ_CHECK(test_icu_tokenizer("da", 'c', da_str, 110));
+ {
+ const char * da_str
+ = "Blåbærtærte. Denne kage stammer fra Finland. "
+ "Den er med blåbær, men alle sommerens forskellige bær kan bruges.";
+
+ YAZ_CHECK(test_icu_tokenizer("da", 's', da_str, 3));
+ YAZ_CHECK(test_icu_tokenizer("dar", 'l', da_str, 17));
+ YAZ_CHECK(test_icu_tokenizer("da", 'w', da_str, 37));
+ YAZ_CHECK(test_icu_tokenizer("da", 'c', da_str, 110));
+ }
}
const char * en_str
= "O Romeo, Romeo! wherefore art thou\t Romeo?";
- printf("ICU chain:\ninput: '%s'\n", en_str);
-
UErrorCode status = U_ZERO_ERROR;
- //struct icu_chain_step * step = 0;
struct icu_chain * chain = 0;
xmlNode *xml_node = xmlDocGetRootElement(doc);
YAZ_CHECK(xml_node);
+ printf("ICU chain:\ninput: '%s'\n", en_str);
+
chain = icu_chain_xml_config(xml_node, &status);
void test_bug_1140(void)
{
- const char * en_str
- = "O Romeo, Romeo! wherefore art thou\t Romeo?";
-
- printf("ICU chain:\ninput: '%s'\n", en_str);
-
UErrorCode status = U_ZERO_ERROR;
- //struct icu_chain_step * step = 0;
struct icu_chain * chain = 0;
const char * xml_str = "<icu_chain id=\"en:word\" locale=\"en\">"
-#endif // HAVE_ICU
+#endif /* HAVE_ICU */
-// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8
+/* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
int main(int argc, char **argv)
{
YAZ_CHECK_INIT(argc, argv);
YAZ_CHECK_LOG();
-#ifdef HAVE_ICU
+#if HAVE_ICU
- //test_icu_I18N_casemap_failures(argc, argv);
test_icu_I18N_casemap(argc, argv);
test_icu_I18N_sortmap(argc, argv);
test_icu_I18N_normalizer(argc, argv);
test_icu_I18N_chain(argc, argv);
test_bug_1140();
-#else // HAVE_ICU
+#else /* HAVE_ICU */
printf("ICU unit tests omitted.\n"
"Please install libicu36-dev and icu-doc or similar\n");
YAZ_CHECK(0 == 0);
-#endif // HAVE_ICU
+#endif /* HAVE_ICU */
YAZ_CHECK_TERM;
}
-// DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8
+/* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */