/* This file is part of the YAZ toolkit.
- * Copyright (C) 1995-2009 Index Data
+ * Copyright (C) 1995-2010 Index Data
* See the file LICENSE for details.
*/
#endif
#include <yaz/test.h>
+#include <yaz/log.h>
#if YAZ_HAVE_ICU
#include <yaz/icu_I18N.h>
success = 0;
/* report failures */
- if (!success){
- printf("\nERROR\n");
- printf("original string: '%s' (%d)\n", src8cstr, src8cstr_len);
- printf("icu_casemap '%s:%c' '%s' (%d)\n",
- locale, action, dest8->utf8, dest8->utf8_len);
- printf("expected string: '%s' (%d)\n", chk8cstr, chk8cstr_len);
+ if (!success)
+ {
+ yaz_log(YLOG_WARN, "test_icu_casemap failed");
+ yaz_log(YLOG_LOG, "Original string: '%s' (%d)",
+ src8cstr, src8cstr_len);
+ yaz_log(YLOG_LOG, "icu_casemap '%s:%c' '%s' (%d)",
+ locale, action, dest8->utf8, dest8->utf8_len);
+ yaz_log(YLOG_LOG, "expected string: '%s' (%d)",
+ chk8cstr, chk8cstr_len);
}
/* clean the buffers */
if (!success)
{
- printf("\nERROR\n");
- printf("Input str: '%s' : ", locale);
+ yaz_log(YLOG_LOG, "ERROR");
+ yaz_log(YLOG_LOG, "Input str:'%s':", locale);
for (i = 0; i < src_list_len; i++) {
- printf(" '%s'", list[i]->disp_term);
+ yaz_log(YLOG_LOG, " '%s'", list[i]->disp_term);
}
- printf("\n");
- printf("ICU sort: '%s' : ", locale);
+ yaz_log(YLOG_LOG, "ICU sort: '%s':", locale);
for (i = 0; i < src_list_len; i++) {
- printf(" '%s'", list[i]->disp_term);
+ yaz_log(YLOG_LOG, " '%s'", list[i]->disp_term);
}
- printf("\n");
- printf("Expected: '%s' : ", locale);
+ yaz_log(YLOG_LOG, "Expected: '%s':", locale);
for (i = 0; i < src_list_len; i++) {
- printf(" '%s'", chk_list[i]);
+ yaz_log(YLOG_LOG, " '%s'", chk_list[i]);
}
- printf("\n");
}
for (i = 0; i < src_list_len; i++)
else
{
success = 0;
- printf("Normalization\n");
- printf("Rules: '%s'\n", rules8cstr);
- printf("Input: '%s'\n", src8cstr);
- printf("Normalized: '%s'\n", dest8->utf8);
- printf("Expected: '%s'\n", chk8cstr);
+ yaz_log(YLOG_LOG, "Normalization");
+ yaz_log(YLOG_LOG, " Rules: '%s'", rules8cstr);
+ yaz_log(YLOG_LOG, " Input: '%s'", src8cstr);
+ yaz_log(YLOG_LOG, " Normalized: '%s'", dest8->utf8);
+ yaz_log(YLOG_LOG, " Expected: '%s'", chk8cstr);
}
icu_transform_destroy(transform);
if (count != icu_tokenizer_token_count(tokenizer)){
success = 0;
- printf("\nTokenizer '%s:%c' Error: \n", locale, action);
- printf("Input: '%s'\n", src8cstr);
- printf("Tokens: %d", icu_tokenizer_token_count(tokenizer));
- printf(", expected: %d\n", count);
+ yaz_log(YLOG_LOG, "Tokenizer '%s:%c' Error:", locale, action);
+ yaz_log(YLOG_LOG, " Input: '%s'", src8cstr);
+ yaz_log(YLOG_LOG, " Tokens: %d", icu_tokenizer_token_count(tokenizer));
+ yaz_log(YLOG_LOG, " Expected: %d", count);
}
icu_tokenizer_destroy(tokenizer);
icu_chain_destroy(chain);
}
+/*
+ * Exercise the icu_iter API on a chain built from an inline XML
+ * configuration containing one <tokenize rule="w"/> step followed by one
+ * <transform> step with the rule "[[:WhiteSpace:][:Punctuation:]] Remove".
+ *
+ * Every token returned by icu_iter_next() for a fixed input string is
+ * written to the YAZ log; the token contents themselves are not asserted.
+ * Each setup step is verified with YAZ_CHECK, and the test stops at the
+ * first failing step after releasing whatever it has acquired so far.
+ */
+void test_icu_iter1(void)
+{
+    UErrorCode status = U_ZERO_ERROR;
+    struct icu_chain * chain = 0;
+    xmlNode *xml_node;
+    struct icu_iter *iter;
+    struct icu_buf_utf8 *token;
+
+    const char * xml_str = "<icu locale=\"en\">"
+        "<tokenize rule=\"w\"/>"
+        "<transform rule=\"[[:WhiteSpace:][:Punctuation:]] Remove\"/>"
+        "</icu>";
+
+    xmlDoc *doc = xmlParseMemory(xml_str, strlen(xml_str));
+    YAZ_CHECK(doc);
+    if (!doc)
+        return;
+    xml_node = xmlDocGetRootElement(doc);
+    YAZ_CHECK(xml_node);
+    if (!xml_node)
+    {
+        /* the parsed document is still owned here; do not leak it */
+        xmlFreeDoc(doc);
+        return;
+    }
+
+    chain = icu_chain_xml_config(xml_node, 0, &status);
+
+    /* the XML document is not used again once the chain is configured */
+    xmlFreeDoc(doc);
+    YAZ_CHECK(chain);
+    if (!chain)
+        return; /* nothing to iterate over; avoid passing a null chain on */
+
+    iter = icu_iter_create(chain, "a string with 15 tokens and 8 displays");
+    YAZ_CHECK(iter);
+    if (!iter)
+    {
+        /* the chain was created successfully, so it must be released */
+        icu_chain_destroy(chain);
+        return;
+    }
+    token = icu_buf_utf8_create(0);
+    while (icu_iter_next(iter, token))
+    {
+        /* log with an explicit length taken from the buffer */
+        yaz_log(YLOG_LOG, "[%.*s]", (int) token->utf8_len, token->utf8);
+    }
+    icu_buf_utf8_destroy(token);
+
+    icu_iter_destroy(iter);
+    icu_chain_destroy(chain);
+}
+
+
+/*
+ * Exercise the icu_iter API on a longer chain built from an inline XML
+ * configuration: a <transform> step, two <tokenize> steps (rules "l" and
+ * "w"), a second <transform> step, a <display/> step and a
+ * <casemap rule="l"/> step, in that order.
+ *
+ * Every token returned by icu_iter_next() for a fixed input string is
+ * written to the YAZ log; the token contents themselves are not asserted.
+ * Each setup step is verified with YAZ_CHECK, and the test stops at the
+ * first failing step after releasing whatever it has acquired so far.
+ */
+void test_icu_iter2(void)
+{
+    UErrorCode status = U_ZERO_ERROR;
+    struct icu_chain * chain = 0;
+    xmlNode *xml_node;
+    struct icu_iter *iter;
+    struct icu_buf_utf8 *token;
+
+    const char * xml_str = "<icu locale=\"en\">"
+        "<transform rule=\"[:Control:] Any-Remove\"/>"
+        "<tokenize rule=\"l\"/>"
+        "<tokenize rule=\"w\"/>"
+        "<transform rule=\"[[:WhiteSpace:][:Punctuation:]] Remove\"/>"
+        "<display/>"
+        "<casemap rule=\"l\"/>"
+        "</icu>";
+
+    xmlDoc *doc = xmlParseMemory(xml_str, strlen(xml_str));
+    YAZ_CHECK(doc);
+    if (!doc)
+        return;
+    xml_node = xmlDocGetRootElement(doc);
+    YAZ_CHECK(xml_node);
+    if (!xml_node)
+    {
+        /* the parsed document is still owned here; do not leak it */
+        xmlFreeDoc(doc);
+        return;
+    }
+
+    chain = icu_chain_xml_config(xml_node, 0, &status);
+
+    /* the XML document is not used again once the chain is configured */
+    xmlFreeDoc(doc);
+    YAZ_CHECK(chain);
+    if (!chain)
+        return; /* nothing to iterate over; avoid passing a null chain on */
+
+    iter = icu_iter_create(chain, "Adobe Acrobat Reader, 1991-1999.");
+    YAZ_CHECK(iter);
+    if (!iter)
+    {
+        /* the chain was created successfully, so it must be released */
+        icu_chain_destroy(chain);
+        return;
+    }
+    token = icu_buf_utf8_create(0);
+    while (icu_iter_next(iter, token))
+    {
+        /* log with an explicit length taken from the buffer */
+        yaz_log(YLOG_LOG, "[%.*s]", (int) token->utf8_len, token->utf8);
+    }
+    icu_buf_utf8_destroy(token);
+
+    icu_iter_destroy(iter);
+    icu_chain_destroy(chain);
+}
+
#endif /* YAZ_HAVE_ICU */
int main(int argc, char **argv)
test_icu_I18N_chain(argc, argv);
test_chain_empty_token();
test_chain_empty_chain();
+ test_icu_iter1();
+ test_icu_iter2();
+
test_bug_1140();
#else /* YAZ_HAVE_ICU */
- printf("ICU unit tests omitted.\n"
- "Please install libicu36-dev and icu-doc or similar\n");
+ yaz_log(YLOG_LOG, "ICU unit tests omitted");
YAZ_CHECK(0 == 0);
#endif /* YAZ_HAVE_ICU */