From b55d58436ec1d0b56ea2833ab8a711f8b9232ed7 Mon Sep 17 00:00:00 2001
From: Marc Cromme
Date: Mon, 21 May 2007 10:14:08 +0000
Subject: [PATCH] ICU chain XML configuration up and running, used in unit test as well.

---
 src/icu_I18N.c        | 96 +++++++++++++++++++++++++++++++++++++++++++++----
 src/icu_I18N.h        |  9 ++++-
 src/test_icu_I18N.c   | 36 +++++++++++++++----
 src/test_icu_I18N.xml |  1 +
 4 files changed, 128 insertions(+), 14 deletions(-)

diff --git a/src/icu_I18N.c b/src/icu_I18N.c
index 29b9edd..aaa0924 100644
--- a/src/icu_I18N.c
+++ b/src/icu_I18N.c
@@ -1,4 +1,4 @@
-/* $Id: icu_I18N.c,v 1.17 2007-05-20 19:00:17 marc Exp $
+/* $Id: icu_I18N.c,v 1.18 2007-05-21 10:14:08 marc Exp $
    Copyright (c) 2006-2007, Index Data.
 
 This file is part of Pazpar2.
@@ -862,16 +862,96 @@ struct icu_chain * icu_chain_create(const uint8_t * identifier,
 
 void icu_chain_destroy(struct icu_chain * chain)
 {
-    icu_buf_utf8_destroy(chain->display8);
-    icu_buf_utf8_destroy(chain->norm8);
-    icu_buf_utf8_destroy(chain->sort8);
+    if (chain){
+        icu_buf_utf8_destroy(chain->display8);
+        icu_buf_utf8_destroy(chain->norm8);
+        icu_buf_utf8_destroy(chain->sort8);
+
+        icu_buf_utf16_destroy(chain->src16);
+
+        icu_chain_step_destroy(chain->steps);
+    }
+};
+
+
+
+struct icu_chain * icu_chain_xml_config(xmlNode *xml_node,
+                                        UErrorCode * status){
+
+    xmlNode *node = 0;
+    struct icu_chain * chain = 0;
+
+    if (!xml_node
+        ||xml_node->type != XML_ELEMENT_NODE
+        || strcmp((const char *) xml_node->name, "icu_chain"))
+
+        return 0;
+
+    xmlChar *xml_id = xmlGetProp(xml_node, (xmlChar *) "id");
+    xmlChar *xml_locale = xmlGetProp(xml_node, (xmlChar *) "locale");
+
+    if (!xml_id || !strlen((const char *) xml_id)
+        || !xml_locale || !strlen((const char *) xml_locale))
+        return 0;
+
+    chain = icu_chain_create((const uint8_t *) xml_id,
+                             (const uint8_t *) xml_locale);
+
+    if (!chain)
+        return 0;
+
+    for (node = xml_node->children; node; node = node->next)
+    {
+        if (node->type != XML_ELEMENT_NODE)
+            continue;
+
+        xmlChar *xml_rule = xmlGetProp(node, (xmlChar *) "rule");
+        struct icu_chain_step * step = 0;
+
+        if (!strcmp((const char *) node->name,
+                    (const char *) "casemap")){
+            step = icu_chain_insert_step(chain, ICU_chain_step_type_casemap,
+                                         (const uint8_t *) xml_rule, status);
+        }
+        else if (!strcmp((const char *) node->name,
+                         (const char *) "normalize")){
+            step = icu_chain_insert_step(chain, ICU_chain_step_type_normalize,
+                                         (const uint8_t *) xml_rule, status);
+        }
+        else if (!strcmp((const char *) node->name,
+                         (const char *) "tokenize")){
+            step = icu_chain_insert_step(chain, ICU_chain_step_type_tokenize,
+                                         (const uint8_t *) xml_rule, status);
+        }
+        else if (!strcmp((const char *) node->name,
+                         (const char *) "display")){
+            step = icu_chain_insert_step(chain, ICU_chain_step_type_display,
+                                         (const uint8_t *) "", status);
+        }
+        else if (!strcmp((const char *) node->name,
+                         (const char *) "normal")){
+            step = icu_chain_insert_step(chain, ICU_chain_step_type_norm,
+                                         (const uint8_t *) "", status);
+        }
+        else if (!strcmp((const char *) node->name,
+                         (const char *) "sort")){
+            step = icu_chain_insert_step(chain, ICU_chain_step_type_sort,
+                                         (const uint8_t *) "", status);
+        }
 
-    icu_buf_utf16_destroy(chain->src16);
+        if (!step || U_FAILURE(*status)){
+            icu_chain_destroy(chain);
+            return 0;
+        }
+
+
+    }
 
-    icu_chain_step_destroy(chain->steps);
+    return chain;
 };
 
 
+
 struct icu_chain_step * icu_chain_insert_step(struct icu_chain * chain,
                                               enum icu_chain_step_type type,
                                               const uint8_t * rule,
@@ -1031,10 +1111,12 @@ int icu_chain_assign_cstr(struct icu_chain * chain,
                           const char * src8cstr,
                           UErrorCode *status)
 {
-    struct icu_chain_step * stp = chain->steps;
+    struct icu_chain_step * stp = 0;
 
     if (!chain || !src8cstr)
         return 0;
+
+    stp = chain->steps;
 
     // clear token count
     chain->token_count = 0;
diff --git a/src/icu_I18N.h b/src/icu_I18N.h
index 299eb7b..8ad7b11 100644
--- a/src/icu_I18N.h
+++ b/src/icu_I18N.h
@@ -1,4 +1,4 @@
-/* $Id: icu_I18N.h,v 1.15 2007-05-20 19:00:17 marc Exp $
+/* $Id: icu_I18N.h,v 1.16 2007-05-21 10:14:08 marc Exp $
    Copyright (c) 2006-2007, Index Data.
 
 This file is part of Pazpar2.
@@ -26,6 +26,8 @@
 
 
 #include
+#include
+#include
 
 #include /* Basic ICU data types */
 #include /* char names */
@@ -245,8 +247,13 @@ struct icu_chain
 
 struct icu_chain * icu_chain_create(const uint8_t * identifier,
                                     const uint8_t * locale);
+
 void icu_chain_destroy(struct icu_chain * chain);
 
+struct icu_chain * icu_chain_xml_config(xmlNode *xml_node,
+                                        UErrorCode * status);
+
+
 struct icu_chain_step * icu_chain_insert_step(struct icu_chain * chain,
                                               enum icu_chain_step_type type,
                                               const uint8_t * rule,
diff --git a/src/test_icu_I18N.c b/src/test_icu_I18N.c
index 8861765..4e976eb 100644
--- a/src/test_icu_I18N.c
+++ b/src/test_icu_I18N.c
@@ -1,4 +1,4 @@
-/* $Id: test_icu_I18N.c,v 1.22 2007-05-20 19:00:17 marc Exp $
+/* $Id: test_icu_I18N.c,v 1.23 2007-05-21 10:14:08 marc Exp $
    Copyright (c) 2006-2007, Index Data.
 
 This file is part of Pazpar2.
@@ -498,14 +498,35 @@ void test_icu_I18N_tokenizer(int argc, char **argv)
 void test_icu_I18N_chain(int argc, char **argv)
 {
     const char * en_str
-        = "O Romeo, Romeo! wherefore art\nthou\tRomeo?";
+        = "O Romeo, Romeo! wherefore art thou\t Romeo?";
 
     printf("ICU chain:\ninput: '%s'\n", en_str);
 
     UErrorCode status = U_ZERO_ERROR;
-    struct icu_chain_step * step = 0;
-    struct icu_chain * chain
-        = icu_chain_create((uint8_t *) "en:word", (uint8_t *) "en");
+    //struct icu_chain_step * step = 0;
+    struct icu_chain * chain = 0;
+
+
+    const char * xml_str = ""
+        ""
+        ""
+        ""
+        ""
+        ""
+        ""
+        ""
+        "";
+
+
+    xmlDoc *doc = xmlParseMemory(xml_str, strlen(xml_str));
+    xmlNode *xml_node = xmlDocGetRootElement(doc);
+    YAZ_CHECK(xml_node);
+
+
+    chain = icu_chain_xml_config(xml_node, &status);
+
+#if 0
+    chain = icu_chain_create((uint8_t *) "en:word", (uint8_t *) "en");
     step = icu_chain_insert_step(chain, ICU_chain_step_type_normalize,
                                  (const uint8_t *) "[:Control:] Any-Remove",
                                  &status);
@@ -535,8 +556,9 @@ void test_icu_I18N_chain(int argc, char **argv)
 /* (const uint8_t *)"", */
 /* &status); */
 
+#endif
 
-
+    YAZ_CHECK(chain);
 
     YAZ_CHECK(icu_chain_assign_cstr(chain, en_str, &status));
 
@@ -547,6 +569,8 @@ void test_icu_I18N_chain(int argc, char **argv)
                icu_chain_get_display(chain));
     }
 
+    YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 7);
+
     icu_chain_destroy(chain);
 }
 
diff --git a/src/test_icu_I18N.xml b/src/test_icu_I18N.xml
index 17b593a..ea7e7d2 100644
--- a/src/test_icu_I18N.xml
+++ b/src/test_icu_I18N.xml
@@ -17,6 +17,7 @@
+
-- 
1.7.10.4