X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Ficu_I18N.c;h=9f1d13c3de1a4f492278b8dd022f56a9f9eac3b9;hp=0bbd01022ef500d91c7603d92f47ae78dd099a22;hb=8a980f7051e22ca1e4fac0395ed7f8647cd9eda3;hpb=ee6ab2ee3a9ee1a8c65d7272ec7fba1d886f5af0 diff --git a/src/icu_I18N.c b/src/icu_I18N.c index 0bbd010..9f1d13c 100644 --- a/src/icu_I18N.c +++ b/src/icu_I18N.c @@ -1,8 +1,13 @@ /* This file is part of the YAZ toolkit. - * Copyright (C) 1995-2008 Index Data + * Copyright (C) 1995-2009 Index Data * See the file LICENSE for details. */ +/** + * \file icu_I18N.c + * \brief ICU utilities + */ + #if HAVE_CONFIG_H #include "config.h" #endif @@ -647,70 +652,78 @@ int32_t icu_tokenizer_token_count(struct icu_tokenizer * tokenizer) -struct icu_normalizer * icu_normalizer_create(const char *rules, char action, - UErrorCode *status) +struct icu_transform * icu_transform_create(const char *id, char action, + const char *rules, + UErrorCode *status) { + struct icu_buf_utf16 *id16 = icu_buf_utf16_create(0); + struct icu_buf_utf16 *rules16 = icu_buf_utf16_create(0); - struct icu_normalizer * normalizer - = (struct icu_normalizer *) xmalloc(sizeof(struct icu_normalizer)); + struct icu_transform * transform + = (struct icu_transform *) xmalloc(sizeof(struct icu_transform)); - normalizer->action = action; - normalizer->trans = 0; - normalizer->rules16 = icu_buf_utf16_create(0); - icu_utf16_from_utf8_cstr(normalizer->rules16, rules, status); - - switch(normalizer->action) { + transform->action = action; + transform->trans = 0; + + if (id) + icu_utf16_from_utf8_cstr(id16, id, status); + if (rules) + icu_utf16_from_utf8_cstr(rules16, rules, status); + + switch(transform->action) + { case 'f': case 'F': - normalizer->trans - = utrans_openU(normalizer->rules16->utf16, - normalizer->rules16->utf16_len, + transform->trans + = utrans_openU(id16->utf16, + id16->utf16_len, UTRANS_FORWARD, - 0, 0, - normalizer->parse_error, status); + rules16->utf16, + rules16->utf16_len, + &transform->parse_error, status); break; case 'r': case 'R': - normalizer->trans - = utrans_openU(normalizer->rules16->utf16, - normalizer->rules16->utf16_len, + transform->trans + = utrans_openU(id16->utf16, + id16->utf16_len, UTRANS_REVERSE , - 0, 0, - normalizer->parse_error, status); + rules16->utf16, + rules16->utf16_len, + &transform->parse_error, status); break; default: *status = U_UNSUPPORTED_ERROR; - return 0; break; } + icu_buf_utf16_destroy(rules16); + icu_buf_utf16_destroy(id16); if (U_SUCCESS(*status)) - return normalizer; + return transform; /* freeing if failed */ - icu_normalizer_destroy(normalizer); + icu_transform_destroy(transform); return 0; } -void icu_normalizer_destroy(struct icu_normalizer * normalizer){ - if (normalizer) { - if (normalizer->rules16) - icu_buf_utf16_destroy(normalizer->rules16); - if (normalizer->trans) - utrans_close(normalizer->trans); - xfree(normalizer); +void icu_transform_destroy(struct icu_transform * transform){ + if (transform) { + if (transform->trans) + utrans_close(transform->trans); + xfree(transform); } } -int icu_normalizer_normalize(struct icu_normalizer * normalizer, - struct icu_buf_utf16 * dest16, - struct icu_buf_utf16 * src16, - UErrorCode *status) +int icu_transform_trans(struct icu_transform * transform, + struct icu_buf_utf16 * dest16, + struct icu_buf_utf16 * src16, + UErrorCode *status) { - if (!normalizer || !normalizer->trans + if (!transform || !transform->trans || !src16 || !dest16) return 0; @@ -724,7 +737,7 @@ int icu_normalizer_normalize(struct icu_normalizer * normalizer, return 0; - utrans_transUChars (normalizer->trans, + utrans_transUChars (transform->trans, dest16->utf16, &(dest16->utf16_len), dest16->utf16_cap, 0, &(src16->utf16_len), status); @@ -762,13 +775,20 @@ struct icu_chain_step * icu_chain_step_create(struct icu_chain * chain, case ICU_chain_step_type_casemap: step->u.casemap = icu_casemap_create(rule[0], status); break; - case ICU_chain_step_type_normalize: - step->u.normalizer = icu_normalizer_create((char *) rule, 'f', status); + case ICU_chain_step_type_transform: + /* rule omitted. Only ID used */ + step->u.transform = icu_transform_create((const char *) rule, 'f', + 0, status); break; case ICU_chain_step_type_tokenize: step->u.tokenizer = icu_tokenizer_create((char *) chain->locale, (char) rule[0], status); break; + case ICU_chain_step_type_transliterate: + /* we pass a dummy ID to utrans_openU.. */ + step->u.transform = icu_transform_create("custom", 'f', + (const char *) rule, status); + break; default: break; } @@ -791,8 +811,9 @@ void icu_chain_step_destroy(struct icu_chain_step * step){ icu_casemap_destroy(step->u.casemap); icu_buf_utf16_destroy(step->buf16); break; - case ICU_chain_step_type_normalize: - icu_normalizer_destroy(step->u.normalizer); + case ICU_chain_step_type_transform: + case ICU_chain_step_type_transliterate: + icu_transform_destroy(step->u.transform); icu_buf_utf16_destroy(step->buf16); break; case ICU_chain_step_type_tokenize: @@ -901,7 +922,10 @@ struct icu_chain * icu_chain_xml_config(const xmlNode *xml_node, step = icu_chain_insert_step(chain, ICU_chain_step_type_casemap, (const uint8_t *) xml_rule, status); else if (!strcmp((const char *) node->name, "transform")) - step = icu_chain_insert_step(chain, ICU_chain_step_type_normalize, + step = icu_chain_insert_step(chain, ICU_chain_step_type_transform, + (const uint8_t *) xml_rule, status); + else if (!strcmp((const char *) node->name, "transliterate")) + step = icu_chain_insert_step(chain, ICU_chain_step_type_transliterate, (const uint8_t *) xml_rule, status); else if (!strcmp((const char *) node->name, "tokenize")) step = icu_chain_insert_step(chain, ICU_chain_step_type_tokenize, @@ -909,14 +933,31 @@ struct icu_chain * icu_chain_xml_config(const xmlNode *xml_node, else if (!strcmp((const char *) node->name, "display")) step = icu_chain_insert_step(chain, ICU_chain_step_type_display, (const uint8_t *) "", status); + else if (!strcmp((const char *) node->name, "normalize")) + { + yaz_log(YLOG_WARN, "Element %s is deprecated. " + "Use transform instead", node->name); + step = icu_chain_insert_step(chain, ICU_chain_step_type_transform, + (const uint8_t *) xml_rule, status); + } + else if (!strcmp((const char *) node->name, "index") + || !strcmp((const char *) node->name, "sortkey")) + { + yaz_log(YLOG_WARN, "Element %s is no longer needed. " + "Remove it from the configuration", node->name); + } + else + { + yaz_log(YLOG_WARN, "Unknown element %s", node->name); + icu_chain_destroy(chain); + return 0; + } xmlFree(xml_rule); - if (!step || U_FAILURE(*status)) + if (step && U_FAILURE(*status)) { icu_chain_destroy(chain); return 0; } - - } return chain; } @@ -953,12 +994,14 @@ struct icu_chain_step * icu_chain_insert_step(struct icu_chain * chain, case ICU_chain_step_type_casemap: buf16 = icu_buf_utf16_create(0); break; - case ICU_chain_step_type_normalize: + case ICU_chain_step_type_transform: + case ICU_chain_step_type_transliterate: buf16 = icu_buf_utf16_create(0); break; case ICU_chain_step_type_tokenize: buf16 = icu_buf_utf16_create(0); break; + break; default: break; } @@ -1028,9 +1071,10 @@ int icu_chain_step_next_token(struct icu_chain * chain, step->buf16, src16, status, chain->locale); break; - case ICU_chain_step_type_normalize: - icu_normalizer_normalize(step->u.normalizer, - step->buf16, src16, status); + case ICU_chain_step_type_transform: + case ICU_chain_step_type_transliterate: + icu_transform_trans(step->u.transform, + step->buf16, src16, status); break; case ICU_chain_step_type_tokenize: /* attach to new src16 token only first time during splitting */ @@ -1202,7 +1246,9 @@ const UCollator * icu_chain_get_coll(struct icu_chain * chain) /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab */ +