X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Ficu_I18N.c;h=9f1d13c3de1a4f492278b8dd022f56a9f9eac3b9;hp=b2119b6804cf7ebeb5e999a2958d4e947fe2fb48;hb=4e3f7be8266559e07dc01ae5d6aba31d30b8bd44;hpb=68398183c3b25120b484681482f461a1da4c6a36 diff --git a/src/icu_I18N.c b/src/icu_I18N.c index b2119b6..9f1d13c 100644 --- a/src/icu_I18N.c +++ b/src/icu_I18N.c @@ -1,5 +1,5 @@ /* This file is part of the YAZ toolkit. - * Copyright (C) 1995-2008 Index Data + * Copyright (C) 1995-2009 Index Data * See the file LICENSE for details. */ @@ -652,70 +652,78 @@ int32_t icu_tokenizer_token_count(struct icu_tokenizer * tokenizer) -struct icu_normalizer * icu_normalizer_create(const char *rules, char action, - UErrorCode *status) +struct icu_transform * icu_transform_create(const char *id, char action, + const char *rules, + UErrorCode *status) { + struct icu_buf_utf16 *id16 = icu_buf_utf16_create(0); + struct icu_buf_utf16 *rules16 = icu_buf_utf16_create(0); + + struct icu_transform * transform + = (struct icu_transform *) xmalloc(sizeof(struct icu_transform)); + + transform->action = action; + transform->trans = 0; - struct icu_normalizer * normalizer - = (struct icu_normalizer *) xmalloc(sizeof(struct icu_normalizer)); + if (id) + icu_utf16_from_utf8_cstr(id16, id, status); + if (rules) + icu_utf16_from_utf8_cstr(rules16, rules, status); - normalizer->action = action; - normalizer->trans = 0; - normalizer->rules16 = icu_buf_utf16_create(0); - icu_utf16_from_utf8_cstr(normalizer->rules16, rules, status); - - switch(normalizer->action) { + switch(transform->action) + { case 'f': case 'F': - normalizer->trans - = utrans_openU(normalizer->rules16->utf16, - normalizer->rules16->utf16_len, + transform->trans + = utrans_openU(id16->utf16, + id16->utf16_len, UTRANS_FORWARD, - 0, 0, - &normalizer->parse_error, status); + rules16->utf16, + rules16->utf16_len, + &transform->parse_error, status); break; case 'r': case 'R': - normalizer->trans - = utrans_openU(normalizer->rules16->utf16, - normalizer->rules16->utf16_len, + transform->trans + = utrans_openU(id16->utf16, + id16->utf16_len, UTRANS_REVERSE , - 0, 0, - &normalizer->parse_error, status); + rules16->utf16, + rules16->utf16_len, + &transform->parse_error, status); break; default: *status = U_UNSUPPORTED_ERROR; - return 0; break; } + icu_buf_utf16_destroy(rules16); + icu_buf_utf16_destroy(id16); if (U_SUCCESS(*status)) - return normalizer; + return transform; /* freeing if failed */ - icu_normalizer_destroy(normalizer); + icu_transform_destroy(transform); return 0; } -void icu_normalizer_destroy(struct icu_normalizer * normalizer){ - if (normalizer) { - if (normalizer->rules16) - icu_buf_utf16_destroy(normalizer->rules16); - if (normalizer->trans) - utrans_close(normalizer->trans); - xfree(normalizer); +void icu_transform_destroy(struct icu_transform * transform){ + if (transform) { + if (transform->trans) + utrans_close(transform->trans); + xfree(transform); } } -int icu_normalizer_normalize(struct icu_normalizer * normalizer, - struct icu_buf_utf16 * dest16, - struct icu_buf_utf16 * src16, - UErrorCode *status) +int icu_transform_trans(struct icu_transform * transform, + struct icu_buf_utf16 * dest16, + struct icu_buf_utf16 * src16, + UErrorCode *status) { - if (!normalizer || !normalizer->trans + if (!transform || !transform->trans || !src16 || !dest16) return 0; @@ -729,7 +737,7 @@ int icu_normalizer_normalize(struct icu_normalizer * normalizer, return 0; - utrans_transUChars (normalizer->trans, + utrans_transUChars (transform->trans, dest16->utf16, &(dest16->utf16_len), dest16->utf16_cap, 0, &(src16->utf16_len), status); @@ -767,13 +775,20 @@ struct icu_chain_step * icu_chain_step_create(struct icu_chain * chain, case ICU_chain_step_type_casemap: step->u.casemap = icu_casemap_create(rule[0], status); break; - case ICU_chain_step_type_normalize: - step->u.normalizer = icu_normalizer_create((char *) rule, 'f', status); + case ICU_chain_step_type_transform: + /* rule omitted. Only ID used */ + step->u.transform = icu_transform_create((const char *) rule, 'f', + 0, status); break; case ICU_chain_step_type_tokenize: step->u.tokenizer = icu_tokenizer_create((char *) chain->locale, (char) rule[0], status); break; + case ICU_chain_step_type_transliterate: + /* we pass a dummy ID to utrans_openU.. */ + step->u.transform = icu_transform_create("custom", 'f', + (const char *) rule, status); + break; default: break; } @@ -796,8 +811,9 @@ void icu_chain_step_destroy(struct icu_chain_step * step){ icu_casemap_destroy(step->u.casemap); icu_buf_utf16_destroy(step->buf16); break; - case ICU_chain_step_type_normalize: - icu_normalizer_destroy(step->u.normalizer); + case ICU_chain_step_type_transform: + case ICU_chain_step_type_transliterate: + icu_transform_destroy(step->u.transform); icu_buf_utf16_destroy(step->buf16); break; case ICU_chain_step_type_tokenize: @@ -906,7 +922,10 @@ struct icu_chain * icu_chain_xml_config(const xmlNode *xml_node, step = icu_chain_insert_step(chain, ICU_chain_step_type_casemap, (const uint8_t *) xml_rule, status); else if (!strcmp((const char *) node->name, "transform")) - step = icu_chain_insert_step(chain, ICU_chain_step_type_normalize, + step = icu_chain_insert_step(chain, ICU_chain_step_type_transform, + (const uint8_t *) xml_rule, status); + else if (!strcmp((const char *) node->name, "transliterate")) + step = icu_chain_insert_step(chain, ICU_chain_step_type_transliterate, (const uint8_t *) xml_rule, status); else if (!strcmp((const char *) node->name, "tokenize")) step = icu_chain_insert_step(chain, ICU_chain_step_type_tokenize, @@ -918,7 +937,7 @@ struct icu_chain * icu_chain_xml_config(const xmlNode *xml_node, { yaz_log(YLOG_WARN, "Element %s is deprecated. " "Use transform instead", node->name); - step = icu_chain_insert_step(chain, ICU_chain_step_type_normalize, + step = icu_chain_insert_step(chain, ICU_chain_step_type_transform, (const uint8_t *) xml_rule, status); } else if (!strcmp((const char *) node->name, "index") @@ -975,12 +994,14 @@ struct icu_chain_step * icu_chain_insert_step(struct icu_chain * chain, case ICU_chain_step_type_casemap: buf16 = icu_buf_utf16_create(0); break; - case ICU_chain_step_type_normalize: + case ICU_chain_step_type_transform: + case ICU_chain_step_type_transliterate: buf16 = icu_buf_utf16_create(0); break; case ICU_chain_step_type_tokenize: buf16 = icu_buf_utf16_create(0); break; + break; default: break; } @@ -1050,9 +1071,10 @@ int icu_chain_step_next_token(struct icu_chain * chain, step->buf16, src16, status, chain->locale); break; - case ICU_chain_step_type_normalize: - icu_normalizer_normalize(step->u.normalizer, - step->buf16, src16, status); + case ICU_chain_step_type_transform: + case ICU_chain_step_type_transliterate: + icu_transform_trans(step->u.transform, + step->buf16, src16, status); break; case ICU_chain_step_type_tokenize: /* attach to new src16 token only first time during splitting */ @@ -1224,7 +1246,9 @@ const UCollator * icu_chain_get_coll(struct icu_chain * chain) /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab */ +