X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Ficu_I18N.c;h=a978b2159677ba45686d3acd0b0c2de28ec1808a;hb=def0d51c189bc8a2e9f5f9a67a58833897edecb7;hp=879fcd8c6d4e863827f58739a1bafcd044248180;hpb=b59f3a79326742a4432bda999670f0c85e2c83b7;p=yaz-moved-to-github.git diff --git a/src/icu_I18N.c b/src/icu_I18N.c index 879fcd8..a978b21 100644 --- a/src/icu_I18N.c +++ b/src/icu_I18N.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2007, Index Data ApS * See the file LICENSE for details. * - * $Id: icu_I18N.c,v 1.7 2007-10-25 08:40:06 marc Exp $ + * $Id: icu_I18N.c,v 1.12 2007-11-07 10:19:12 adam Exp $ */ #if HAVE_CONFIG_H @@ -267,6 +267,7 @@ UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 * dest16, size_t src8cstr_len = 0; int32_t utf16_len = 0; + *status = U_ZERO_ERROR; src8cstr_len = strlen(src8cstr); u_strFromUTF8(dest16->utf16, dest16->utf16_cap, @@ -850,7 +851,7 @@ void icu_chain_step_destroy(struct icu_chain_step * step){ -struct icu_chain * icu_chain_create(const uint8_t * locale, +struct icu_chain * icu_chain_create(const char *locale, int sort, UErrorCode * status) { @@ -858,6 +859,8 @@ struct icu_chain * icu_chain_create(const uint8_t * locale, struct icu_chain * chain = (struct icu_chain *) malloc(sizeof(struct icu_chain)); + *status = U_ZERO_ERROR; + strncpy((char *) chain->locale, (const char *) locale, 16); chain->locale[16 - 1] = '\0'; @@ -871,6 +874,8 @@ struct icu_chain * icu_chain_create(const uint8_t * locale, chain->token_count = 0; + chain->src8cstr = 0; + chain->display8 = icu_buf_utf8_create(0); chain->norm8 = icu_buf_utf8_create(0); chain->sort8 = icu_buf_utf8_create(0); @@ -879,7 +884,6 @@ struct icu_chain * icu_chain_create(const uint8_t * locale, chain->steps = 0; - return chain; } @@ -904,14 +908,16 @@ void icu_chain_destroy(struct icu_chain * chain) -struct icu_chain * icu_chain_xml_config(xmlNode *xml_node, - const uint8_t * locale, +struct icu_chain * icu_chain_xml_config(const xmlNode *xml_node, + const char *locale, int sort, UErrorCode * status){ xmlNode *node = 0; struct icu_chain * chain = 0; + *status = U_ZERO_ERROR; + if (!xml_node ||xml_node->type != XML_ELEMENT_NODE // || strcmp((const char *) xml_node->name, "icu_chain") @@ -1034,6 +1040,7 @@ int icu_chain_step_next_token(struct icu_chain * chain, if (step->previous){ src16 = step->previous->buf16; /* tokens might be killed in previous steps, therefore looping */ + while (step->need_new_token && step->previous->more_tokens && !got_new_token) @@ -1087,8 +1094,10 @@ int icu_chain_step_next_token(struct icu_chain * chain, /* make sure to get new previous token if this one had been used up by recursive call to _same_ step */ - if (!step->more_tokens) + if (!step->more_tokens){ step->more_tokens = icu_chain_step_next_token(chain, step, status); + return step->more_tokens; // avoid one token count too much! + } break; default: @@ -1100,11 +1109,8 @@ int icu_chain_step_next_token(struct icu_chain * chain, return 0; /* if token disappered into thin air, tell caller */ - if (!step->buf16->utf16_len) - return 0; - - if (U_FAILURE(*status)) - return 0; + /* if (!step->buf16->utf16_len && !step->more_tokens) */ + /* return 0; */ return 1; } @@ -1119,6 +1125,8 @@ int icu_chain_assign_cstr(struct icu_chain * chain, if (!chain || !src8cstr) return 0; + chain->src8cstr = src8cstr; + stp = chain->steps; /* clear token count */ @@ -1131,8 +1139,9 @@ int icu_chain_assign_cstr(struct icu_chain * chain, stp = stp->previous; } - /* finally convert UTF8 to UTF16 string */ - icu_utf16_from_utf8_cstr(chain->src16, src8cstr, status); + /* finally convert UTF8 to UTF16 string if needed */ + if (chain->steps || chain->sort) + icu_utf16_from_utf8_cstr(chain->src16, chain->src8cstr, status); if (U_FAILURE(*status)) return 0; @@ -1147,27 +1156,49 @@ int icu_chain_next_token(struct icu_chain * chain, { int got_token = 0; - if (!chain || !chain->steps) + *status = U_ZERO_ERROR; + + if (!chain) return 0; - while(!got_token && chain->steps->more_tokens) - got_token = icu_chain_step_next_token(chain, chain->steps, status); - - if (got_token){ - chain->token_count++; + /* special case with no steps - same as index type binary */ + if (!chain->steps){ + if (chain->token_count) + return 0; + else { + chain->token_count++; + + if (chain->sort) + icu_sortkey8_from_utf16(chain->coll, + chain->sort8, chain->steps->buf16, + status); + return chain->token_count; + } + } + /* usual case, one or more icu chain steps existing */ + else { - icu_utf16_to_utf8(chain->norm8, chain->steps->buf16, status); + while(!got_token && chain->steps && chain->steps->more_tokens) + got_token = icu_chain_step_next_token(chain, chain->steps, status); - icu_sortkey8_from_utf16(chain->coll, - chain->sort8, chain->steps->buf16, status); + if (got_token){ + chain->token_count++; - return chain->token_count; + icu_utf16_to_utf8(chain->norm8, chain->steps->buf16, status); + + if (chain->sort) + icu_sortkey8_from_utf16(chain->coll, + chain->sort8, chain->steps->buf16, + status); + + return chain->token_count; + } } - + return 0; } -int icu_chain_get_token_count(struct icu_chain * chain) +int icu_chain_token_number(struct icu_chain * chain) { if (!chain) return 0; @@ -1176,8 +1207,7 @@ int icu_chain_get_token_count(struct icu_chain * chain) } - -const char * icu_chain_get_display(struct icu_chain * chain) +const char * icu_chain_token_display(struct icu_chain * chain) { if (chain->display8) return icu_buf_utf8_to_cstr(chain->display8); @@ -1185,15 +1215,18 @@ const char * icu_chain_get_display(struct icu_chain * chain) return 0; } -const char * icu_chain_get_norm(struct icu_chain * chain) +const char * icu_chain_token_norm(struct icu_chain * chain) { + if (!chain->steps) + return chain->src8cstr; + if (chain->norm8) return icu_buf_utf8_to_cstr(chain->norm8); return 0; } -const char * icu_chain_get_sort(struct icu_chain * chain) +const char * icu_chain_token_sortkey(struct icu_chain * chain) { if (chain->sort8) return icu_buf_utf8_to_cstr(chain->sort8);