X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Ficu_chain.c;h=b672975293c24ce880e6d18a0f7cbc15833e6a1e;hp=730eddea1a96217d6b4a7b0f08479fcea3c7b12a;hb=0b121738c22cd67b153792d645746447d8a81767;hpb=d05e5d54fcac20824e0efa3d939e5fbb1505964a diff --git a/src/icu_chain.c b/src/icu_chain.c index 730edde..b672975 100644 --- a/src/icu_chain.c +++ b/src/icu_chain.c @@ -1,5 +1,5 @@ /* This file is part of the YAZ toolkit. - * Copyright (C) 1995-2013 Index Data + * Copyright (C) Index Data * See the file LICENSE for details. */ @@ -291,7 +291,7 @@ struct icu_chain *icu_chain_xml_config(const xmlNode *xml_node, if (!rule && strcmp((const char *) node->name, "display")) { - yaz_log(YLOG_WARN, "Missing attribute rule for element %s", + yaz_log(YLOG_WARN, "Missing attribute 'rule' for element %s", (const char *) node->name); no_errors++; continue; @@ -346,6 +346,9 @@ struct icu_chain *icu_chain_xml_config(const xmlNode *xml_node, } if (step && U_FAILURE(*status)) { + yaz_log(YLOG_WARN, "ICU Error %d %s for element %s, rule %s", + *status, u_errorName(*status), node->name, rule ? + rule : ""); no_errors++; break; } @@ -370,6 +373,8 @@ struct icu_iter { int token_count; size_t org_start; size_t org_len; + size_t utf8_base; + size_t utf16_base; struct icu_chain_step *steps; }; @@ -501,6 +506,7 @@ void icu_iter_first(yaz_icu_iter_t iter, const char *src8cstr) icu_buf_utf16_copy(iter->org, src); iter->token_count = 0; iter->org_start = 0; + iter->utf8_base = iter->utf16_base = 0; iter->org_len = src->utf16_len; iter->last = icu_iter_invoke(iter, iter->steps, src); } @@ -563,25 +569,32 @@ int icu_iter_get_token_number(yaz_icu_iter_t iter) void icu_iter_get_org_info(yaz_icu_iter_t iter, size_t *start, size_t *len) { - /* save full length of org since we're gonna cut it */ - int32_t save_len = iter->org->utf16_len; + int32_t len1 = 0, len2 = 0; + UErrorCode status = U_ZERO_ERROR; - struct icu_buf_utf8 *tmp = icu_buf_utf8_create(0); - UErrorCode status; + if (iter->org_start < iter->utf16_base) + { + iter->utf8_base = 0; + iter->utf16_base = 0; + } + u_strToUTF8(0, 0, &len1, + iter->org->utf16 + iter->utf16_base, + iter->org_start - iter->utf16_base, + &status); - iter->org->utf16_len = iter->org_start; - icu_utf16_to_utf8(tmp, iter->org, &status); - if (U_SUCCESS(status)) - *start = tmp->utf8_len; - else - *start = 0; - iter->org->utf16_len = iter->org_start + iter->org_len; - icu_utf16_to_utf8(tmp, iter->org, &status); - if (U_SUCCESS(status)) - *len = tmp->utf8_len - *start; - else - *len = 0; - iter->org->utf16_len = save_len; + status = U_ZERO_ERROR; + + *start = len1 + iter->utf8_base; + + u_strToUTF8(0, 0, &len2, + iter->org->utf16 + iter->utf16_base, + iter->org_start - iter->utf16_base + iter->org_len, + &status); + + *len = len2 - len1; + + iter->utf8_base = *start; + iter->utf16_base = iter->org_start; } int icu_chain_assign_cstr(struct icu_chain *chain, const char *src8cstr,