X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Ficu_I18N.c;h=f40b529294dd19bd505d38c7a2d853b5418bfbba;hb=3f46a4cb43b195b827e585b8806a9fcd3f23d466;hp=aaa0924f5bab3614de20c046cdd611709dc446cf;hpb=b55d58436ec1d0b56ea2833ab8a711f8b9232ed7;p=pazpar2-moved-to-github.git diff --git a/src/icu_I18N.c b/src/icu_I18N.c index aaa0924..f40b529 100644 --- a/src/icu_I18N.c +++ b/src/icu_I18N.c @@ -1,26 +1,24 @@ -/* $Id: icu_I18N.c,v 1.18 2007-05-21 10:14:08 marc Exp $ - Copyright (c) 2006-2007, Index Data. +/* This file is part of Pazpar2. + Copyright (C) 2006-2008 Index Data - This file is part of Pazpar2. +Pazpar2 is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. - Pazpar2 is free software; you can redistribute it and/or modify it under - the terms of the GNU General Public License as published by the Free - Software Foundation; either version 2, or (at your option) any later - version. +Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. - Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - for more details. +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - You should have received a copy of the GNU General Public License - along with Pazpar2; see the file LICENSE. If not, write to the - Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA - 02111-1307, USA. */ #if HAVE_CONFIG_H -#include "cconfig.h" +#include #endif #define USE_TIMING 0 @@ -83,7 +81,6 @@ struct icu_buf_utf16 * icu_buf_utf16_create(size_t capacity) return buf16; }; - struct icu_buf_utf16 * icu_buf_utf16_resize(struct icu_buf_utf16 * buf16, size_t capacity) { @@ -172,8 +169,6 @@ struct icu_buf_utf8 * icu_buf_utf8_resize(struct icu_buf_utf8 * buf8, buf8->utf8 = (uint8_t *) realloc(buf8->utf8, sizeof(uint8_t) * capacity); - buf8->utf8[0] = (uint8_t) 0; - buf8->utf8_len = 0; buf8->utf8_cap = capacity; } else { @@ -206,6 +201,16 @@ struct icu_buf_utf8 * icu_buf_utf8_copy(struct icu_buf_utf8 * dest8, }; +const char *icu_buf_utf8_to_cstr(struct icu_buf_utf8 *src8) +{ + if (!src8 || src8->utf8_len == 0) + return ""; + if (src8->utf8_len == src8->utf8_cap) + src8 = icu_buf_utf8_resize(src8, src8->utf8_len * 2 + 1); + src8->utf8[src8->utf8_len] = '\0'; + return (const char *) src8->utf8; +} + void icu_buf_utf8_destroy(struct icu_buf_utf8 * buf8) { @@ -241,7 +246,7 @@ UErrorCode icu_utf16_from_utf8(struct icu_buf_utf16 * dest16, //if (*status != U_BUFFER_OVERFLOW_ERROR if (U_SUCCESS(*status) - && utf16_len < dest16->utf16_cap) + && utf16_len <= dest16->utf16_cap) dest16->utf16_len = utf16_len; else { dest16->utf16[0] = (UChar) 0; @@ -279,7 +284,7 @@ UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 * dest16, // if (*status != U_BUFFER_OVERFLOW_ERROR if (U_SUCCESS(*status) - && utf16_len < dest16->utf16_cap) + && utf16_len <= dest16->utf16_cap) dest16->utf16_len = utf16_len; else { dest16->utf16[0] = (UChar) 0; @@ -316,7 +321,7 @@ UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 * dest8, //if (*status != U_BUFFER_OVERFLOW_ERROR if (U_SUCCESS(*status) - && utf8_len < dest8->utf8_cap) + && utf8_len <= dest8->utf8_cap) dest8->utf8_len = utf8_len; else { dest8->utf8[0] = (uint8_t) 0; @@ -445,7 +450,7 @@ int icu_utf16_casemap(struct icu_buf_utf16 * dest16, } if (U_SUCCESS(*status) - && dest16_len < dest16->utf16_cap) + && dest16_len <= dest16->utf16_cap) dest16->utf16_len = dest16_len; else { dest16->utf16[0] = (UChar) 0; @@ -687,6 +692,7 @@ struct icu_normalizer * icu_normalizer_create(const char *rules, char action, UTRANS_FORWARD, 0, 0, normalizer->parse_error, status); + // yaz_log(YLOG_LOG, "utrans_open %p", normalizer->trans); break; case 'r': normalizer->trans @@ -695,6 +701,7 @@ struct icu_normalizer * icu_normalizer_create(const char *rules, char action, UTRANS_REVERSE , 0, 0, normalizer->parse_error, status); + // yaz_log(YLOG_LOG, "utrans_open %p", normalizer->trans); break; default: *status = U_UNSUPPORTED_ERROR; @@ -716,7 +723,10 @@ void icu_normalizer_destroy(struct icu_normalizer * normalizer){ if (normalizer->rules16) icu_buf_utf16_destroy(normalizer->rules16); if (normalizer->trans) + { + // yaz_log(YLOG_LOG, "utrans_close %p", normalizer->trans); utrans_close(normalizer->trans); + } free(normalizer); } }; @@ -764,21 +774,16 @@ struct icu_chain_step * icu_chain_step_create(struct icu_chain * chain, step = (struct icu_chain_step *) malloc(sizeof(struct icu_chain_step)); step->type = type; - step->more_tokens = 0; - step->need_new_token = 1; - if (buf16) - step->buf16 = buf16; - else - step->buf16 = 0; + step->buf16 = buf16; // create auxilary objects switch(step->type) { case ICU_chain_step_type_display: break; - case ICU_chain_step_type_norm: + case ICU_chain_step_type_index: break; - case ICU_chain_step_type_sort: + case ICU_chain_step_type_sortkey: break; case ICU_chain_step_type_casemap: step->u.casemap = icu_casemap_create((char *) chain->locale, @@ -809,9 +814,9 @@ void icu_chain_step_destroy(struct icu_chain_step * step){ switch(step->type) { case ICU_chain_step_type_display: break; - case ICU_chain_step_type_norm: + case ICU_chain_step_type_index: break; - case ICU_chain_step_type_sort: + case ICU_chain_step_type_sortkey: break; case ICU_chain_step_type_casemap: icu_casemap_destroy(step->u.casemap); @@ -828,8 +833,7 @@ void icu_chain_step_destroy(struct icu_chain_step * step){ default: break; } - - + free(step); }; @@ -870,6 +874,7 @@ void icu_chain_destroy(struct icu_chain * chain) icu_buf_utf16_destroy(chain->src16); icu_chain_step_destroy(chain->steps); + free(chain); } }; @@ -880,6 +885,8 @@ struct icu_chain * icu_chain_xml_config(xmlNode *xml_node, xmlNode *node = 0; struct icu_chain * chain = 0; + xmlChar *xml_id = 0; + xmlChar *xml_locale = 0; if (!xml_node ||xml_node->type != XML_ELEMENT_NODE @@ -887,8 +894,8 @@ struct icu_chain * icu_chain_xml_config(xmlNode *xml_node, return 0; - xmlChar *xml_id = xmlGetProp(xml_node, (xmlChar *) "id"); - xmlChar *xml_locale = xmlGetProp(xml_node, (xmlChar *) "locale"); + xml_id = xmlGetProp(xml_node, (xmlChar *) "id"); + xml_locale = xmlGetProp(xml_node, (xmlChar *) "locale"); if (!xml_id || !strlen((const char *) xml_id) || !xml_locale || !strlen((const char *) xml_locale)) @@ -897,16 +904,19 @@ struct icu_chain * icu_chain_xml_config(xmlNode *xml_node, chain = icu_chain_create((const uint8_t *) xml_id, (const uint8_t *) xml_locale); + xmlFree(xml_id); + xmlFree(xml_locale); if (!chain) return 0; for (node = xml_node->children; node; node = node->next) { + xmlChar *xml_rule = 0; + struct icu_chain_step * step = 0; if (node->type != XML_ELEMENT_NODE) continue; - xmlChar *xml_rule = xmlGetProp(node, (xmlChar *) "rule"); - struct icu_chain_step * step = 0; + xml_rule = xmlGetProp(node, (xmlChar *) "rule"); if (!strcmp((const char *) node->name, (const char *) "casemap")){ @@ -929,16 +939,17 @@ struct icu_chain * icu_chain_xml_config(xmlNode *xml_node, (const uint8_t *) "", status); } else if (!strcmp((const char *) node->name, - (const char *) "normal")){ - step = icu_chain_insert_step(chain, ICU_chain_step_type_norm, + (const char *) "index")){ + step = icu_chain_insert_step(chain, ICU_chain_step_type_index, (const uint8_t *) "", status); } else if (!strcmp((const char *) node->name, - (const char *) "sort")){ - step = icu_chain_insert_step(chain, ICU_chain_step_type_sort, + (const char *) "sortkey")){ + step = icu_chain_insert_step(chain, ICU_chain_step_type_sortkey, (const uint8_t *) "", status); } + xmlFree(xml_rule); if (!step || U_FAILURE(*status)){ icu_chain_destroy(chain); return 0; @@ -978,10 +989,10 @@ struct icu_chain_step * icu_chain_insert_step(struct icu_chain * chain, case ICU_chain_step_type_display: buf16 = src16; break; - case ICU_chain_step_type_norm: + case ICU_chain_step_type_index: buf16 = src16; break; - case ICU_chain_step_type_sort: + case ICU_chain_step_type_sortkey: buf16 = src16; break; case ICU_chain_step_type_casemap: @@ -1046,10 +1057,10 @@ int icu_chain_step_next_token(struct icu_chain * chain, case ICU_chain_step_type_display: icu_utf16_to_utf8(chain->display8, src16, status); break; - case ICU_chain_step_type_norm: + case ICU_chain_step_type_index: icu_utf16_to_utf8(chain->norm8, src16, status); break; - case ICU_chain_step_type_sort: + case ICU_chain_step_type_sortkey: icu_utf16_to_utf8(chain->sort8, src16, status); break; case ICU_chain_step_type_casemap: @@ -1125,6 +1136,7 @@ int icu_chain_assign_cstr(struct icu_chain * chain, while (stp){ stp->more_tokens = 1; + stp->need_new_token = 1; stp = stp->previous; } @@ -1170,7 +1182,7 @@ int icu_chain_get_token_count(struct icu_chain * chain) const char * icu_chain_get_display(struct icu_chain * chain) { if (chain->display8) - return (const char *) chain->display8->utf8; + return icu_buf_utf8_to_cstr(chain->display8); return 0; }; @@ -1178,7 +1190,7 @@ const char * icu_chain_get_display(struct icu_chain * chain) const char * icu_chain_get_norm(struct icu_chain * chain) { if (chain->norm8) - return (const char *) chain->norm8->utf8; + return icu_buf_utf8_to_cstr(chain->norm8); return 0; }; @@ -1186,7 +1198,7 @@ const char * icu_chain_get_norm(struct icu_chain * chain) const char * icu_chain_get_sort(struct icu_chain * chain) { if (chain->sort8) - return (const char *) chain->sort8->utf8; + return icu_buf_utf8_to_cstr(chain->sort8); return 0; };