X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Ftest_icu_I18N.c;h=bc55d7d38a6b439542fcd0eec3a659f86f509464;hb=624d4ff2e24967bafd51c52e1eba9b4cd64ee79d;hp=00671f34d6c1da28d3f70fb196c54c45720a578f;hpb=a8b19203ad695ea4f749b8d460dc363f8a7085cd;p=pazpar2-moved-to-github.git diff --git a/src/test_icu_I18N.c b/src/test_icu_I18N.c index 00671f3..bc55d7d 100644 --- a/src/test_icu_I18N.c +++ b/src/test_icu_I18N.c @@ -1,29 +1,27 @@ -/* $Id: test_icu_I18N.c,v 1.21 2007-05-16 19:50:01 marc Exp $ - Copyright (c) 2006-2007, Index Data. +/* This file is part of Pazpar2. + Copyright (C) 2006-2008 Index Data - This file is part of Pazpar2. +Pazpar2 is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. - Pazpar2 is free software; you can redistribute it and/or modify it under - the terms of the GNU General Public License as published by the Free - Software Foundation; either version 2, or (at your option) any later - version. +Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. - Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - for more details. +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - You should have received a copy of the GNU General Public License - along with Pazpar2; see the file LICENSE. If not, write to the - Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA - 02111-1307, USA. */ // DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 #if HAVE_CONFIG_H -#include "cconfig.h" +#include #endif #define USE_TIMING 0 @@ -498,51 +496,155 @@ void test_icu_I18N_tokenizer(int argc, char **argv) void test_icu_I18N_chain(int argc, char **argv) { const char * en_str - = "O Romeo, Romeo! wherefore art thou Romeo?"; + = "O Romeo, Romeo! wherefore art thou\t Romeo?"; + + printf("ICU chain:\ninput: '%s'\n", en_str); UErrorCode status = U_ZERO_ERROR; - struct icu_chain_step * step = 0; - struct icu_chain * chain - = icu_chain_create((uint8_t *) "en:sentence", (uint8_t *) "en"); -/* step = icu_chain_insert_step(chain, ICU_chain_step_type_normalize, */ -/* (const uint8_t *) "[:Control:] Any-Remove", */ -/* &status); */ + //struct icu_chain_step * step = 0; + struct icu_chain * chain = 0; + + + const char * xml_str = "" + "" + "" + "" + "" + "" + "" + "" + ""; + + + xmlDoc *doc = xmlParseMemory(xml_str, strlen(xml_str)); + xmlNode *xml_node = xmlDocGetRootElement(doc); + YAZ_CHECK(xml_node); + + + chain = icu_chain_xml_config(xml_node, &status); + +#if 0 + chain = icu_chain_create((uint8_t *) "en:word", (uint8_t *) "en"); + step = icu_chain_insert_step(chain, ICU_chain_step_type_normalize, + (const uint8_t *) "[:Control:] Any-Remove", + &status); + step = icu_chain_insert_step(chain, ICU_chain_step_type_tokenize, + (const uint8_t *) "s", + &status); step = icu_chain_insert_step(chain, ICU_chain_step_type_tokenize, (const uint8_t *) "l", &status); -/* step = icu_chain_insert_step(chain, ICU_chain_step_type_normalize, */ -/* (const uint8_t *) */ -/* "[[:WhiteSpace:][:Punctuation:]] Any-Remove", */ -/* &status); */ + step = icu_chain_insert_step(chain, ICU_chain_step_type_normalize, + (const uint8_t *) + "[[:WhiteSpace:][:Punctuation:]] Any-Remove", + &status); step = icu_chain_insert_step(chain, ICU_chain_step_type_display, (const uint8_t *)"", &status); /* step = icu_chain_insert_step(chain, ICU_chain_step_type_normalize, */ /* (const uint8_t *) "Lower", */ /* &status); */ -/* step = icu_chain_insert_step(chain, ICU_chain_step_type_norm, */ -/* (const uint8_t *)"", */ -/* &status); */ -/* step = icu_chain_insert_step(chain, ICU_chain_step_type_sort, */ + step = icu_chain_insert_step(chain, ICU_chain_step_type_casemap, + (const uint8_t *) "l", + &status); + step = icu_chain_insert_step(chain, ICU_chain_step_type_index, + (const uint8_t *)"", + &status); +/* step = icu_chain_insert_step(chain, ICU_chain_step_type_sortkey, */ /* (const uint8_t *)"", */ /* &status); */ +#endif - + xmlFreeDoc(doc); + YAZ_CHECK(chain); YAZ_CHECK(icu_chain_assign_cstr(chain, en_str, &status)); while (icu_chain_next_token(chain, &status)){ - printf("token %d norm: '%s' display: '%s'\n", + printf("%d '%s' '%s'\n", + icu_chain_get_token_count(chain), + icu_chain_get_norm(chain), + icu_chain_get_display(chain)); + } + + YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 7); + + + YAZ_CHECK(icu_chain_assign_cstr(chain, "what is this?", &status)); + + while (icu_chain_next_token(chain, &status)){ + printf("%d '%s' '%s'\n", icu_chain_get_token_count(chain), icu_chain_get_norm(chain), icu_chain_get_display(chain)); } + + YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 3); + icu_chain_destroy(chain); } +void test_bug_1140(void) +{ + const char * en_str + = "O Romeo, Romeo! wherefore art thou\t Romeo?"; + + printf("ICU chain:\ninput: '%s'\n", en_str); + + UErrorCode status = U_ZERO_ERROR; + //struct icu_chain_step * step = 0; + struct icu_chain * chain = 0; + + const char * xml_str = "" + + /* if the first rule is normalize instead. Then it works */ +#if 0 + "" +#endif + "" + "" + "" + "" + "" + "" + ""; + + + xmlDoc *doc = xmlParseMemory(xml_str, strlen(xml_str)); + xmlNode *xml_node = xmlDocGetRootElement(doc); + YAZ_CHECK(xml_node); + + chain = icu_chain_xml_config(xml_node, &status); + + xmlFreeDoc(doc); + YAZ_CHECK(chain); + + YAZ_CHECK(icu_chain_assign_cstr( + chain, "O Romeo, Romeo! wherefore art thou\t Romeo?", + &status)); + + while (icu_chain_next_token(chain, &status)) + ; + + YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 7); + + YAZ_CHECK(icu_chain_assign_cstr(chain, "what is this?", &status)); + + while (icu_chain_next_token(chain, &status)){ + printf("%d '%s' '%s'\n", + icu_chain_get_token_count(chain), + icu_chain_get_norm(chain), + icu_chain_get_display(chain)); + } + + /* we expect 'what' 'is' 'this', i.e. 3 tokens */ + YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 3); + + icu_chain_destroy(chain); +} #endif // HAVE_ICU @@ -561,7 +663,8 @@ int main(int argc, char **argv) test_icu_I18N_sortmap(argc, argv); test_icu_I18N_normalizer(argc, argv); test_icu_I18N_tokenizer(argc, argv); - //test_icu_I18N_chain(argc, argv); + test_icu_I18N_chain(argc, argv); + test_bug_1140(); #else // HAVE_ICU