From: Marc Cromme Date: Tue, 22 May 2007 08:26:59 +0000 (+0000) Subject: started stand-allone ICU test X-Git-Tag: PAZPAR2.1.0.0~114 X-Git-Url: http://git.indexdata.com/?a=commitdiff_plain;h=4fc03d50d3638f680887d012e6d0586aa8560d8f;p=pazpar2-moved-to-github.git started stand-allone ICU test --- diff --git a/src/Makefile.am b/src/Makefile.am index 26867af..ded9a7f 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,6 +1,8 @@ -# $Id: Makefile.am,v 1.22 2007-05-15 15:50:48 adam Exp $ +# $Id: Makefile.am,v 1.23 2007-05-22 08:26:59 marc Exp $ bin_PROGRAMS = pazpar2 +# icu_chain_test + check_PROGRAMS = test_config \ test_icu_I18N \ test_record \ @@ -33,6 +35,9 @@ libpazpar2_a_SOURCES = config.c config.h eventl.c eventl.h \ pazpar2_SOURCES = pazpar2.c pazpar2_LDADD = libpazpar2.a $(YAZLIB) $(ICU_LIBS) +#icu_chain_test_SOURCES = icu_chain_test.c icu_I18N +#icu_chain_test_LDADD = $(YAZLIB) $(ICU_LIBS) + test_config_SOURCES = test_config.c test_config_LDADD = libpazpar2.a $(YAZLIB) diff --git a/src/icu_chain_test.c b/src/icu_chain_test.c new file mode 100644 index 0000000..ac749e0 --- /dev/null +++ b/src/icu_chain_test.c @@ -0,0 +1,377 @@ +/** + gcc -I/usr/include/libxml2 -lxml2 -o icu-xml-convert icu-xml-convert.c + */ + +#include +#include + +#include "icu_I18N.h" + +/* Command line parameters: conffile receives the -c argument, verbatim the -v level, print the -p selector. NOTE(review): print is declared int, yet read_params strcpy()s into it and main indexes it as config.print[0]; it looks like it was meant to be a char array - confirm. */ +static struct config_t { + //char infile[1024]; + //char locale[128]; + char conffile[1024]; + //char outfile[1024]; + int verbatim; + int print; +} config; + + + +/* Writes the usage text to stderr and terminates the process with exit(1). The p_config argument is not used. */ +void print_option_error(const struct config_t *p_config) +{ + fprintf(stderr, "Calling error, valid options are :\n"); + fprintf(stderr, "icu_chain_test\n" + " [-c (path/to/config/file.xml)]\n" + " [-p (c|l|t)] print available info \n" + " [-v] verbouse output\n" + "\n"); + exit(1); +} + +/* Parses argv with options() using the spec c:p:v until it returns -2: -c is copied into conffile, -p is copied with strcpy into print, -v sets verbatim to atoi(arg) when arg is non-NULL and to 1 otherwise; any other return value calls print_option_error. An empty conffile after parsing is also reported as an option error. NOTE(review): both strcpy calls are unbounded; the final print_option_error() call passes no argument although the function is declared with one parameter. */ +void read_params(int argc, char **argv, struct config_t *p_config){ + char *arg; + int ret; + + /* set default parameters */ + p_config->conffile[0] = 0; + + /* set up command line parameters */ + + while ((ret = options("c:p:v", 
argv, argc, &arg)) != -2) + { + switch (ret) + { + case 'c': + strcpy(p_config->conffile, arg); + break; + case 'p': + strcpy(p_config->print, arg); + break; + case 'v': + if (arg) + p_config->verbatim = atoi(arg); + else + p_config->verbatim = 1; + break; + default: + print_option_error(p_config); + } + } + + + if (! strlen(p_config->conffile)) + print_option_error(); +} + +/* UConverter *conv; */ +/* conv = ucnv_open("utf-8", &status); */ +/* assert(U_SUCCESS(status)); */ + +/* *ustr16_len */ +/* = ucnv_toUChars(conv, ustr16, 1024, */ +/* (const char *) *xstr8, strlen((const char *) *xstr8), */ +/* &status); */ + + + +/* ucnv_fromUChars(conv, */ +/* (char *) *xstr8, strlen((const char *) *xstr8), */ +/* ustr16, *ustr16_len, */ +/* &status); */ +/* ucnv_close(conv); */ + + +/* Prints the number of ICU converters reported by ucnv_countAvailable(). NOTE(review): the source text is damaged right after for(i=0;i - the rest of this loop and the beginning of the next function are missing here; the code that resumes at verbatim){ prints transliterator information (Available ICU transliterators, then the Unicode Set Patterns help text when verbatim is not above 1) and ends in exit(0), so it presumably belongs to print_icu_transliterators, which main calls - confirm against an undamaged copy. */ +static void print_icu_converters(const struct config_t *p_config) +{ + int32_t count; + int32_t i; + + count = ucnv_countAvailable(); + printf("Available ICU converters: %d\n", count); + + for(i=0;iverbatim){ + printf("\n"); + printf("\n\n", count); + } else + printf("Available ICU transliterators: %d\n", count); + + for(i=0;iverbatim) + printf("\n", buf); + else + printf(" %s", buf); + } + + if (1 < p_config->verbatim){ + printf("\n\n"); + } + else + { + printf("\n\nUnicode Set Patterns:\n" + " Pattern Description\n" + " Ranges [a-z] The lower case letters a through z\n" + " Named Chars [abc123] The six characters a,b,c,1,2 and 3\n" + " String [abc{def}] chars a, b and c, and string 'def'\n" + " Categories [\\p{Letter}] Perl General Category 'Letter'.\n" + " Categories [:Letter:] Posix General Category 'Letter'.\n" + "\n" + " Combination Example\n" + " Union [[:Greek:] [:letter:]]\n" + " Intersection [[:Greek:] & [:letter:]]\n" + " Set Complement [[:Greek:] - [:letter:]]\n" + " Complement [^[:Greek:] [:letter:]]\n" + "\n" + "see: http://icu.sourceforge.net/userguide/unicodeSet.html\n" + "\n" + "Examples:\n" + " [:Punctuation:] Any-Remove\n" + " [:Cased-Letter:] Any-Upper\n" + " [:Control:] 
Any-Remove\n" + " [:Decimal_Number:] Any-Remove\n" + " [:Final_Punctuation:] Any-Remove\n" + " [:Georgian:] Any-Upper\n" + " [:Katakana:] Any-Remove\n" + " [:Arabic:] Any-Remove\n" + " [:Punctuation:] Remove\n" + " [[:Punctuation:]-[.,]] Remove\n" + " [:Line_Separator:] Any-Remove\n" + " [:Math_Symbol:] Any-Remove\n" + " Lower; [:^Letter:] Remove (word tokenization)\n" + " [:^Number:] Remove (numeric tokenization)\n" + " [:^Katagana:] Remove (remove everything except Katagana)\n" + " Lower;[[:WhiteSpace:][:Punctuation:]] Remove (word tokenization)\n" + " NFD; [:Nonspacing Mark:] Remove; NFC (removes accents from characters)\n" + " [A-Za-z]; Lower(); Latin-Katakana; Katakana-Hiragana (transforms latin and katagana to hiragana)\n" + " [[:separator:][:start punctuation:][:initial punctuation:]] Remove \n" + "\n" + "see http://icu.sourceforge.net/userguide/Transform.html\n" + " http://www.unicode.org/Public/UNIDATA/UCD.html\n" + " http://icu.sourceforge.net/userguide/Transform.html\n" + " http://icu.sourceforge.net/userguide/TransformRule.html\n" + ); + } + + + printf("\n\n"); + + + exit(0); +} + +/* Prints locale information: counts locales with uloc_countAvailable() and, depending on verbatim, prints one line per locale (id | name | local name when verbatim is 1, just the id followed by a space otherwise). After the loop it reports a failing ICU status on stderr and exits with that status, else exits with 0. NOTE(review): the loop header and the calls that fill the UChar and char name buffers are missing from the visible text (damage after for(i=0;i), and the printf format strings of the verbatim above 1 branch are empty or newline-only, presumably for the same reason - confirm against an undamaged copy. */ +static void print_icu_xml_locales(const struct config_t *p_config) +{ + int32_t count; + int32_t i; + UErrorCode status = U_ZERO_ERROR; + + UChar keyword[64]; + int32_t keyword_len = 0; + char keyword_str[128]; + int32_t keyword_str_len = 0; + + UChar language[64]; + int32_t language_len = 0; + char lang_str[128]; + int32_t lang_str_len = 0; + + UChar script[64]; + int32_t script_len = 0; + char script_str[128]; + int32_t script_str_len = 0; + + UChar location[64]; + int32_t location_len = 0; + char location_str[128]; + int32_t location_str_len = 0; + + UChar variant[64]; + int32_t variant_len = 0; + char variant_str[128]; + int32_t variant_str_len = 0; + + UChar name[64]; + int32_t name_len = 0; + char name_str[128]; + int32_t name_str_len = 0; + + UChar localname[64]; + int32_t localname_len = 0; + char localname_str[128]; + int32_t localname_str_len = 0; + + count = 
uloc_countAvailable() ; + + if (1 < p_config->verbatim){ + printf(""); + printf("\n\n", + count, uloc_getDefault(), ucol_countAvailable()); + } + + for(i=0;iverbatim){ + printf(""); + if (strlen(localname_str)) + printf("%s", localname_str); + printf("\n"); + } + else if (1 == p_config->verbatim){ + printf("%s", uloc_getAvailable(i)); + printf(" | "); + if (strlen(name_str)) + printf("%s", name_str); + printf(" | "); + if (strlen(localname_str)) + printf("%s", localname_str); + printf("\n"); + } + else + printf("%s ", uloc_getAvailable(i)); + } + if (1 < p_config->verbatim) + printf("\n\n"); + else + printf("\n"); + + if(U_FAILURE(status)) { + fprintf(stderr, "ICU Error: %d %s\n", status, u_errorName(status)); + exit(status); + } + exit(0); +} + + +/* Entry point: parses the command line into the global config and dispatches on the first character of config.print: c lists converters, l lists locales, t lists transliterators. NOTE(review): config.debug and print_options are referenced here but are not declared anywhere in the visible code, and config.print is an int, so config.print[0] is not valid C as shown. */ +int main(int argc, char **argv) { + + //LIBXML_TEST_VERSION; + + read_params(argc, argv, &config); + + if (config.debug) + print_options(&config); + + if ('c' == config.print[0]) + print_icu_converters(&config); + + if ('l' == config.print[0]) + print_icu_xml_locales(&config); + + if ('t' == config.print[0]) + print_icu_transliterators(&config); + + //xmlCleanupParser(); + //xmlMemoryDump(); + return(0); +} + +