X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Ficu_chain_test.c;h=192d28cc4e2b341d29679837a058c5e43933c86d;hb=47fcaa2307337683215b398e3eaad2e06c5fe9db;hp=ddc926e1124be769922014af50869f5f574512e7;hpb=d060969f7c7f2a41142ae5dfdb945cda973c91ee;p=pazpar2-moved-to-github.git diff --git a/src/icu_chain_test.c b/src/icu_chain_test.c index ddc926e..192d28c 100644 --- a/src/icu_chain_test.c +++ b/src/icu_chain_test.c @@ -1,17 +1,39 @@ -/** - gcc -I/usr/include/libxml2 -lxml2 -o icu-xml-convert icu-xml-convert.c +/* $Id: icu_chain_test.c,v 1.6 2007-07-05 18:40:24 adam Exp $ + Copyright (c) 2006-2007, Index Data. + +This file is part of Pazpar2. + +Pazpar2 is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Pazpar2; see the file LICENSE. If not, write to the +Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. */ -#include +#if HAVE_CONFIG_H +#include "cconfig.h" +#endif + #include -#define _GNU_SOURCE #include #include //#include #include + +#ifdef HAVE_ICU + #include #include @@ -31,63 +53,60 @@ static struct config_t { void print_option_error(const struct config_t *p_config) { - fprintf(stderr, "Calling error, valid options are :\n"); - fprintf(stderr, "icu_chain_test\n" - " [-c (path/to/config/file.xml)]\n" - " [-p (a|c|l|t)] print ICU info \n" - " [-x] XML output\n" - "\n" - "Examples:\n" - "cat hugetextfile.txt | ./icu_chain_test -c config.xml \n" - "./icu_chain_test -p c\n" - "./icu_chain_test -p l -x\n" - "./icu_chain_test -p t -x\n" + fprintf(stderr, "Calling error, valid options are :\n"); + fprintf(stderr, "icu_chain_test\n" + " [-c (path/to/config/file.xml)]\n" + " [-p (a|c|l|t)] print ICU info \n" + " [-x] XML output\n" + "\n" + "Examples:\n" + "cat hugetextfile.txt | ./icu_chain_test -c config.xml \n" + "./icu_chain_test -p c\n" + "./icu_chain_test -p l -x\n" + "./icu_chain_test -p t -x\n" ); - exit(1); + exit(1); } -void read_params(int argc, char **argv, struct config_t *p_config){ - char *arg; - int ret; - - /* set default parameters */ - p_config->conffile[0] = 0; - p_config->print[0] = 0; - p_config->xmloutput = 0; - p_config->chain = 0; - p_config->infile = stdin; - p_config->outfile = stdout; - - /* set up command line parameters */ - - while ((ret = options("c:p:x", argv, argc, &arg)) != -2) +void read_params(int argc, char **argv, struct config_t *p_config) +{ + char *arg; + int ret; + + /* set default parameters */ + p_config->conffile[0] = 0; + p_config->print[0] = 0; + p_config->xmloutput = 0; + p_config->chain = 0; + p_config->infile = stdin; + p_config->outfile = stdout; + + /* set up command line parameters */ + + while ((ret = options("c:p:x", argv, argc, &arg)) != -2) { - switch (ret) + switch (ret) { case 'c': - strcpy(p_config->conffile, arg); - break; + strcpy(p_config->conffile, arg); + break; case 'p': - strcpy(p_config->print, arg); - break; + strcpy(p_config->print, arg); + break; case 'x': p_config->xmloutput = 1; - break; + break; default: - print_option_error(p_config); + print_option_error(p_config); } } - //p_config->infile = fopen("/etc/passwd", "r"); - - - - if ((!strlen(p_config->conffile) - && !strlen(p_config->print)) - || !config.infile - || !config.outfile) - - print_option_error(p_config); + if ((!strlen(p_config->conffile) + && !strlen(p_config->print)) + || !config.infile + || !config.outfile) + + print_option_error(p_config); }; @@ -117,7 +136,7 @@ static void print_icu_converters(const struct config_t *p_config) count = ucnv_countAvailable(); if (p_config->xmloutput) fprintf(config.outfile, "\n", - count, ucnv_getDefaultName()); + count, ucnv_getDefaultName()); else { fprintf(config.outfile, "Available ICU converters: %d\n", count); fprintf(config.outfile, "Default ICU Converter is: '%s'\n", ucnv_getDefaultName()); @@ -138,254 +157,254 @@ static void print_icu_converters(const struct config_t *p_config) static void print_icu_transliterators(const struct config_t *p_config) { - int32_t count; - int32_t i; - - count = utrans_countAvailableIDs(); - - int32_t buf_cap = 128; - char buf[buf_cap]; - - if (p_config->xmloutput) - fprintf(config.outfile, "\n", count); - else - fprintf(config.outfile, "Available ICU transliterators: %d\n", count); - - for(i=0;ixmloutput) + fprintf(config.outfile, "\n", count); + else + fprintf(config.outfile, "Available ICU transliterators: %d\n", count); + + for(i = 0; i xmloutput) - fprintf(config.outfile, "\n", buf); - else - fprintf(config.outfile, " %s", buf); + utrans_getAvailableID(i, buf, buf_cap); + if (p_config->xmloutput) + fprintf(config.outfile, "\n", buf); + else + fprintf(config.outfile, " %s", buf); } - - if (p_config->xmloutput){ - fprintf(config.outfile, "\n"); - } - else + + if (p_config->xmloutput){ + fprintf(config.outfile, "\n"); + } + else { - fprintf(config.outfile, "\n\nUnicode Set Patterns:\n" - " Pattern Description\n" - " Ranges [a-z] The lower case letters a through z\n" - " Named Chars [abc123] The six characters a,b,c,1,2 and 3\n" - " String [abc{def}] chars a, b and c, and string 'def'\n" - " Categories [\\p{Letter}] Perl General Category 'Letter'.\n" - " Categories [:Letter:] Posix General Category 'Letter'.\n" - "\n" - " Combination Example\n" - " Union [[:Greek:] [:letter:]]\n" - " Intersection [[:Greek:] & [:letter:]]\n" - " Set Complement [[:Greek:] - [:letter:]]\n" - " Complement [^[:Greek:] [:letter:]]\n" - "\n" + fprintf(config.outfile, "\n\nUnicode Set Patterns:\n" + " Pattern Description\n" + " Ranges [a-z] The lower case letters a through z\n" + " Named Chars [abc123] The six characters a,b,c,1,2 and 3\n" + " String [abc{def}] chars a, b and c, and string 'def'\n" + " Categories [\\p{Letter}] Perl General Category 'Letter'.\n" + " Categories [:Letter:] Posix General Category 'Letter'.\n" + "\n" + " Combination Example\n" + " Union [[:Greek:] [:letter:]]\n" + " Intersection [[:Greek:] & [:letter:]]\n" + " Set Complement [[:Greek:] - [:letter:]]\n" + " Complement [^[:Greek:] [:letter:]]\n" + "\n" "see: http://icu.sourceforge.net/userguide/unicodeSet.html\n" - "\n" - "Examples:\n" - " [:Punctuation:] Any-Remove\n" - " [:Cased-Letter:] Any-Upper\n" - " [:Control:] Any-Remove\n" - " [:Decimal_Number:] Any-Remove\n" - " [:Final_Punctuation:] Any-Remove\n" - " [:Georgian:] Any-Upper\n" - " [:Katakana:] Any-Remove\n" - " [:Arabic:] Any-Remove\n" - " [:Punctuation:] Remove\n" - " [[:Punctuation:]-[.,]] Remove\n" - " [:Line_Separator:] Any-Remove\n" - " [:Math_Symbol:] Any-Remove\n" - " Lower; [:^Letter:] Remove (word tokenization)\n" - " [:^Number:] Remove (numeric tokenization)\n" - " [:^Katagana:] Remove (remove everything except Katagana)\n" - " Lower;[[:WhiteSpace:][:Punctuation:]] Remove (word tokenization)\n" - " NFD; [:Nonspacing Mark:] Remove; NFC (removes accents from characters)\n" - " [A-Za-z]; Lower(); Latin-Katakana; Katakana-Hiragana (transforms latin and katagana to hiragana)\n" - " [[:separator:][:start punctuation:][:initial punctuation:]] Remove \n" - "\n" - "see http://icu.sourceforge.net/userguide/Transform.html\n" - " http://www.unicode.org/Public/UNIDATA/UCD.html\n" - " http://icu.sourceforge.net/userguide/Transform.html\n" - " http://icu.sourceforge.net/userguide/TransformRule.html\n" - ); - - - fprintf(config.outfile, "\n\n"); - + "\n" + "Examples:\n" + " [:Punctuation:] Any-Remove\n" + " [:Cased-Letter:] Any-Upper\n" + " [:Control:] Any-Remove\n" + " [:Decimal_Number:] Any-Remove\n" + " [:Final_Punctuation:] Any-Remove\n" + " [:Georgian:] Any-Upper\n" + " [:Katakana:] Any-Remove\n" + " [:Arabic:] Any-Remove\n" + " [:Punctuation:] Remove\n" + " [[:Punctuation:]-[.,]] Remove\n" + " [:Line_Separator:] Any-Remove\n" + " [:Math_Symbol:] Any-Remove\n" + " Lower; [:^Letter:] Remove (word tokenization)\n" + " [:^Number:] Remove (numeric tokenization)\n" + " [:^Katagana:] Remove (remove everything except Katagana)\n" + " Lower;[[:WhiteSpace:][:Punctuation:]] Remove (word tokenization)\n" + " NFD; [:Nonspacing Mark:] Remove; NFC (removes accents from characters)\n" + " [A-Za-z]; Lower(); Latin-Katakana; Katakana-Hiragana (transforms latin and katagana to hiragana)\n" + " [[:separator:][:start punctuation:][:initial punctuation:]] Remove \n" + "\n" + "see http://icu.sourceforge.net/userguide/Transform.html\n" + " http://www.unicode.org/Public/UNIDATA/UCD.html\n" + " http://icu.sourceforge.net/userguide/Transform.html\n" + " http://icu.sourceforge.net/userguide/TransformRule.html\n" + ); + + + fprintf(config.outfile, "\n\n"); + } } static void print_icu_xml_locales(const struct config_t *p_config) { - int32_t count; - int32_t i; - UErrorCode status = U_ZERO_ERROR; - - UChar keyword[64]; - int32_t keyword_len = 0; - char keyword_str[128]; - int32_t keyword_str_len = 0; - - UChar language[64]; - int32_t language_len = 0; - char lang_str[128]; - int32_t lang_str_len = 0; - - UChar script[64]; - int32_t script_len = 0; - char script_str[128]; - int32_t script_str_len = 0; - - UChar location[64]; - int32_t location_len = 0; - char location_str[128]; - int32_t location_str_len = 0; - - UChar variant[64]; - int32_t variant_len = 0; - char variant_str[128]; - int32_t variant_str_len = 0; - - UChar name[64]; - int32_t name_len = 0; - char name_str[128]; - int32_t name_str_len = 0; - - UChar localname[64]; - int32_t localname_len = 0; - char localname_str[128]; - int32_t localname_str_len = 0; - - count = uloc_countAvailable() ; - - if (p_config->xmloutput){ + int32_t count; + int32_t i; + UErrorCode status = U_ZERO_ERROR; - fprintf(config.outfile, "\n", - count, uloc_getDefault(), ucol_countAvailable()); - } + UChar keyword[64]; + int32_t keyword_len = 0; + char keyword_str[128]; + int32_t keyword_str_len = 0; + + UChar language[64]; + int32_t language_len = 0; + char lang_str[128]; + int32_t lang_str_len = 0; + + UChar script[64]; + int32_t script_len = 0; + char script_str[128]; + int32_t script_str_len = 0; + + UChar location[64]; + int32_t location_len = 0; + char location_str[128]; + int32_t location_str_len = 0; + + UChar variant[64]; + int32_t variant_len = 0; + char variant_str[128]; + int32_t variant_str_len = 0; + + UChar name[64]; + int32_t name_len = 0; + char name_str[128]; + int32_t name_str_len = 0; + + UChar localname[64]; + int32_t localname_len = 0; + char localname_str[128]; + int32_t localname_str_len = 0; + + count = uloc_countAvailable() ; + + if (p_config->xmloutput){ + + fprintf(config.outfile, "\n", + count, uloc_getDefault(), ucol_countAvailable()); + } - for(i=0;ixmloutput){ - fprintf(config.outfile, ""); - if (strlen(localname_str)) - fprintf(config.outfile, "%s", localname_str); - fprintf(config.outfile, "\n"); - } - else if (1 == p_config->xmloutput){ - fprintf(config.outfile, "%s", uloc_getAvailable(i)); - fprintf(config.outfile, " | "); - if (strlen(name_str)) - fprintf(config.outfile, "%s", name_str); - fprintf(config.outfile, " | "); - if (strlen(localname_str)) - fprintf(config.outfile, "%s", localname_str); - fprintf(config.outfile, "\n"); + language_len + = uloc_getDisplayLanguage(uloc_getAvailable(i), "en", + language, 64, + &status); + + u_strToUTF8(lang_str, 128, &lang_str_len, + language, language_len, + &status); + + + script_len + = uloc_getDisplayScript(uloc_getAvailable(i), "en", + script, 64, + &status); + + u_strToUTF8(script_str, 128, &script_str_len, + script, script_len, + &status); + + location_len + = uloc_getDisplayCountry(uloc_getAvailable(i), "en", + location, 64, + &status); + + u_strToUTF8(location_str, 128, &location_str_len, + location, location_len, + &status); + + variant_len + = uloc_getDisplayVariant(uloc_getAvailable(i), "en", + variant, 64, + &status); + + u_strToUTF8(variant_str, 128, &variant_str_len, + variant, variant_len, + &status); + + name_len + = uloc_getDisplayName(uloc_getAvailable(i), "en", + name, 64, + &status); + + u_strToUTF8(name_str, 128, &name_str_len, + name, name_len, + &status); + + localname_len + = uloc_getDisplayName(uloc_getAvailable(i), uloc_getAvailable(i), + localname, 64, + &status); + + u_strToUTF8(localname_str, 128, &localname_str_len, + localname, localname_len, + &status); + + + if (p_config->xmloutput){ + fprintf(config.outfile, ""); + if (strlen(localname_str)) + fprintf(config.outfile, "%s", localname_str); + fprintf(config.outfile, "\n"); + } + else if (1 == p_config->xmloutput){ + fprintf(config.outfile, "%s", uloc_getAvailable(i)); + fprintf(config.outfile, " | "); + if (strlen(name_str)) + fprintf(config.outfile, "%s", name_str); + fprintf(config.outfile, " | "); + if (strlen(localname_str)) + fprintf(config.outfile, "%s", localname_str); + fprintf(config.outfile, "\n"); + } + else + fprintf(config.outfile, "%s ", uloc_getAvailable(i)); } + if (p_config->xmloutput) + fprintf(config.outfile, "\n"); else - fprintf(config.outfile, "%s ", uloc_getAvailable(i)); - } - if (p_config->xmloutput) - fprintf(config.outfile, "\n"); - else - fprintf(config.outfile, "\n"); - - if(U_FAILURE(status)) { - fprintf(stderr, "ICU Error: %d %s\n", status, u_errorName(status)); - exit(status); - } + fprintf(config.outfile, "\n"); + + if(U_FAILURE(status)) { + fprintf(stderr, "ICU Error: %d %s\n", status, u_errorName(status)); + exit(status); + } } static void print_info(const struct config_t *p_config) { - if (p_config->xmloutput) - fprintf(config.outfile, "\n" - "\n"); + if (p_config->xmloutput) + fprintf(config.outfile, "\n" + "\n"); if ('c' == config.print[0]) print_icu_converters(&config); @@ -402,16 +421,15 @@ static void print_info(const struct config_t *p_config) if (p_config->xmloutput) fprintf(config.outfile, "\n"); - exit(0); + exit(0); }; static void process_text_file(const struct config_t *p_config) { - char * line = 0; - size_t line_cap = 0; - ssize_t line_len; + char *line = 0; + char linebuf[1024]; xmlDoc *doc = xmlParseFile(config.conffile); xmlNode *xml_node = xmlDocGetRootElement(doc); @@ -435,7 +453,8 @@ static void process_text_file(const struct config_t *p_config) "\n"); // read input lines for processing - while ((line_len = getline(&line, &line_cap, config.infile)) != -1) { + while ((line=fgets(linebuf, sizeof(linebuf)-1, config.infile))) + { success = icu_chain_assign_cstr(config.chain, line, &status); line_count++; @@ -463,7 +482,7 @@ static void process_text_file(const struct config_t *p_config) } - if (p_config->xmloutput) + if (p_config->xmloutput) fprintf(config.outfile, "\n" "\n"); @@ -474,10 +493,14 @@ static void process_text_file(const struct config_t *p_config) free(line); }; +#endif // HAVE_ICU + int main(int argc, char **argv) { +#ifdef HAVE_ICU + read_params(argc, argv, &config); if (config.conffile && strlen(config.conffile)) @@ -485,8 +508,16 @@ int main(int argc, char **argv) if (config.print && strlen(config.print)) print_info(&config); - - + +#else // HAVE_ICU + + printf("ICU not available on your system.\n" + "Please install libicu36-dev and icu-doc or similar, " + "re-configure and re-compile\n"); + + +#endif // HAVE_ICU + return(0); };