X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=util%2Fyaz-icu.c;h=10fa2f599ffa53da33e09ada8850af81893c0f07;hp=c6e7a0e2eb11fb0bfebe497194f211737f581391;hb=36f0d152f5f2b4d6ef4bcb01a041f4c16c1deaf8;hpb=479969ddd4d243955f2e64ef8571926429ad4f37 diff --git a/util/yaz-icu.c b/util/yaz-icu.c index c6e7a0e..10fa2f5 100644 --- a/util/yaz-icu.c +++ b/util/yaz-icu.c @@ -1,8 +1,6 @@ -/* - * Copyright (C) 1995-2007, Index Data ApS +/* This file is part of the YAZ toolkit. + * Copyright (C) 1995-2009 Index Data * See the file LICENSE for details. - * - * $Id: yaz-icu.c,v 1.9 2007-11-08 08:17:18 adam Exp $ */ #if HAVE_CONFIG_H @@ -16,20 +14,24 @@ #include - #if YAZ_HAVE_ICU #include #include +#include +#include +#include -#include +#include +#include /* commando line and config parameters */ static struct config_t { char conffile[1024]; char print[1024]; int xmloutput; - struct icu_chain * chain; + int sortoutput; + yaz_icu_chain_t chain; FILE * infile; FILE * outfile; } config; @@ -42,6 +44,7 @@ void print_option_error(const struct config_t *p_config) fprintf(stderr, "yaz-icu\n" " [-c (path/to/config/file.xml)]\n" " [-p (a|c|l|t)] print ICU info \n" + " [-s] Show sort normalization key\n" " [-x] XML output\n" "\n" "Examples:\n" @@ -51,10 +54,10 @@ void print_option_error(const struct config_t *p_config) "./yaz-icu -p t -x\n" "\n" "Example ICU chain XML configuration file:\n" - "\n" - " \n" + "\n" + " \n" " \n" - " \n" + " \n" " \n" "\n" ); @@ -70,13 +73,14 @@ void read_params(int argc, char **argv, struct config_t *p_config) p_config->conffile[0] = 0; p_config->print[0] = 0; p_config->xmloutput = 0; + p_config->sortoutput = 0; p_config->chain = 0; p_config->infile = stdin; p_config->outfile = stdout; /* set up command line parameters */ - while ((ret = options("c:p:x", argv, argc, &arg)) != -2) + while ((ret = options("c:p:xs", argv, argc, &arg)) != -2) { switch (ret) { @@ -86,10 +90,14 @@ void read_params(int argc, char **argv, struct config_t *p_config) case 'p': strcpy(p_config->print, arg); break; + case 's': + p_config->sortoutput = 1; + break; case 'x': p_config->xmloutput = 1; break; default: + printf("Got %d\n", ret); print_option_error(p_config); } } @@ -136,7 +144,8 @@ static void print_icu_converters(const struct config_t *p_config) ucnv_getDefaultName()); } - for(i=0;ixmloutput) fprintf(config.outfile, "\n", ucnv_getAvailableName(i)); @@ -153,7 +162,7 @@ static void print_icu_converters(const struct config_t *p_config) static void print_icu_transliterators(const struct config_t *p_config) { int32_t buf_cap = 128; - char buf[buf_cap]; + char buf[128]; int32_t i; int32_t count = utrans_countAvailableIDs(); @@ -171,7 +180,8 @@ static void print_icu_transliterators(const struct config_t *p_config) fprintf(config.outfile, " %s", buf); } - if (p_config->xmloutput){ + if (p_config->xmloutput) + { fprintf(config.outfile, "\n"); } else @@ -268,8 +278,8 @@ static void print_icu_xml_locales(const struct config_t *p_config) count = uloc_countAvailable() ; - if (p_config->xmloutput){ - + if (p_config->xmloutput) + { fprintf(config.outfile, "\n", count, uloc_getDefault(), ucol_countAvailable()); } @@ -343,7 +353,8 @@ static void print_icu_xml_locales(const struct config_t *p_config) &status); - if (p_config->xmloutput){ + if (p_config->xmloutput) + { fprintf(config.outfile, "\n"); } - else if (1 == p_config->xmloutput){ + else if (1 == p_config->xmloutput) + { fprintf(config.outfile, "%s", uloc_getAvailable(i)); fprintf(config.outfile, " | "); if (strlen(name_str)) @@ -385,9 +397,10 @@ static void print_icu_xml_locales(const struct config_t *p_config) else fprintf(config.outfile, "\n"); - if(U_FAILURE(status)) { + if(U_FAILURE(status)) + { fprintf(stderr, "ICU Error: %d %s\n", status, u_errorName(status)); - exit(status); + exit(2); } } @@ -425,38 +438,29 @@ static void process_text_file(const struct config_t *p_config) xmlDoc *doc = xmlParseFile(config.conffile); xmlNode *xml_node = xmlDocGetRootElement(doc); - xmlChar *xml_locale = xmlGetProp(xml_node, (xmlChar *) "locale"); long unsigned int token_count = 0; long unsigned int line_count = 0; UErrorCode status = U_ZERO_ERROR; - int success = 0; - if (! xml_node) { + if (!xml_node) + { printf("Could not parse XML config file '%s' \n", config.conffile); - exit (1); + exit(1); } - if (!xml_locale || !strlen((const char *) xml_locale)) - return; - - config.chain = icu_chain_xml_config(xml_node, (const char *) xml_locale, 0, - &status); + config.chain = icu_chain_xml_config(xml_node, 1, &status); - xmlFree(xml_locale); - - - if (config.chain && U_SUCCESS(status)) - success = 1; - else { + if (!config.chain || !U_SUCCESS(status)) + { printf("Could not set up ICU chain from config file '%s' \n", config.conffile); - exit (1); + if (!U_SUCCESS(status)) + printf("ICU Error: %d %s\n", status, u_errorName(status)); + exit(1); } - - if (p_config->xmloutput) fprintf(config.outfile, @@ -467,38 +471,70 @@ static void process_text_file(const struct config_t *p_config) /* read input lines for processing */ while ((line=fgets(linebuf, sizeof(linebuf)-1, config.infile))) { - success = icu_chain_assign_cstr(config.chain, line, &status); + WRBUF sw = wrbuf_alloc(); + WRBUF cdata = wrbuf_alloc(); + int success = icu_chain_assign_cstr(config.chain, line, &status); line_count++; - while (success && icu_chain_next_token(config.chain, &status)){ + while (success && icu_chain_next_token(config.chain, &status)) + { if (U_FAILURE(status)) success = 0; - else { + else + { + const char *sortkey = icu_chain_token_sortkey(config.chain); + wrbuf_rewind(sw); + wrbuf_puts_escaped(sw, sortkey); token_count++; if (p_config->xmloutput) + { fprintf(config.outfile, - "\n", - token_count, - line_count, - icu_chain_token_norm(config.chain), - icu_chain_token_display(config.chain)); + "sortoutput) + { + wrbuf_rewind(cdata); + wrbuf_xmlputs(cdata, wrbuf_cstr(sw)); + fprintf(config.outfile, " sortkey=\"%s\"", + wrbuf_cstr(cdata)); + } + fprintf(config.outfile, "/>\n"); + } else - fprintf(config.outfile, "%lu %lu '%s' '%s'\n", + { + fprintf(config.outfile, "%lu %lu '%s' '%s'", token_count, line_count, icu_chain_token_norm(config.chain), icu_chain_token_display(config.chain)); + if (p_config->sortoutput) + { + fprintf(config.outfile, " '%s'", wrbuf_cstr(sw)); + } + fprintf(config.outfile, "\n"); + } } } - + wrbuf_destroy(sw); + wrbuf_destroy(cdata); } if (p_config->xmloutput) - fprintf(config.outfile, + fprintf(config.outfile, "\n" "\n"); - + icu_chain_destroy(config.chain); xmlFreeDoc(doc); if (line) @@ -528,15 +564,17 @@ int main(int argc, char **argv) "re-configure and re-compile\n"); + exit(3); #endif /* YAZ_HAVE_ICU */ - return(0); + return 0; } /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab