-/*
- * Copyright (C) 1995-2007, Index Data ApS
+/* This file is part of the YAZ toolkit.
+ * Copyright (C) 1995-2010 Index Data
* See the file LICENSE for details.
- *
- * $Id: yaz-icu.c,v 1.16 2008-01-14 22:58:06 adam Exp $
*/
#if HAVE_CONFIG_H
char conffile[1024];
char print[1024];
int xmloutput;
+ int sortoutput;
yaz_icu_chain_t chain;
FILE * infile;
FILE * outfile;
fprintf(stderr, "yaz-icu\n"
" [-c (path/to/config/file.xml)]\n"
" [-p (a|c|l|t)] print ICU info \n"
+ " [-s] Show sort normalization key\n"
" [-x] XML output\n"
"\n"
"Examples:\n"
p_config->conffile[0] = 0;
p_config->print[0] = 0;
p_config->xmloutput = 0;
+ p_config->sortoutput = 0;
p_config->chain = 0;
p_config->infile = stdin;
p_config->outfile = stdout;
/* set up command line parameters */
- while ((ret = options("c:p:x", argv, argc, &arg)) != -2)
+ while ((ret = options("c:p:xs", argv, argc, &arg)) != -2)
{
switch (ret)
{
case 'p':
strcpy(p_config->print, arg);
break;
+ case 's':
+ p_config->sortoutput = 1;
+ break;
case 'x':
p_config->xmloutput = 1;
break;
default:
+ printf("Got %d\n", ret);
print_option_error(p_config);
}
}
if (p_config->xmloutput)
fprintf(config.outfile, "<converters count=\"%d\" default=\"%s\">\n",
count, ucnv_getDefaultName());
- else {
+ else
+ {
fprintf(config.outfile, "Available ICU converters: %d\n", count);
fprintf(config.outfile, "Default ICU Converter is: '%s'\n",
ucnv_getDefaultName());
}
- for(i=0;i<count;i++)
+ for (i = 0; i < count; i++)
{
if (p_config->xmloutput)
fprintf(config.outfile, "<converter id=\"%s\"/>\n",
ucnv_getAvailableName(i));
else
- fprintf(config.outfile, "%s ", ucnv_getAvailableName(i));
+ fprintf(config.outfile, "%s\n", ucnv_getAvailableName(i));
}
if (p_config->xmloutput)
+/* Print the IDs of the available ICU transliterators to config.outfile:
+ * as <transliterator/> elements when p_config->xmloutput is set,
+ * otherwise one ID per line followed by a short pattern/rule help text. */
static void print_icu_transliterators(const struct config_t *p_config)
{
- int32_t buf_cap = 128;
- char buf[128];
- int32_t i;
- int32_t count = utrans_countAvailableIDs();
-
+ /* ICU calls are no-ops when the incoming status already signals failure,
+ * so the status must start out as U_ZERO_ERROR. */
+ UErrorCode status = U_ZERO_ERROR;
+ UEnumeration *en = utrans_openIDs(&status);
+ int32_t count = uenum_count(en, &status);
+ const char *name;
+ int32_t length;
+
if (p_config->xmloutput)
fprintf(config.outfile, "<transliterators count=\"%d\">\n", count);
else
fprintf(config.outfile, "Available ICU transliterators: %d\n", count);
-
- for(i = 0; i <count; i++)
+
+ while ((name = uenum_next(en, &length, &status)))
{
- utrans_getAvailableID(i, buf, buf_cap);
if (p_config->xmloutput)
- fprintf(config.outfile, "<transliterator id=\"%s\"/>\n", buf);
+ fprintf(config.outfile, "<transliterator id=\"%s\"/>\n", name);
else
- fprintf(config.outfile, " %s", buf);
+ fprintf(config.outfile, "%s\n", name);
}
-
+ uenum_close(en);
if (p_config->xmloutput)
- {
fprintf(config.outfile, "</transliterators>\n");
- }
else
{
fprintf(config.outfile, "\n\nUnicode Set Patterns:\n"
" [A-Za-z]; Lower(); Latin-Katakana; Katakana-Hiragana (transforms latin and katagana to hiragana)\n"
" [[:separator:][:start punctuation:][:initial punctuation:]] Remove \n"
"\n"
- "see http://icu.sourceforge.net/userguide/Transform.html\n"
- " http://www.unicode.org/Public/UNIDATA/UCD.html\n"
- " http://icu.sourceforge.net/userguide/Transform.html\n"
- " http://icu.sourceforge.net/userguide/TransformRule.html\n"
+ "see http://userguide.icu-project.org/transforms/general\n"
+ " http://www.unicode.org/reports/tr44/\n"
);
fprintf(config.outfile, "<locales count=\"%d\" default=\"%s\" collations=\"%d\">\n",
count, uloc_getDefault(), ucol_countAvailable());
}
+ else
+ {
+ fprintf(config.outfile, "Available ICU locales: %d\n", count);
+ fprintf(config.outfile, "Default locale is: %s\n", uloc_getDefault());
+ }
- for(i=0;i<count;i++)
+ for (i = 0; i < count; i++)
{
keyword_len
fprintf(config.outfile, "\n");
}
else
- fprintf(config.outfile, "%s ", uloc_getAvailable(i));
+ fprintf(config.outfile, "%s\n", uloc_getAvailable(i));
}
if (p_config->xmloutput)
fprintf(config.outfile, "</locales>\n");
else
fprintf(config.outfile, "\n");
- if(U_FAILURE(status))
+ if (U_FAILURE(status))
{
fprintf(stderr, "ICU Error: %d %s\n", status, u_errorName(status));
- exit(status);
+ exit(2);
}
}
long unsigned int line_count = 0;
UErrorCode status = U_ZERO_ERROR;
- int success = 0;
- if (! xml_node)
+ if (!xml_node)
{
printf("Could not parse XML config file '%s' \n",
config.conffile);
- exit (1);
+ exit(1);
}
config.chain = icu_chain_xml_config(xml_node, 1, &status);
- if (config.chain && U_SUCCESS(status))
- success = 1;
- else {
+ if (!config.chain || !U_SUCCESS(status))
+ {
printf("Could not set up ICU chain from config file '%s' \n",
config.conffile);
- exit (1);
+ if (!U_SUCCESS(status))
+ printf("ICU Error: %d %s\n", status, u_errorName(status));
+ exit(1);
}
if (p_config->xmloutput)
/* read input lines for processing */
while ((line=fgets(linebuf, sizeof(linebuf)-1, config.infile)))
{
- success = icu_chain_assign_cstr(config.chain, line, &status);
+ WRBUF sw = wrbuf_alloc();
+ WRBUF cdata = wrbuf_alloc();
+ int success = icu_chain_assign_cstr(config.chain, line, &status);
line_count++;
while (success && icu_chain_next_token(config.chain, &status))
{
- WRBUF sw = wrbuf_alloc();
if (U_FAILURE(status))
success = 0;
- else {
+ else
+ {
const char *sortkey = icu_chain_token_sortkey(config.chain);
wrbuf_rewind(sw);
wrbuf_puts_escaped(sw, sortkey);
token_count++;
if (p_config->xmloutput)
{
- /* should XML encode this. Bug #1902 */
fprintf(config.outfile,
- "<token id=\"%lu\" line=\"%lu\""
- " norm=\"%s\" display=\"%s\" sortkey=\"%s\"/>\n",
- token_count,
- line_count,
- icu_chain_token_norm(config.chain),
- icu_chain_token_display(config.chain),
- wrbuf_cstr(sw));
+ "<token id=\"%lu\" line=\"%lu\"",
+ token_count, line_count);
+
+ wrbuf_rewind(cdata);
+ wrbuf_xmlputs(cdata, icu_chain_token_norm(config.chain));
+ fprintf(config.outfile, " norm=\"%s\"",
+ wrbuf_cstr(cdata));
+
+ wrbuf_rewind(cdata);
+ wrbuf_xmlputs(cdata, icu_chain_token_display(config.chain));
+ fprintf(config.outfile, " display=\"%s\"",
+ wrbuf_cstr(cdata));
+
+ if (p_config->sortoutput)
+ {
+ wrbuf_rewind(cdata);
+ wrbuf_xmlputs(cdata, wrbuf_cstr(sw));
+ fprintf(config.outfile, " sortkey=\"%s\"",
+ wrbuf_cstr(cdata));
+ }
+ fprintf(config.outfile, "/>\n");
}
else
- fprintf(config.outfile, "%lu %lu '%s' '%s' '%s'\n",
+ {
+ fprintf(config.outfile, "%lu %lu '%s' '%s'",
token_count,
line_count,
icu_chain_token_norm(config.chain),
- icu_chain_token_display(config.chain),
- wrbuf_cstr(sw));
+ icu_chain_token_display(config.chain));
+ if (p_config->sortoutput)
+ {
+ fprintf(config.outfile, " '%s'", wrbuf_cstr(sw));
+ }
+ fprintf(config.outfile, "\n");
+ }
}
- wrbuf_destroy(sw);
}
-
+ wrbuf_destroy(sw);
+ wrbuf_destroy(cdata);
}
if (p_config->xmloutput)
- fprintf(config.outfile,
+ fprintf(config.outfile,
"</tokens>\n"
"</icu>\n");
-
+
icu_chain_destroy(config.chain);
xmlFreeDoc(doc);
if (line)
#else /* YAZ_HAVE_ICU */
printf("ICU not available on your system.\n"
- "Please install libicu36-dev and icu-doc or similar, "
+ "Please install libicu-dev and icu-doc or similar, "
"re-configure and re-compile\n");
+ exit(3);
#endif /* YAZ_HAVE_ICU */
- return(0);
+ return 0;
}
/*
* Local variables:
* c-basic-offset: 4
+ * c-file-style: "Stroustrup"
* indent-tabs-mode: nil
* End:
* vim: shiftwidth=4 tabstop=8 expandtab