projects
/
yaz-moved-to-github.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Do not reset querytype if session resets (HTTP/SRU).
[yaz-moved-to-github.git]
/
util
/
yaz-icu.c
diff --git
a/util/yaz-icu.c
b/util/yaz-icu.c
index
00b390d
..
56e2bf4
100644
(file)
--- a/
util/yaz-icu.c
+++ b/
util/yaz-icu.c
@@
-2,7
+2,7
@@
* Copyright (C) 1995-2007, Index Data ApS
* See the file LICENSE for details.
*
* Copyright (C) 1995-2007, Index Data ApS
* See the file LICENSE for details.
*
- * $Id: yaz-icu.c,v 1.12 2007-11-08 18:02:04 adam Exp $
+ * $Id: yaz-icu.c,v 1.15 2007-11-15 08:45:52 adam Exp $
*/
#if HAVE_CONFIG_H
*/
#if HAVE_CONFIG_H
@@
-25,6
+25,7
@@
#include <unicode/utrans.h>
#include <yaz/icu.h>
#include <unicode/utrans.h>
#include <yaz/icu.h>
+#include <yaz/wrbuf.h>
/* command-line and config parameters */
static struct config_t {
/* command-line and config parameters */
static struct config_t {
@@
-53,10
+54,10
@@
void print_option_error(const struct config_t *p_config)
"./yaz-icu -p t -x\n"
"\n"
"Example ICU chain XML configuration file:\n"
"./yaz-icu -p t -x\n"
"\n"
"Example ICU chain XML configuration file:\n"
- "<icu_chain id=\"en:word\" locale=\"en\">\n"
- " <normalize rule=\"[:Control:] Any-Remove\"/>\n"
+ "<icu_chain locale=\"en\">\n"
+ " <transform rule=\"[:Control:] Any-Remove\"/>\n"
" <tokenize rule=\"l\"/>\n"
" <tokenize rule=\"l\"/>\n"
- " <normalize rule=\"[[:WhiteSpace:][:Punctuation:]] Remove\"/>\n"
+ " <transform rule=\"[[:WhiteSpace:][:Punctuation:]] Remove\"/>\n"
" <casemap rule=\"l\"/>\n"
"</icu_chain>\n"
);
" <casemap rule=\"l\"/>\n"
"</icu_chain>\n"
);
@@
-446,7
+447,7
@@
static void process_text_file(const struct config_t *p_config)
exit (1);
}
exit (1);
}
- config.chain = icu_chain_xml_config(xml_node, 0, &status);
+ config.chain = icu_chain_xml_config(xml_node, 1, &status);
if (config.chain && U_SUCCESS(status))
success = 1;
if (config.chain && U_SUCCESS(status))
success = 1;
@@
-470,25
+471,35
@@
static void process_text_file(const struct config_t *p_config)
while (success && icu_chain_next_token(config.chain, &status))
{
while (success && icu_chain_next_token(config.chain, &status))
{
+ WRBUF sw = wrbuf_alloc();
if (U_FAILURE(status))
success = 0;
else {
if (U_FAILURE(status))
success = 0;
else {
+ const char *sortkey = icu_chain_token_sortkey(config.chain);
+ wrbuf_rewind(sw);
+ wrbuf_puts_escaped(sw, sortkey);
token_count++;
if (p_config->xmloutput)
token_count++;
if (p_config->xmloutput)
+ {
+ /* should XML encode this. Bug #1902 */
fprintf(config.outfile,
"<token id=\%lu\" line=\"%lu\""
fprintf(config.outfile,
"<token id=\%lu\" line=\"%lu\""
- " norm=\"%s\" display=\"%s\"/>\n",
+ " norm=\"%s\" display=\"%s\" sortkey=\"%s\"/>\n",
token_count,
line_count,
icu_chain_token_norm(config.chain),
token_count,
line_count,
icu_chain_token_norm(config.chain),
- icu_chain_token_display(config.chain));
+ icu_chain_token_display(config.chain),
+ wrbuf_cstr(sw));
+ }
else
else
- fprintf(config.outfile, "%lu %lu '%s' '%s'\n",
+ fprintf(config.outfile, "%lu %lu '%s' '%s' '%s'\n",
token_count,
line_count,
icu_chain_token_norm(config.chain),
token_count,
line_count,
icu_chain_token_norm(config.chain),
- icu_chain_token_display(config.chain));
+ icu_chain_token_display(config.chain),
+ wrbuf_cstr(sw));
}
}
+ wrbuf_destroy(sw);
}
}
}
}