From 714209e118e0c66ce43edc1ab452ef675825c265 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Wed, 7 Nov 2007 10:24:28 +0000 Subject: [PATCH] Added first test ICU indexing test --- test/api/Makefile.am | 4 ++-- test/api/t17.idx | 37 ++++--------------------------------- test/api/words-icu.xml | 7 +++++++ util/zebramap.c | 38 +++++++++++++++++++++++++++++++++----- 4 files changed, 46 insertions(+), 40 deletions(-) create mode 100644 test/api/words-icu.xml diff --git a/test/api/Makefile.am b/test/api/Makefile.am index 12bd994..f933ee9 100644 --- a/test/api/Makefile.am +++ b/test/api/Makefile.am @@ -1,4 +1,4 @@ -# $Id: Makefile.am,v 1.42 2007-11-06 10:29:59 adam Exp $ +# $Id: Makefile.am,v 1.43 2007-11-07 10:24:28 adam Exp $ noinst_PROGRAMS = testclient testclient_SOURCES = testclient.c @@ -9,7 +9,7 @@ check_PROGRAMS = $(simpletests) $(safaritests) TESTS = $(check_PROGRAMS) EXTRA_DIST=zebra.cfg zebra6.cfg zebra8.cfg zebra10.cfg zebra15.cfg safari.cfg \ - t10.att t10.abs zebra17.cfg t17.idx + t10.att t10.abs zebra17.cfg t17.idx words-icu.xml noinst_LIBRARIES = libtestlib.a diff --git a/test/api/t17.idx b/test/api/t17.idx index 0555ebd..20efd8c 100644 --- a/test/api/t17.idx +++ b/test/api/t17.idx @@ -1,5 +1,5 @@ # Zebra indexes as referred to from the *.abs-files. -# $Id: t17.idx,v 1.1 2007-11-06 10:30:00 adam Exp $ +# $Id: t17.idx,v 1.2 2007-11-07 10:24:28 adam Exp $ # # Traditional word index @@ -10,7 +10,9 @@ completeness 0 position 1 alwaysmatches 1 firstinfield 1 -simplechain dummy +locale en +# simplechain dummy +icuchain words-icu.xml # Phrase index # Used if completeness is 'complete {sub}field' (@attr 6=2, @attr 6=1) @@ -19,37 +21,6 @@ index p completeness 1 charmap string.chr -# URX (URL) index -# Used if structure=urx (@attr 4=104) -index u -completeness 0 -charmap urx.chr - -# Numeric index -# Used if structure=numeric (@attr 4=109) -index n -completeness 0 -charmap numeric.chr - -# Null map index (no mapping at all) -# Used if structure=key (@attr 4=3) -index 0 -completeness 0 -position 1 -charmap @ - -# Year -# Used if structure=year (@attr 4=4) -index y -completeness 0 -charmap @ - -# Date -# Used if structure=date (@attr 4=5) -index d -completeness 0 -charmap @ - # Sort register sort s completeness 1 diff --git a/test/api/words-icu.xml b/test/api/words-icu.xml new file mode 100644 index 0000000..b41ff56 --- /dev/null +++ b/test/api/words-icu.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/util/zebramap.c b/util/zebramap.c index e1cb678..a951c89 100644 --- a/util/zebramap.c +++ b/util/zebramap.c @@ -1,4 +1,4 @@ -/* $Id: zebramap.c,v 1.65 2007-11-06 10:30:02 adam Exp $ +/* $Id: zebramap.c,v 1.66 2007-11-07 10:24:28 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -250,7 +250,12 @@ static int parse_command(zebra_maps_t zms, int argc, char **argv, xmlNode *xml_node = xmlDocGetRootElement(zm->doc); zm->icu_chain = icu_chain_xml_config(xml_node, zm->locale, +/* not sure about sort for this function yet.. */ +#if 1 + 1, +#else zm->type == ZEBRA_MAP_TYPE_SORT, +#endif &status); if (!zm->icu_chain) { @@ -640,15 +645,38 @@ int zebra_map_tokenize(zebra_map_t zm, const char **result_buf, size_t *result_len) { assert(zm->use_chain); + + if (buf) + { + wrbuf_rewind(zm->simple_buf); + wrbuf_write(zm->simple_buf, buf, len); + zm->simple_off = 0; + } + if (!zm->icu_chain) + return tokenize_simple(zm, result_buf, result_len); + else { + UErrorCode status; if (buf) { - wrbuf_rewind(zm->simple_buf); - wrbuf_write(zm->simple_buf, buf, len); - zm->simple_off = 0; + yaz_log(YLOG_LOG, "assicn_cstr %s", wrbuf_cstr(zm->simple_buf)); + icu_chain_assign_cstr(zm->icu_chain, + wrbuf_cstr(zm->simple_buf), + &status); + assert(U_SUCCESS(status)); } - return tokenize_simple(zm, result_buf, result_len); + while (icu_chain_next_token(zm->icu_chain, &status)) + { + assert(U_SUCCESS(status)); + *result_buf = icu_chain_token_norm(zm->icu_chain); + assert(*result_buf); + yaz_log(YLOG_LOG, "got result %s", *result_buf); + *result_len = strlen(*result_buf); + if (**result_buf != '\0') + return 1; + } + assert(U_SUCCESS(status)); } return 0; } -- 1.7.10.4