X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=util%2Fzebramap.c;h=67613ec6f6a2e9a7cfe783edfbfc441c69de3941;hb=5670e4ac9b1b2fe462afdfec8dc0f747957084bc;hp=fd62d4aa5a463a8637fc2ac83ebf5a7639d3b88b;hpb=d8deeadf0ff6606e63ce21b2b2499a40d69e482c;p=idzebra-moved-to-github.git diff --git a/util/zebramap.c b/util/zebramap.c index fd62d4a..67613ec 100644 --- a/util/zebramap.c +++ b/util/zebramap.c @@ -1,4 +1,4 @@ -/* $Id: zebramap.c,v 1.72 2007-11-15 08:53:26 adam Exp $ +/* $Id: zebramap.c,v 1.77 2007-12-20 19:05:12 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -230,8 +230,16 @@ static int parse_command(zebra_maps_t zms, int argc, char **argv, } else if (!yaz_matchstr(argv[0], "icuchain")) { + char full_path[1024]; + if (!yaz_filepath_resolve(argv[1], zms->tabpath, zms->tabroot, + full_path)) + { + yaz_log(YLOG_WARN, "%s:%d: Could not locate icuchain config '%s'", + fname, lineno, argv[1]); + return -1; + } #if YAZ_HAVE_XML2 - zm->doc = xmlParseFile(argv[1]); + zm->doc = xmlParseFile(full_path); if (!zm->doc) { yaz_log(YLOG_WARN, "%s:%d: Could not load icuchain config '%s'", @@ -639,49 +647,34 @@ static int tokenize_simple(zebra_map_t zm, return 0; } -int zebra_map_tokenize(zebra_map_t zm, - const char *buf, size_t len, - const char **result_buf, size_t *result_len) + +int zebra_map_tokenize_next(zebra_map_t zm, + const char **result_buf, size_t *result_len, + const char **display_buf, size_t *display_len) { assert(zm->use_chain); - if (buf) - { - wrbuf_rewind(zm->input_str); - wrbuf_write(zm->input_str, buf, len); - zm->simple_off = 0; - } - #if YAZ_HAVE_ICU if (!zm->icu_chain) return tokenize_simple(zm, result_buf, result_len); else { UErrorCode status; - if (buf) - { - if (zm->debug) - { - wrbuf_rewind(zm->print_str); - wrbuf_write_escaped(zm->print_str, wrbuf_buf(zm->input_str), - wrbuf_len(zm->input_str)); - - yaz_log(YLOG_LOG, "input %s", - wrbuf_cstr(zm->print_str)); - } - icu_chain_assign_cstr(zm->icu_chain, - wrbuf_cstr(zm->input_str), - &status); - assert(U_SUCCESS(status)); - } while (icu_chain_next_token(zm->icu_chain, &status)) { - assert(U_SUCCESS(status)); + if (!U_SUCCESS(status)) + return 0; *result_buf = icu_chain_token_sortkey(zm->icu_chain); assert(*result_buf); *result_len = strlen(*result_buf); + if (display_buf) + { + *display_buf = icu_chain_token_display(zm->icu_chain); + if (display_len) + *display_len = strlen(*display_buf); + } if (zm->debug) { wrbuf_rewind(zm->print_str); @@ -692,7 +685,6 @@ int zebra_map_tokenize(zebra_map_t zm, if (**result_buf != '\0') return 1; } - assert(U_SUCCESS(status)); } return 0; #else @@ -700,6 +692,43 @@ int zebra_map_tokenize(zebra_map_t zm, #endif } +int zebra_map_tokenize_start(zebra_map_t zm, + const char *buf, size_t len) +{ + assert(zm->use_chain); + + wrbuf_rewind(zm->input_str); + wrbuf_write(zm->input_str, buf, len); + zm->simple_off = 0; +#if YAZ_HAVE_ICU + if (zm->icu_chain) + { + UErrorCode status; + if (zm->debug) + { + wrbuf_rewind(zm->print_str); + wrbuf_write_escaped(zm->print_str, wrbuf_buf(zm->input_str), + wrbuf_len(zm->input_str)); + + yaz_log(YLOG_LOG, "input %s", + wrbuf_cstr(zm->print_str)); + } + icu_chain_assign_cstr(zm->icu_chain, + wrbuf_cstr(zm->input_str), + &status); + if (!U_SUCCESS(status)) + { + if (zm->debug) + { + yaz_log(YLOG_WARN, "bad encoding for input"); + } + return -1; + } + } +#endif + return 0; +} + int zebra_maps_is_icu(zebra_map_t zm) { #if YAZ_HAVE_ICU