X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=util%2Fzebramap.c;h=8165026f2290858427985162d62a45383f570964;hp=a951c89ea338c14fbd893317904e4a11e4867352;hb=99842ec71f065fd6886daa355923b01d9ce71d26;hpb=714209e118e0c66ce43edc1ab452ef675825c265 diff --git a/util/zebramap.c b/util/zebramap.c index a951c89..8165026 100644 --- a/util/zebramap.c +++ b/util/zebramap.c @@ -1,25 +1,25 @@ -/* $Id: zebramap.c,v 1.66 2007-11-07 10:24:28 adam Exp $ - Copyright (C) 1995-2007 - Index Data ApS - - This file is part of the Zebra server. - - Zebra is free software; you can redistribute it and/or modify it under - the terms of the GNU General Public License as published by the Free - Software Foundation; either version 2, or (at your option) any later - version. - - Zebra is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - for more details. - - You should have received a copy of the GNU General Public License - along with Zebra; see the file LICENSE.zebra. If not, write to the - Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA - 02111-1307, USA. +/* This file is part of the Zebra server. + Copyright (C) 1994-2011 Index Data + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#if HAVE_CONFIG_H +#include +#endif #include #include #include @@ -28,8 +28,8 @@ #include #include -#if HAVE_ICU -#include +#if YAZ_HAVE_ICU +#include #endif #include @@ -47,6 +47,7 @@ struct zebra_map { int first_in_field; int type; int use_chain; + int debug; union { struct { int entry_size; @@ -54,15 +55,15 @@ struct zebra_map { } u; chrmaptab maptab; const char *maptab_name; - const char *locale; zebra_maps_t zebra_maps; #if YAZ_HAVE_XML2 xmlDocPtr doc; #endif -#if HAVE_ICU +#if YAZ_HAVE_ICU struct icu_chain *icu_chain; #endif - WRBUF simple_buf; + WRBUF input_str; + WRBUF print_str; size_t simple_off; struct zebra_map *next; }; @@ -86,14 +87,15 @@ void zebra_maps_close(zebra_maps_t zms) { if (zm->maptab) chrmaptab_destroy(zm->maptab); -#if HAVE_ICU +#if YAZ_HAVE_ICU if (zm->icu_chain) icu_chain_destroy(zm->icu_chain); #endif #if YAZ_HAVE_XML2 xmlFreeDoc(zm->doc); #endif - wrbuf_destroy(zm->simple_buf); + wrbuf_destroy(zm->input_str); + wrbuf_destroy(zm->print_str); zm = zm->next; } wrbuf_destroy(zms->wrbuf_1); @@ -110,7 +112,7 @@ zebra_map_t zebra_add_map(zebra_maps_t zms, const char *index_type, zm->id = nmem_strdup(zms->nmem, index_type); zm->maptab_name = 0; zm->use_chain = 0; - zm->locale = 0; + zm->debug = 0; zm->maptab = 0; zm->type = map_type; zm->completeness = 0; @@ -124,13 +126,14 @@ zebra_map_t zebra_add_map(zebra_maps_t zms, const char *index_type, zms->map_list = zm; zms->last_map = zm; zm->next = 0; -#if HAVE_ICU +#if YAZ_HAVE_ICU zm->icu_chain = 0; #endif #if YAZ_HAVE_XML2 zm->doc = 0; #endif - zm->simple_buf = wrbuf_alloc(); + zm->input_str = wrbuf_alloc(); + zm->print_str = wrbuf_alloc(); return zm; } @@ -218,25 +221,25 @@ static int parse_command(zebra_maps_t zms, int argc, char **argv, return -1; } } - else if (!yaz_matchstr(argv[0], "locale")) - { - zm->locale = nmem_strdup(zms->nmem, argv[1]); - } else if (!yaz_matchstr(argv[0], "simplechain")) { zm->use_chain = 1; +#if YAZ_HAVE_ICU zm->icu_chain = 0; +#endif } else if (!yaz_matchstr(argv[0], "icuchain")) { -#if YAZ_HAVE_XML2 - if (!zm->locale) + char full_path[1024]; + if (!yaz_filepath_resolve(argv[1], zms->tabpath, zms->tabroot, + full_path)) { - yaz_log(YLOG_WARN, "%s:%d: locale required before icuchain", - fname, lineno); + yaz_log(YLOG_WARN, "%s:%d: Could not locate icuchain config '%s'", + fname, lineno, argv[1]); return -1; } - zm->doc = xmlParseFile(argv[1]); +#if YAZ_HAVE_XML2 + zm->doc = xmlParseFile(full_path); if (!zm->doc) { yaz_log(YLOG_WARN, "%s:%d: Could not load icuchain config '%s'", @@ -245,11 +248,11 @@ static int parse_command(zebra_maps_t zms, int argc, char **argv, } else { -#if HAVE_ICU +#if YAZ_HAVE_ICU UErrorCode status; xmlNode *xml_node = xmlDocGetRootElement(zm->doc); zm->icu_chain = - icu_chain_xml_config(xml_node, zm->locale, + icu_chain_xml_config(xml_node, /* not sure about sort for this function yet.. */ #if 1 1, @@ -275,6 +278,10 @@ static int parse_command(zebra_maps_t zms, int argc, char **argv, return -1; #endif } + else if (!yaz_matchstr(argv[0], "debug") && argc == 2) + { + zm->debug = atoi(argv[1]); + } else { yaz_log(YLOG_WARN, "%s:%d: Unrecognized directive '%s'", @@ -338,6 +345,12 @@ zebra_maps_t zebra_maps_open(Res res, const char *base_path, return zms; } +void zebra_maps_define_default_sort(zebra_maps_t zms) +{ + zebra_map_t zm = zebra_add_map(zms, "s", ZEBRA_MAP_TYPE_SORT); + zm->u.sort.entry_size = 80; +} + zebra_map_t zebra_map_get(zebra_maps_t zms, const char *id) { zebra_map_t zm; @@ -615,8 +628,8 @@ WRBUF zebra_replace(zebra_map_t zm, const char *ex_list, static int tokenize_simple(zebra_map_t zm, const char **result_buf, size_t *result_len) { - char *buf = wrbuf_buf(zm->simple_buf); - size_t len = wrbuf_len(zm->simple_buf); + char *buf = wrbuf_buf(zm->input_str); + size_t len = wrbuf_len(zm->input_str); size_t i = zm->simple_off; size_t start; @@ -640,50 +653,94 @@ static int tokenize_simple(zebra_map_t zm, return 0; } -int zebra_map_tokenize(zebra_map_t zm, - const char *buf, size_t len, - const char **result_buf, size_t *result_len) + +int zebra_map_tokenize_next(zebra_map_t zm, + const char **result_buf, size_t *result_len, + const char **display_buf, size_t *display_len) { assert(zm->use_chain); - if (buf) - { - wrbuf_rewind(zm->simple_buf); - wrbuf_write(zm->simple_buf, buf, len); - zm->simple_off = 0; - } - +#if YAZ_HAVE_ICU if (!zm->icu_chain) return tokenize_simple(zm, result_buf, result_len); else { UErrorCode status; - if (buf) - { - yaz_log(YLOG_LOG, "assicn_cstr %s", wrbuf_cstr(zm->simple_buf)); - icu_chain_assign_cstr(zm->icu_chain, - wrbuf_cstr(zm->simple_buf), - &status); - assert(U_SUCCESS(status)); - } while (icu_chain_next_token(zm->icu_chain, &status)) { - assert(U_SUCCESS(status)); - *result_buf = icu_chain_token_norm(zm->icu_chain); + if (!U_SUCCESS(status)) + return 0; + *result_buf = icu_chain_token_sortkey(zm->icu_chain); assert(*result_buf); - yaz_log(YLOG_LOG, "got result %s", *result_buf); + *result_len = strlen(*result_buf); + + if (display_buf) + { + *display_buf = icu_chain_token_display(zm->icu_chain); + if (display_len) + *display_len = strlen(*display_buf); + } + if (zm->debug) + { + wrbuf_rewind(zm->print_str); + wrbuf_write_escaped(zm->print_str, *result_buf, *result_len); + yaz_log(YLOG_LOG, "output %s", wrbuf_cstr(zm->print_str)); + } + if (**result_buf != '\0') return 1; } - assert(U_SUCCESS(status)); } return 0; +#else + return tokenize_simple(zm, result_buf, result_len); +#endif +} + +int zebra_map_tokenize_start(zebra_map_t zm, + const char *buf, size_t len) +{ +#if YAZ_HAVE_ICU + int ret; +#endif + assert(zm->use_chain); + + wrbuf_rewind(zm->input_str); + wrbuf_write(zm->input_str, buf, len); + zm->simple_off = 0; +#if YAZ_HAVE_ICU + if (zm->icu_chain) + { + UErrorCode status; + if (zm->debug) + { + wrbuf_rewind(zm->print_str); + wrbuf_write_escaped(zm->print_str, wrbuf_buf(zm->input_str), + wrbuf_len(zm->input_str)); + + yaz_log(YLOG_LOG, "input %s", + wrbuf_cstr(zm->print_str)); + } + ret = icu_chain_assign_cstr(zm->icu_chain, + wrbuf_cstr(zm->input_str), &status); + if (!ret && !U_SUCCESS(status)) + { + if (zm->debug) + { + yaz_log(YLOG_WARN, "bad encoding for input"); + } + return -1; + } + } +#endif + return 0; } int zebra_maps_is_icu(zebra_map_t zm) { -#if HAVE_ICU + assert(zm); +#if YAZ_HAVE_ICU return zm->use_chain; #else return 0; @@ -694,6 +751,7 @@ int zebra_maps_is_icu(zebra_map_t zm) /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab