X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=util%2Fzebramap.c;h=ea467981429d87cf312788f5604f048ace293950;hp=4361fda02fb8127a27973839534e763bdebd1cf7;hb=250de4ed23a44f5eb3552db317eef0d0fbe3265c;hpb=2c130f28f8f06161e35a7945c9acb11d966937f9 diff --git a/util/zebramap.c b/util/zebramap.c index 4361fda..ea46798 100644 --- a/util/zebramap.c +++ b/util/zebramap.c @@ -1,25 +1,25 @@ -/* $Id: zebramap.c,v 1.75 2007-12-13 18:08:26 adam Exp $ - Copyright (C) 1995-2007 - Index Data ApS - - This file is part of the Zebra server. - - Zebra is free software; you can redistribute it and/or modify it under - the terms of the GNU General Public License as published by the Free - Software Foundation; either version 2, or (at your option) any later - version. - - Zebra is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - for more details. - - You should have received a copy of the GNU General Public License - along with Zebra; see the file LICENSE.zebra. If not, write to the - Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA - 02111-1307, USA. +/* This file is part of the Zebra server. + Copyright (C) 2004-2013 Index Data + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#if HAVE_CONFIG_H +#include +#endif #include #include #include @@ -170,7 +170,7 @@ static int parse_command(zebra_maps_t zms, int argc, char **argv, } else if (!zm) { - yaz_log(YLOG_WARN, "%s:%d: Missing sort/index before '%s'", + yaz_log(YLOG_WARN, "%s:%d: Missing sort/index before '%s'", fname, lineno, argv[0]); return -1; } @@ -215,8 +215,8 @@ static int parse_command(zebra_maps_t zms, int argc, char **argv, zm->u.sort.entry_size = atoi(argv[1]); else { - yaz_log(YLOG_WARN, - "%s:%d: entrysize only valid in sort section", + yaz_log(YLOG_WARN, + "%s:%d: entrysize only valid in sort section", fname, lineno); return -1; } @@ -251,14 +251,14 @@ static int parse_command(zebra_maps_t zms, int argc, char **argv, #if YAZ_HAVE_ICU UErrorCode status; xmlNode *xml_node = xmlDocGetRootElement(zm->doc); - zm->icu_chain = + zm->icu_chain = icu_chain_xml_config(xml_node, /* not sure about sort for this function yet.. */ #if 1 1, #else zm->type == ZEBRA_MAP_TYPE_SORT, -#endif +#endif &status); if (!zm->icu_chain) { @@ -284,7 +284,7 @@ static int parse_command(zebra_maps_t zms, int argc, char **argv, } else { - yaz_log(YLOG_WARN, "%s:%d: Unrecognized directive '%s'", + yaz_log(YLOG_WARN, "%s:%d: Unrecognized directive '%s'", fname, lineno, argv[0]); return -1; } @@ -345,6 +345,12 @@ zebra_maps_t zebra_maps_open(Res res, const char *base_path, return zms; } +void zebra_maps_define_default_sort(zebra_maps_t zms) +{ + zebra_map_t zm = zebra_add_map(zms, "s", ZEBRA_MAP_TYPE_SORT); + zm->u.sort.entry_size = 80; +} + zebra_map_t zebra_map_get(zebra_maps_t zms, const char *id) { zebra_map_t zm; @@ -360,7 +366,7 @@ zebra_map_t zebra_map_get_or_add(zebra_maps_t zms, const char *id) if (!zm) { zm = zebra_add_map(zms, id, ZEBRA_MAP_TYPE_INDEX); - + /* no reason to warn if no maps are read from file */ if (zms->no_files_read) yaz_log(YLOG_WARN, "Unknown register type: %s", id); @@ -395,7 +401,7 @@ const char **zebra_maps_input(zebra_map_t zm, chrmaptab maptab = zebra_charmap_get(zm); if (maptab) return chr_map_input(maptab, from, len, first); - + zm->zebra_maps->temp_map_str[0] = **from; (*from)++; @@ -406,7 +412,7 @@ const char **zebra_maps_search(zebra_map_t zm, const char **from, int len, int *q_map_match) { chrmaptab maptab; - + *q_map_match = 0; maptab = zebra_charmap_get(zm); if (maptab) @@ -441,7 +447,7 @@ const char *zebra_maps_output(zebra_map_t zm, /* ------------------------------------ */ int zebra_maps_is_complete(zebra_map_t zm) -{ +{ if (zm) return zm->completeness; return 0; @@ -467,7 +473,7 @@ int zebra_maps_is_staticrank(zebra_map_t zm) return zm->type == ZEBRA_MAP_TYPE_STATICRANK; return 0; } - + int zebra_maps_is_sort(zebra_map_t zm) { if (zm) @@ -566,11 +572,11 @@ int zebra_maps_attr(zebra_maps_t zms, Z_AttributesPlusTerm *zapt, break; case 106: /* document-text */ *search_type = "or-list"; - break; + break; case -1: case 1: /* phrase */ case 2: /* word */ - case 108: /* string */ + case 108: /* string */ *search_type = "phrase"; break; case 107: /* local-number */ @@ -662,7 +668,8 @@ int zebra_map_tokenize_next(zebra_map_t zm, UErrorCode status; while (icu_chain_next_token(zm->icu_chain, &status)) { - assert(U_SUCCESS(status)); + if (!U_SUCCESS(status)) + return 0; *result_buf = icu_chain_token_sortkey(zm->icu_chain); assert(*result_buf); @@ -684,7 +691,6 @@ int zebra_map_tokenize_next(zebra_map_t zm, if (**result_buf != '\0') return 1; } - assert(U_SUCCESS(status)); } return 0; #else @@ -695,6 +701,9 @@ int zebra_map_tokenize_next(zebra_map_t zm, int zebra_map_tokenize_start(zebra_map_t zm, const char *buf, size_t len) { +#if YAZ_HAVE_ICU + int ret; +#endif assert(zm->use_chain); wrbuf_rewind(zm->input_str); @@ -709,84 +718,28 @@ int zebra_map_tokenize_start(zebra_map_t zm, wrbuf_rewind(zm->print_str); wrbuf_write_escaped(zm->print_str, wrbuf_buf(zm->input_str), wrbuf_len(zm->input_str)); - - yaz_log(YLOG_LOG, "input %s", - wrbuf_cstr(zm->print_str)); - } - icu_chain_assign_cstr(zm->icu_chain, - wrbuf_cstr(zm->input_str), - &status); - assert(U_SUCCESS(status)); - } -#endif - return 0; -} -#if 0 -int zebra_map_tokenize(zebra_map_t zm, - const char *buf, size_t len, - const char **result_buf, size_t *result_len) -{ - assert(zm->use_chain); - - if (buf) - { - wrbuf_rewind(zm->input_str); - wrbuf_write(zm->input_str, buf, len); - zm->simple_off = 0; - } - -#if YAZ_HAVE_ICU - if (!zm->icu_chain) - return tokenize_simple(zm, result_buf, result_len); - else - { - UErrorCode status; - if (buf) - { - if (zm->debug) - { - wrbuf_rewind(zm->print_str); - wrbuf_write_escaped(zm->print_str, wrbuf_buf(zm->input_str), - wrbuf_len(zm->input_str)); - - yaz_log(YLOG_LOG, "input %s", - wrbuf_cstr(zm->print_str)); - } - icu_chain_assign_cstr(zm->icu_chain, - wrbuf_cstr(zm->input_str), - &status); - assert(U_SUCCESS(status)); + yaz_log(YLOG_LOG, "input %s", + wrbuf_cstr(zm->print_str)); } - while (icu_chain_next_token(zm->icu_chain, &status)) + ret = icu_chain_assign_cstr(zm->icu_chain, + wrbuf_cstr(zm->input_str), &status); + if (!ret && !U_SUCCESS(status)) { - assert(U_SUCCESS(status)); - *result_buf = icu_chain_token_sortkey(zm->icu_chain); - assert(*result_buf); - - *result_len = strlen(*result_buf); - if (zm->debug) { - wrbuf_rewind(zm->print_str); - wrbuf_write_escaped(zm->print_str, *result_buf, *result_len); - yaz_log(YLOG_LOG, "output %s", wrbuf_cstr(zm->print_str)); + yaz_log(YLOG_WARN, "bad encoding for input"); } - - if (**result_buf != '\0') - return 1; + return -1; } - assert(U_SUCCESS(status)); } - return 0; -#else - return tokenize_simple(zm, result_buf, result_len); #endif + return 0; } -#endif int zebra_maps_is_icu(zebra_map_t zm) { + assert(zm); #if YAZ_HAVE_ICU return zm->use_chain; #else @@ -798,6 +751,7 @@ int zebra_maps_is_icu(zebra_map_t zm) /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab