X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Frpnsearch.c;h=47c1c234c84500c4dda68aab85c7a8301fc3cba5;hp=f5aa6698741ff10c706cd199bf421da6f64f0d82;hb=c5971ebf8a88865ed9a1f7c8cf9daa22544f07be;hpb=2b86700a060db9a529b7227a1d3a9175564cd931 diff --git a/index/rpnsearch.c b/index/rpnsearch.c index f5aa669..47c1c23 100644 --- a/index/rpnsearch.c +++ b/index/rpnsearch.c @@ -1,5 +1,5 @@ /* This file is part of the Zebra server. - Copyright (C) 2004-2013 Index Data + Copyright (C) Index Data Zebra is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -187,7 +187,7 @@ static void esc_str(char *out_buf, size_t out_size, assert(in_buf); assert(out_size > 20); *out_buf = '\0'; - for (k = 0; k 0 && buf[--i] != '\x01') /* skip length */ + ; + while (i > 0 && buf[--i] != '\x01') /* skip accents */ + ; + return i; /* only basechars left */ +} + static int term_102_icu(zebra_map_t zm, const char **src, WRBUF term_dict, int space_split, WRBUF display_term) @@ -264,12 +278,8 @@ static int term_102_icu(zebra_map_t zm, if (zebra_map_tokenize_next(zm, &res_buf, &res_len, &display_buf, &display_len)) { - size_t i = res_len; - while (i > 0 && res_buf[--i] != '\x01') - ; - while (i > 0 && res_buf[--i] != '\x01') - ; - res_len = i; /* reduce res_len */ + size_t i; + res_len = icu_basechars(res_buf, res_len); for (i = 0; i < res_len; i++) { if (strchr(REGEX_CHARS "\\", res_buf[i])) @@ -301,59 +311,28 @@ static int term_102_icu(zebra_map_t zm, } static int term_100_icu(zebra_map_t zm, - const char **src, WRBUF term_dict, int space_split, + const char **src, WRBUF term_dict, WRBUF display_term, - int mode) + int mode, + size_t token_number) { size_t i; const char *res_buf = 0; size_t res_len = 0; const char *display_buf; size_t display_len; - const char *s0 = *src, *s1; - while (*s0 == ' ') - s0++; - - if (*s0 == '\0') - return 0; - - if (space_split) + zebra_map_tokenize_start(zm, *src, strlen(*src)); + for (i = 0; i <= token_number; i++) { - s1 = s0; - while (*s1 && *s1 != ' ') - s1++; - } - else - s1 = s0 + strlen(s0); - - *src = s1; - - zebra_map_tokenize_start(zm, s0, s1 - s0); - - if (!zebra_map_tokenize_next(zm, &res_buf, &res_len, - &display_buf, &display_len)) - { - return 0; + if (!zebra_map_tokenize_next(zm, &res_buf, &res_len, + &display_buf, &display_len)) + return 0; } wrbuf_write(display_term, display_buf, display_len); if (mode) { - /* ICU sort keys seem to be of the form - basechars \x01 accents \x01 length - For now we'll just right truncate from basechars . This - may give false hits due to accents not being used. - */ - i = res_len; - while (i > 0 && res_buf[--i] != '\x01') - ; - while (i > 0 && res_buf[--i] != '\x01') - ; - if (i == 0) - { /* did not find base chars at all. Throw error */ - return -1; - } - res_len = i; /* reduce res_len */ + res_len = icu_basechars(res_buf, res_len); } if (mode & 2) wrbuf_puts(term_dict, ".*"); @@ -728,7 +707,7 @@ static void gen_regular_rel(WRBUF term_dict, int val, int islt) else dst[dst_p++] = d; } - for (i = 0; i