X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Frpnsearch.c;h=fc7887060f6a88f16de6c3939a2d45b250ba18ab;hp=a117ad2804f924276e56ac27636d70c4264860b1;hb=6a0f9234f945bc4956e2bcef75f715661a9eba9a;hpb=eeb136c56069a5365e5550fec2e40ac592b48981 diff --git a/index/rpnsearch.c b/index/rpnsearch.c index a117ad2..fc78870 100644 --- a/index/rpnsearch.c +++ b/index/rpnsearch.c @@ -1,8 +1,5 @@ -/* $Id: rpnsearch.c,v 1.31 2008-01-26 15:48:29 adam Exp $ - Copyright (C) 1995-2007 - Index Data ApS - -This file is part of the Zebra server. +/* This file is part of the Zebra server. + Copyright (C) 1994-2009 Index Data Zebra is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -238,7 +235,8 @@ static void add_non_space(const char *start, const char *end, static int term_100_icu(zebra_map_t zm, const char **src, WRBUF term_dict, int space_split, - WRBUF display_term) + WRBUF display_term, + int right_trunc) { int i; const char *res_buf = 0; @@ -252,14 +250,38 @@ static int term_100_icu(zebra_map_t zm, return 0; } wrbuf_write(display_term, display_buf, display_len); + if (right_trunc) + { + /* ICU sort keys seem to be of the form + basechars \x01 accents \x01 length + For now we'll just right truncate from basechars . This + may give false hits due to accents not being used. + */ + i = res_len; + while (--i >= 0 && res_buf[i] != '\x01') + ; + if (i > 0) + { + while (--i >= 0 && res_buf[i] != '\x01') + ; + } + if (i == 0) + { /* did not find base chars at all. 
Throw error */ + return -1; + } + res_len = i; /* reduce res_len */ + } for (i = 0; i < res_len; i++) { if (strchr(REGEX_CHARS "\\", res_buf[i])) wrbuf_putc(term_dict, '\\'); if (res_buf[i] < 32) wrbuf_putc(term_dict, 1); + wrbuf_putc(term_dict, res_buf[i]); } + if (right_trunc) + wrbuf_puts(term_dict, ".*"); return 1; } @@ -275,9 +297,6 @@ static int term_100(zebra_map_t zm, const char *space_start = 0; const char *space_end = 0; - if (zebra_maps_is_icu(zm)) - return term_100_icu(zm, src, term_dict, space_split, display_term); - if (!term_pre(zm, src, NULL, NULL, !space_split)) return 0; s0 = *src; @@ -1013,113 +1032,161 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, wrbuf_putc(term_dict, ')'); prefix_len = wrbuf_len(term_dict); - - switch (truncation_value) - { - case -1: /* not specified */ - case 100: /* do not truncate */ - if (!string_relation(zh, zapt, &termp, term_dict, - attributeSet, - zm, space_split, display_term, - &relation_error)) + + if (zebra_maps_is_icu(zm)) + { + int relation_value; + AttrType relation; + + attr_init_APT(&relation, zapt, 2); + relation_value = attr_find(&relation, NULL); + if (relation_value == 103) /* always matches */ + termp += strlen(termp); /* move to end of term */ + else if (relation_value == 3 || relation_value == 102 || relation_value == -1) { - if (relation_error) + /* ICU case */ + switch (truncation_value) { - zebra_setError(zh, relation_error, 0); + case -1: /* not specified */ + case 100: /* do not truncate */ + if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0)) + { + *term_sub = 0; + return ZEBRA_OK; + } + break; + case 1: /* right truncation */ + if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1)) + { + *term_sub = 0; + return ZEBRA_OK; + } + break; + default: + zebra_setError_zint(zh, + YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE, + truncation_value); return ZEBRA_FAIL; } - *term_sub = 0; - return ZEBRA_OK; - } - break; - case 1: /* right truncation 
*/ - wrbuf_putc(term_dict, '('); - if (!term_100(zm, &termp, term_dict, space_split, display_term)) - { - *term_sub = 0; - return ZEBRA_OK; - } - wrbuf_puts(term_dict, ".*)"); - break; - case 2: /* keft truncation */ - wrbuf_puts(term_dict, "(.*"); - if (!term_100(zm, &termp, term_dict, space_split, display_term)) - { - *term_sub = 0; - return ZEBRA_OK; - } - wrbuf_putc(term_dict, ')'); - break; - case 3: /* left&right truncation */ - wrbuf_puts(term_dict, "(.*"); - if (!term_100(zm, &termp, term_dict, space_split, display_term)) - { - *term_sub = 0; - return ZEBRA_OK; - } - wrbuf_puts(term_dict, ".*)"); - break; - case 101: /* process # in term */ - wrbuf_putc(term_dict, '('); - if (!term_101(zm, &termp, term_dict, space_split, display_term)) - { - *term_sub = 0; - return ZEBRA_OK; - } - wrbuf_puts(term_dict, ")"); - break; - case 102: /* Regexp-1 */ - wrbuf_putc(term_dict, '('); - if (!term_102(zm, &termp, term_dict, space_split, display_term)) - { - *term_sub = 0; - return ZEBRA_OK; - } - wrbuf_putc(term_dict, ')'); - break; - case 103: /* Regexp-2 */ - regex_range = 1; - wrbuf_putc(term_dict, '('); - if (!term_103(zm, &termp, term_dict, &regex_range, - space_split, display_term)) - { - *term_sub = 0; - return ZEBRA_OK; - } - wrbuf_putc(term_dict, ')'); - break; - case 104: /* process # and ! in term */ - wrbuf_putc(term_dict, '('); - if (!term_104(zm, &termp, term_dict, space_split, display_term)) - { - *term_sub = 0; - return ZEBRA_OK; } - wrbuf_putc(term_dict, ')'); - break; - case 105: /* process * and ! in term */ - wrbuf_putc(term_dict, '('); - if (!term_105(zm, &termp, term_dict, space_split, display_term, 1)) + else { - *term_sub = 0; - return ZEBRA_OK; + zebra_setError_zint(zh, + YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE, + relation_value); + return ZEBRA_FAIL; } - wrbuf_putc(term_dict, ')'); - break; - case 106: /* process * and ! 
in term */ - wrbuf_putc(term_dict, '('); - if (!term_105(zm, &termp, term_dict, space_split, display_term, 0)) + } + else + { + /* non-ICU case. using string.chr and friends */ + switch (truncation_value) { - *term_sub = 0; - return ZEBRA_OK; + case -1: /* not specified */ + case 100: /* do not truncate */ + if (!string_relation(zh, zapt, &termp, term_dict, + attributeSet, + zm, space_split, display_term, + &relation_error)) + { + if (relation_error) + { + zebra_setError(zh, relation_error, 0); + return ZEBRA_FAIL; + } + *term_sub = 0; + return ZEBRA_OK; + } + break; + case 1: /* right truncation */ + wrbuf_putc(term_dict, '('); + if (!term_100(zm, &termp, term_dict, space_split, display_term)) + { + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_puts(term_dict, ".*)"); + break; + case 2: /* left truncation */ + wrbuf_puts(term_dict, "(.*"); + if (!term_100(zm, &termp, term_dict, space_split, display_term)) + { + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_putc(term_dict, ')'); + break; + case 3: /* left&right truncation */ + wrbuf_puts(term_dict, "(.*"); + if (!term_100(zm, &termp, term_dict, space_split, display_term)) + { + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_puts(term_dict, ".*)"); + break; + case 101: /* process # in term */ + wrbuf_putc(term_dict, '('); + if (!term_101(zm, &termp, term_dict, space_split, display_term)) + { + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_puts(term_dict, ")"); + break; + case 102: /* Regexp-1 */ + wrbuf_putc(term_dict, '('); + if (!term_102(zm, &termp, term_dict, space_split, display_term)) + { + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_putc(term_dict, ')'); + break; + case 103: /* Regexp-2 */ + regex_range = 1; + wrbuf_putc(term_dict, '('); + if (!term_103(zm, &termp, term_dict, &regex_range, + space_split, display_term)) + { + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_putc(term_dict, ')'); + break; + case 104: /* process # and ! 
in term */ + wrbuf_putc(term_dict, '('); + if (!term_104(zm, &termp, term_dict, space_split, display_term)) + { + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_putc(term_dict, ')'); + break; + case 105: /* process * and ! in term */ + wrbuf_putc(term_dict, '('); + if (!term_105(zm, &termp, term_dict, space_split, display_term, 1)) + { + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_putc(term_dict, ')'); + break; + case 106: /* process * and ! in term */ + wrbuf_putc(term_dict, '('); + if (!term_105(zm, &termp, term_dict, space_split, display_term, 0)) + { + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_putc(term_dict, ')'); + break; + default: + zebra_setError_zint(zh, + YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE, + truncation_value); + return ZEBRA_FAIL; } - wrbuf_putc(term_dict, ')'); - break; - default: - zebra_setError_zint(zh, - YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE, - truncation_value); - return ZEBRA_FAIL; } if (1) { @@ -2578,6 +2645,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab