X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Frpnsearch.c;h=5fa197fe929bc6f30bf9820ee162ab79c181250f;hp=d1b9b88b8b544b25567497aea04e73bc019be4db;hb=03419e1f6a4ae8a5b255e7c215da40678c30bb25;hpb=896b30853daabb6294afe8b0a2f74fa6d6e397d8 diff --git a/index/rpnsearch.c b/index/rpnsearch.c index d1b9b88..5fa197f 100644 --- a/index/rpnsearch.c +++ b/index/rpnsearch.c @@ -1,8 +1,5 @@ -/* $Id: rpnsearch.c,v 1.27 2007-12-07 14:09:09 adam Exp $ - Copyright (C) 1995-2007 - Index Data ApS - -This file is part of the Zebra server. +/* This file is part of the Zebra server. + Copyright (C) 1994-2010 Index Data Zebra is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -157,7 +154,7 @@ static int grep_handle(char *name, const char *info, void *p) } static int term_pre(zebra_map_t zm, const char **src, - const char *ct1, const char *ct2, int first) + const char *ct1, int first) { const char *s1, *s0 = *src; const char **map; @@ -167,8 +164,6 @@ static int term_pre(zebra_map_t zm, const char **src, { if (ct1 && strchr(ct1, *s0)) break; - if (ct2 && strchr(ct2, *s0)) - break; s1 = s0; map = zebra_maps_input(zm, &s1, strlen(s1), first); if (**map != *CHR_SPACE) @@ -206,16 +201,16 @@ static void esc_str(char *out_buf, size_t out_size, } } -#define REGEX_CHARS " []()|.*+?!" 
+#define REGEX_CHARS " ^[]()|.*+?!\"$" static void add_non_space(const char *start, const char *end, WRBUF term_dict, - char *dst_term, int *dst_ptr, + WRBUF display_term, const char **map, int q_map_match) { size_t sz = end - start; - memcpy(dst_term + *dst_ptr, start, sz); - (*dst_ptr) += sz; + + wrbuf_write(display_term, start, sz); if (!q_map_match) { while (start < end) @@ -238,45 +233,69 @@ static void add_non_space(const char *start, const char *end, static int term_100_icu(zebra_map_t zm, const char **src, WRBUF term_dict, int space_split, - char *dst_term) + WRBUF display_term, + int right_trunc) { int i; const char *res_buf = 0; size_t res_len = 0; - if (!zebra_map_tokenize_next(zm, &res_buf, &res_len)) + const char *display_buf; + size_t display_len; + if (!zebra_map_tokenize_next(zm, &res_buf, &res_len, + &display_buf, &display_len)) { *src += strlen(*src); return 0; } - strcat(dst_term, *src); + wrbuf_write(display_term, display_buf, display_len); + if (right_trunc) + { + /* ICU sort keys seem to be of the form + basechars \x01 accents \x01 length + For now we'll just right truncate from basechars . This + may give false hits due to accents not being used. + */ + i = res_len; + while (--i >= 0 && res_buf[i] != '\x01') + ; + if (i > 0) + { + while (--i >= 0 && res_buf[i] != '\x01') + ; + } + if (i == 0) + { /* did not find base chars at all. 
Throw error */ + return -1; + } + res_len = i; /* reduce res_len */ + } for (i = 0; i < res_len; i++) { - if (strchr(REGEX_CHARS, res_buf[i])) + if (strchr(REGEX_CHARS "\\", res_buf[i])) wrbuf_putc(term_dict, '\\'); if (res_buf[i] < 32) wrbuf_putc(term_dict, 1); + wrbuf_putc(term_dict, res_buf[i]); } + if (right_trunc) + wrbuf_puts(term_dict, ".*"); return 1; } /* term_100: handle term, where trunc = none(no operators at all) */ static int term_100(zebra_map_t zm, const char **src, WRBUF term_dict, int space_split, - char *dst_term) + WRBUF display_term) { const char *s0; const char **map; int i = 0; - int j = 0; const char *space_start = 0; const char *space_end = 0; - if (zebra_maps_is_icu(zm)) - return term_100_icu(zm, src, term_dict, space_split, dst_term); - - if (!term_pre(zm, src, NULL, NULL, !space_split)) + if (!term_pre(zm, src, 0, !space_split)) return 0; s0 = *src; while (*s0) @@ -303,7 +322,7 @@ static int term_100(zebra_map_t zm, { if (strchr(REGEX_CHARS, *space_start)) wrbuf_putc(term_dict, '\\'); - dst_term[j++] = *space_start; + wrbuf_putc(display_term, *space_start); wrbuf_putc(term_dict, *space_start); space_start++; @@ -314,10 +333,8 @@ static int term_100(zebra_map_t zm, } i++; - add_non_space(s1, s0, term_dict, dst_term, &j, - map, q_map_match); + add_non_space(s1, s0, term_dict, display_term, map, q_map_match); } - dst_term[j] = '\0'; *src = s0; return i; } @@ -325,14 +342,13 @@ static int term_100(zebra_map_t zm, /* term_101: handle term, where trunc = Process # */ static int term_101(zebra_map_t zm, const char **src, WRBUF term_dict, int space_split, - char *dst_term) + WRBUF display_term) { const char *s0; const char **map; int i = 0; - int j = 0; - if (!term_pre(zm, src, "#", "#", !space_split)) + if (!term_pre(zm, src, "#", !space_split)) return 0; s0 = *src; while (*s0) @@ -341,7 +357,8 @@ static int term_101(zebra_map_t zm, { i++; wrbuf_puts(term_dict, ".*"); - dst_term[j++] = *s0++; + wrbuf_putc(display_term, *s0); + s0++; } else { @@ 
-352,11 +369,9 @@ static int term_101(zebra_map_t zm, break; i++; - add_non_space(s1, s0, term_dict, dst_term, &j, - map, q_map_match); + add_non_space(s1, s0, term_dict, display_term, map, q_map_match); } } - dst_term[j++] = '\0'; *src = s0; return i; } @@ -364,14 +379,13 @@ static int term_101(zebra_map_t zm, /* term_103: handle term, where trunc = re-2 (regular expressions) */ static int term_103(zebra_map_t zm, const char **src, WRBUF term_dict, int *errors, int space_split, - char *dst_term) + WRBUF display_term) { int i = 0; - int j = 0; const char *s0; const char **map; - if (!term_pre(zm, src, "^\\()[].*+?|", "(", !space_split)) + if (!term_pre(zm, src, "^\\()[].*+?|", !space_split)) return 0; s0 = *src; if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] && @@ -386,7 +400,7 @@ static int term_103(zebra_map_t zm, const char **src, { if (strchr("^\\()[].*+?|-", *s0)) { - dst_term[j++] = *s0; + wrbuf_putc(display_term, *s0); wrbuf_putc(term_dict, *s0); s0++; i++; @@ -400,11 +414,9 @@ static int term_103(zebra_map_t zm, const char **src, break; i++; - add_non_space(s1, s0, term_dict, dst_term, &j, - map, q_map_match); + add_non_space(s1, s0, term_dict, display_term, map, q_map_match); } } - dst_term[j] = '\0'; *src = s0; return i; @@ -412,22 +424,21 @@ static int term_103(zebra_map_t zm, const char **src, /* term_103: handle term, where trunc = re-1 (regular expressions) */ static int term_102(zebra_map_t zm, const char **src, - WRBUF term_dict, int space_split, char *dst_term) + WRBUF term_dict, int space_split, WRBUF display_term) { - return term_103(zm, src, term_dict, NULL, space_split, dst_term); + return term_103(zm, src, term_dict, NULL, space_split, display_term); } /* term_104: handle term, process # and ! 
*/ static int term_104(zebra_map_t zm, const char **src, - WRBUF term_dict, int space_split, char *dst_term) + WRBUF term_dict, int space_split, WRBUF display_term) { const char *s0; const char **map; int i = 0; - int j = 0; - if (!term_pre(zm, src, "?*#", "?*#", !space_split)) + if (!term_pre(zm, src, "?*#", !space_split)) return 0; s0 = *src; while (*s0) @@ -435,14 +446,16 @@ static int term_104(zebra_map_t zm, const char **src, if (*s0 == '?') { i++; - dst_term[j++] = *s0++; + wrbuf_putc(display_term, *s0); + s0++; if (*s0 >= '0' && *s0 <= '9') { int limit = 0; while (*s0 >= '0' && *s0 <= '9') { limit = limit * 10 + (*s0 - '0'); - dst_term[j++] = *s0++; + wrbuf_putc(display_term, *s0); + s0++; } if (limit > 20) limit = 20; @@ -460,13 +473,15 @@ static int term_104(zebra_map_t zm, const char **src, { i++; wrbuf_puts(term_dict, ".*"); - dst_term[j++] = *s0++; + wrbuf_putc(display_term, *s0); + s0++; } else if (*s0 == '#') { i++; wrbuf_puts(term_dict, "."); - dst_term[j++] = *s0++; + wrbuf_putc(display_term, *s0); + s0++; } else { @@ -477,11 +492,9 @@ static int term_104(zebra_map_t zm, const char **src, break; i++; - add_non_space(s1, s0, term_dict, dst_term, &j, - map, q_map_match); + add_non_space(s1, s0, term_dict, display_term, map, q_map_match); } } - dst_term[j++] = '\0'; *src = s0; return i; } @@ -489,14 +502,13 @@ static int term_104(zebra_map_t zm, const char **src, /* term_105/106: handle term, where trunc = Process * and ! 
and right trunc */ static int term_105(zebra_map_t zm, const char **src, WRBUF term_dict, int space_split, - char *dst_term, int right_truncate) + WRBUF display_term, int right_truncate) { const char *s0; const char **map; int i = 0; - int j = 0; - if (!term_pre(zm, src, "*!", "*!", !space_split)) + if (!term_pre(zm, src, "\\*!", !space_split)) return 0; s0 = *src; while (*s0) @@ -505,13 +517,22 @@ static int term_105(zebra_map_t zm, const char **src, { i++; wrbuf_puts(term_dict, ".*"); - dst_term[j++] = *s0++; + wrbuf_putc(display_term, *s0); + s0++; } else if (*s0 == '!') { i++; wrbuf_putc(term_dict, '.'); - dst_term[j++] = *s0++; + wrbuf_putc(display_term, *s0); + s0++; + } + else if (*s0 == '\\') + { + i++; + wrbuf_puts(term_dict, "\\\\"); + wrbuf_putc(display_term, *s0); + s0++; } else { @@ -522,13 +543,11 @@ static int term_105(zebra_map_t zm, const char **src, break; i++; - add_non_space(s1, s0, term_dict, dst_term, &j, - map, q_map_match); + add_non_space(s1, s0, term_dict, display_term, map, q_map_match); } } if (right_truncate) wrbuf_puts(term_dict, ".*"); - dst_term[j++] = '\0'; *src = s0; return i; } @@ -673,7 +692,8 @@ void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx) static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, WRBUF term_dict, const Odr_oid *attributeSet, - zebra_map_t zm, int space_split, char *term_dst, + zebra_map_t zm, int space_split, + WRBUF display_term, int *error_code) { AttrType relation; @@ -689,7 +709,7 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, switch (relation_value) { case 1: - if (!term_100(zm, term_sub, term_component, space_split, term_dst)) + if (!term_100(zm, term_sub, term_component, space_split, display_term)) { wrbuf_destroy(term_component); return 0; @@ -723,7 +743,7 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, wrbuf_putc(term_dict, ')'); break; case 2: - if (!term_100(zm, term_sub, term_component, 
space_split, term_dst)) + if (!term_100(zm, term_sub, term_component, space_split, display_term)) { wrbuf_destroy(term_component); return 0; @@ -758,7 +778,7 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, wrbuf_putc(term_dict, ')'); break; case 5: - if (!term_100(zm, term_sub, term_component, space_split, term_dst)) + if (!term_100(zm, term_sub, term_component, space_split, display_term)) { wrbuf_destroy(term_component); return 0; @@ -791,7 +811,7 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, wrbuf_putc(term_dict, ')'); break; case 4: - if (!term_100(zm, term_sub, term_component, space_split, term_dst)) + if (!term_100(zm, term_sub, term_component, space_split, display_term)) { wrbuf_destroy(term_component); return 0; @@ -832,7 +852,7 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, if (!**term_sub) return 1; yaz_log(log_level_rpn, "Relation ="); - if (!term_100(zm, term_sub, term_component, space_split, term_dst)) + if (!term_100(zm, term_sub, term_component, space_split, display_term)) { wrbuf_destroy(term_component); return 0; @@ -862,7 +882,7 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, const char *index_type, int complete_flag, - char *term_dst, + WRBUF display_term, const char *xpath_use, struct ord_list **ol, zebra_map_t zm); @@ -876,9 +896,10 @@ ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh, AttrType term_ref_id_attr; AttrType hits_limit_attr; int term_ref_id_int; + zint hits_limit_from_attr; attr_init_APT(&hits_limit_attr, zapt, 11); - *hits_limit_value = attr_find(&hits_limit_attr, NULL); + hits_limit_from_attr = attr_find(&hits_limit_attr, NULL); attr_init_APT(&term_ref_id_attr, zapt, 10); term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str); @@ -888,26 +909,9 @@ ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh, sprintf(res, "%d", term_ref_id_int); 
*term_ref_id_str = res; } + if (hits_limit_from_attr != -1) + *hits_limit_value = hits_limit_from_attr; - /* no limit given ? */ - if (*hits_limit_value == -1) - { - if (*term_ref_id_str) - { - /* use global if term_ref is present */ - *hits_limit_value = zh->approx_limit; - } - else - { - /* no counting if term_ref is not present */ - *hits_limit_value = 0; - } - } - else if (*hits_limit_value == 0) - { - /* 0 is the same as global limit */ - *hits_limit_value = zh->approx_limit; - } yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT, *term_ref_id_str ? *term_ref_id_str : "none", *hits_limit_value); @@ -919,10 +923,10 @@ ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh, static ZEBRA_RES search_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, - const Odr_oid *attributeSet, NMEM stream, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, struct grep_info *grep_info, const char *index_type, int complete_flag, - char *term_dst, const char *rank_type, const char *xpath_use, NMEM rset_nmem, @@ -932,9 +936,10 @@ static ZEBRA_RES search_term(ZebraHandle zh, { ZEBRA_RES res; struct ord_list *ol; - zint hits_limit_value; + zint hits_limit_value = hits_limit; const char *term_ref_id_str = 0; WRBUF term_dict = wrbuf_alloc(); + WRBUF display_term = wrbuf_alloc(); *rset = 0; zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream); @@ -942,22 +947,23 @@ static ZEBRA_RES search_term(ZebraHandle zh, res = string_term(zh, zapt, term_sub, term_dict, attributeSet, stream, grep_info, index_type, complete_flag, - term_dst, xpath_use, &ol, zm); + display_term, xpath_use, &ol, zm); wrbuf_destroy(term_dict); - if (res != ZEBRA_OK) - return res; - if (!*term_sub) /* no more terms ? 
*/ - return res; - yaz_log(log_level_rpn, "term: %s", term_dst); - *rset = rset_trunc(zh, grep_info->isam_p_buf, - grep_info->isam_p_indx, term_dst, - strlen(term_dst), rank_type, 1 /* preserve pos */, - zapt->term->which, rset_nmem, - kc, kc->scope, ol, index_type, hits_limit_value, - term_ref_id_str); - if (!*rset) - return ZEBRA_FAIL; - return ZEBRA_OK; + if (res == ZEBRA_OK && *term_sub) + { + yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term)); + *rset = rset_trunc(zh, grep_info->isam_p_buf, + grep_info->isam_p_indx, wrbuf_buf(display_term), + wrbuf_len(display_term), rank_type, + 1 /* preserve pos */, + zapt->term->which, rset_nmem, + kc, kc->scope, ol, index_type, hits_limit_value, + term_ref_id_str); + if (!*rset) + res = ZEBRA_FAIL; + } + wrbuf_destroy(display_term); + return res; } static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, @@ -966,7 +972,7 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, const char *index_type, int complete_flag, - char *term_dst, + WRBUF display_term, const char *xpath_use, struct ord_list **ol, zebra_map_t zm) @@ -1016,113 +1022,161 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, wrbuf_putc(term_dict, ')'); prefix_len = wrbuf_len(term_dict); - - switch (truncation_value) - { - case -1: /* not specified */ - case 100: /* do not truncate */ - if (!string_relation(zh, zapt, &termp, term_dict, - attributeSet, - zm, space_split, term_dst, - &relation_error)) + + if (zebra_maps_is_icu(zm)) + { + int relation_value; + AttrType relation; + + attr_init_APT(&relation, zapt, 2); + relation_value = attr_find(&relation, NULL); + if (relation_value == 103) /* always matches */ + termp += strlen(termp); /* move to end of term */ + else if (relation_value == 3 || relation_value == 102 || relation_value == -1) { - if (relation_error) + /* ICU case */ + switch (truncation_value) { - 
zebra_setError(zh, relation_error, 0); + case -1: /* not specified */ + case 100: /* do not truncate */ + if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0)) + { + *term_sub = 0; + return ZEBRA_OK; + } + break; + case 1: /* right truncation */ + if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1)) + { + *term_sub = 0; + return ZEBRA_OK; + } + break; + default: + zebra_setError_zint(zh, + YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE, + truncation_value); return ZEBRA_FAIL; } - *term_sub = 0; - return ZEBRA_OK; - } - break; - case 1: /* right truncation */ - wrbuf_putc(term_dict, '('); - if (!term_100(zm, &termp, term_dict, space_split, term_dst)) - { - *term_sub = 0; - return ZEBRA_OK; - } - wrbuf_puts(term_dict, ".*)"); - break; - case 2: /* keft truncation */ - wrbuf_puts(term_dict, "(.*"); - if (!term_100(zm, &termp, term_dict, space_split, term_dst)) - { - *term_sub = 0; - return ZEBRA_OK; - } - wrbuf_putc(term_dict, ')'); - break; - case 3: /* left&right truncation */ - wrbuf_puts(term_dict, "(.*"); - if (!term_100(zm, &termp, term_dict, space_split, term_dst)) - { - *term_sub = 0; - return ZEBRA_OK; - } - wrbuf_puts(term_dict, ".*)"); - break; - case 101: /* process # in term */ - wrbuf_putc(term_dict, '('); - if (!term_101(zm, &termp, term_dict, space_split, term_dst)) - { - *term_sub = 0; - return ZEBRA_OK; - } - wrbuf_puts(term_dict, ")"); - break; - case 102: /* Regexp-1 */ - wrbuf_putc(term_dict, '('); - if (!term_102(zm, &termp, term_dict, space_split, term_dst)) - { - *term_sub = 0; - return ZEBRA_OK; - } - wrbuf_putc(term_dict, ')'); - break; - case 103: /* Regexp-2 */ - regex_range = 1; - wrbuf_putc(term_dict, '('); - if (!term_103(zm, &termp, term_dict, &regex_range, - space_split, term_dst)) - { - *term_sub = 0; - return ZEBRA_OK; } - wrbuf_putc(term_dict, ')'); - break; - case 104: /* process # and ! 
in term */ - wrbuf_putc(term_dict, '('); - if (!term_104(zm, &termp, term_dict, space_split, term_dst)) - { - *term_sub = 0; - return ZEBRA_OK; - } - wrbuf_putc(term_dict, ')'); - break; - case 105: /* process * and ! in term */ - wrbuf_putc(term_dict, '('); - if (!term_105(zm, &termp, term_dict, space_split, term_dst, 1)) + else { - *term_sub = 0; - return ZEBRA_OK; + zebra_setError_zint(zh, + YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE, + relation_value); + return ZEBRA_FAIL; } - wrbuf_putc(term_dict, ')'); - break; - case 106: /* process * and ! in term */ - wrbuf_putc(term_dict, '('); - if (!term_105(zm, &termp, term_dict, space_split, term_dst, 0)) + } + else + { + /* non-ICU case. using string.chr and friends */ + switch (truncation_value) { - *term_sub = 0; - return ZEBRA_OK; + case -1: /* not specified */ + case 100: /* do not truncate */ + if (!string_relation(zh, zapt, &termp, term_dict, + attributeSet, + zm, space_split, display_term, + &relation_error)) + { + if (relation_error) + { + zebra_setError(zh, relation_error, 0); + return ZEBRA_FAIL; + } + *term_sub = 0; + return ZEBRA_OK; + } + break; + case 1: /* right truncation */ + wrbuf_putc(term_dict, '('); + if (!term_100(zm, &termp, term_dict, space_split, display_term)) + { + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_puts(term_dict, ".*)"); + break; + case 2: /* left truncation */ + wrbuf_puts(term_dict, "(.*"); + if (!term_100(zm, &termp, term_dict, space_split, display_term)) + { + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_putc(term_dict, ')'); + break; + case 3: /* left&right truncation */ + wrbuf_puts(term_dict, "(.*"); + if (!term_100(zm, &termp, term_dict, space_split, display_term)) + { + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_puts(term_dict, ".*)"); + break; + case 101: /* process # in term */ + wrbuf_putc(term_dict, '('); + if (!term_101(zm, &termp, term_dict, space_split, display_term)) + { + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_puts(term_dict, ")"); + break; + case 102: /* 
Regexp-1 */ + wrbuf_putc(term_dict, '('); + if (!term_102(zm, &termp, term_dict, space_split, display_term)) + { + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_putc(term_dict, ')'); + break; + case 103: /* Regexp-2 */ + regex_range = 1; + wrbuf_putc(term_dict, '('); + if (!term_103(zm, &termp, term_dict, &regex_range, + space_split, display_term)) + { + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_putc(term_dict, ')'); + break; + case 104: /* process # and ! in term */ + wrbuf_putc(term_dict, '('); + if (!term_104(zm, &termp, term_dict, space_split, display_term)) + { + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_putc(term_dict, ')'); + break; + case 105: /* process * and ! in term */ + wrbuf_putc(term_dict, '('); + if (!term_105(zm, &termp, term_dict, space_split, display_term, 1)) + { + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_putc(term_dict, ')'); + break; + case 106: /* process * and ! in term */ + wrbuf_putc(term_dict, '('); + if (!term_105(zm, &termp, term_dict, space_split, display_term, 0)) + { + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_putc(term_dict, ')'); + break; + default: + zebra_setError_zint(zh, + YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE, + truncation_value); + return ZEBRA_FAIL; } - wrbuf_putc(term_dict, ')'); - break; - default: - zebra_setError_zint(zh, - YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE, - truncation_value); - return ZEBRA_FAIL; } if (1) { @@ -1226,6 +1280,7 @@ static ZEBRA_RES search_terms_chrmap(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz, const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, const char *index_type, int complete_flag, const char *rank_type, @@ -1235,16 +1290,14 @@ static ZEBRA_RES search_terms_chrmap(ZebraHandle zh, struct rset_key_control *kc, zebra_map_t zm) { - char term_dst[IT_MAX_WORD+1]; struct grep_info grep_info; const char *termp = termz; int alloc_sets = 0; *num_result_sets = 0; - *term_dst = 0; if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL) return 
ZEBRA_FAIL; - while(1) + while (1) { ZEBRA_RES res; @@ -1258,10 +1311,10 @@ static ZEBRA_RES search_terms_chrmap(ZebraHandle zh, alloc_sets = alloc_sets + add; *result_sets = rnew; } - res = search_term(zh, zapt, &termp, attributeSet, + res = search_term(zh, zapt, &termp, attributeSet, hits_limit, stream, &grep_info, index_type, complete_flag, - term_dst, rank_type, + rank_type, xpath_use, rset_nmem, &(*result_sets)[*num_result_sets], kc, zm); @@ -1304,6 +1357,7 @@ static ZEBRA_RES search_terms_list(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz, const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, const char *index_type, int complete_flag, const char *rank_type, @@ -1315,7 +1369,7 @@ static ZEBRA_RES search_terms_list(ZebraHandle zh, zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type); if (zebra_maps_is_icu(zm)) zebra_map_tokenize_start(zm, termz, strlen(termz)); - return search_terms_chrmap(zh, zapt, termz, attributeSet, + return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit, stream, index_type, complete_flag, rank_type, xpath_use, rset_nmem, result_sets, num_result_sets, @@ -1393,6 +1447,7 @@ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, const char *index_type, int complete_flag, @@ -1405,7 +1460,7 @@ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, RSET *result_sets = 0; int num_result_sets = 0; ZEBRA_RES res = - search_terms_list(zh, zapt, termz_org, attributeSet, + search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit, stream, index_type, complete_flag, rank_type, xpath_use, rset_nmem, @@ -1458,6 +1513,7 @@ static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, const char *index_type, int complete_flag, @@ -1471,7 +1527,7 @@ static ZEBRA_RES 
rpn_search_APT_or_list(ZebraHandle zh, int num_result_sets = 0; int i; ZEBRA_RES res = - search_terms_list(zh, zapt, termz_org, attributeSet, + search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit, stream, index_type, complete_flag, rank_type, xpath_use, rset_nmem, @@ -1525,6 +1581,7 @@ static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, const char *index_type, int complete_flag, @@ -1538,7 +1595,7 @@ static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh, int num_result_sets = 0; int i; ZEBRA_RES res = - search_terms_list(zh, zapt, termz_org, attributeSet, + search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit, stream, index_type, complete_flag, rank_type, xpath_use, rset_nmem, @@ -1595,7 +1652,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, struct grep_info *grep_info, int *max_pos, zebra_map_t zm, - char *term_dst, + WRBUF display_term, int *error_code) { AttrType relation; @@ -1614,7 +1671,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, { case 1: yaz_log(log_level_rpn, "Relation <"); - if (!term_100(zm, term_sub, term_num, 1, term_dst)) + if (!term_100(zm, term_sub, term_num, 1, display_term)) { wrbuf_destroy(term_num); return 0; @@ -1624,7 +1681,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, break; case 2: yaz_log(log_level_rpn, "Relation <="); - if (!term_100(zm, term_sub, term_num, 1, term_dst)) + if (!term_100(zm, term_sub, term_num, 1, display_term)) { wrbuf_destroy(term_num); return 0; @@ -1634,7 +1691,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, break; case 4: yaz_log(log_level_rpn, "Relation >="); - if (!term_100(zm, term_sub, term_num, 1, term_dst)) + if (!term_100(zm, term_sub, term_num, 1, display_term)) { wrbuf_destroy(term_num); return 0; @@ -1644,7 +1701,7 @@ static int 
numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, break; case 5: yaz_log(log_level_rpn, "Relation >"); - if (!term_100(zm, term_sub, term_num, 1, term_dst)) + if (!term_100(zm, term_sub, term_num, 1, display_term)) { wrbuf_destroy(term_num); return 0; @@ -1655,7 +1712,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, case -1: case 3: yaz_log(log_level_rpn, "Relation ="); - if (!term_100(zm, term_sub, term_num, 1, term_dst)) + if (!term_100(zm, term_sub, term_num, 1, display_term)) { wrbuf_destroy(term_num); return 0; @@ -1691,7 +1748,7 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, const char *index_type, int complete_flag, - char *term_dst, + WRBUF display_term, const char *xpath_use, struct ord_list **ol) { @@ -1731,7 +1788,7 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, if (!numeric_relation(zh, zapt, &termp, term_dict, attributeSet, grep_info, &max_pos, zm, - term_dst, &relation_error)) + display_term, &relation_error)) { if (relation_error) { @@ -1760,7 +1817,6 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, RSET *rset, struct rset_key_control *kc) { - char term_dst[IT_MAX_WORD+1]; const char *termp = termz; RSET *result_sets = 0; int num_result_sets = 0; @@ -1780,6 +1836,7 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, { struct ord_list *ol; WRBUF term_dict = wrbuf_alloc(); + WRBUF display_term = wrbuf_alloc(); if (alloc_sets == num_result_sets) { int add = 10; @@ -1795,20 +1852,24 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, res = numeric_term(zh, zapt, &termp, term_dict, attributeSet, stream, &grep_info, index_type, complete_flag, - term_dst, xpath_use, &ol); + display_term, xpath_use, &ol); wrbuf_destroy(term_dict); if (res == ZEBRA_FAIL || termp == 0) + { + wrbuf_destroy(display_term); break; - yaz_log(YLOG_DEBUG, "term: %s", term_dst); + } + 
yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term)); result_sets[num_result_sets] = rset_trunc(zh, grep_info.isam_p_buf, - grep_info.isam_p_indx, term_dst, - strlen(term_dst), rank_type, + grep_info.isam_p_indx, wrbuf_buf(display_term), + wrbuf_len(display_term), rank_type, 0 /* preserve position */, zapt->term->which, rset_nmem, kc, kc->scope, ol, index_type, hits_limit_value, term_ref_id_str); + wrbuf_destroy(display_term); if (!result_sets[num_result_sets]) break; num_result_sets++; @@ -1921,7 +1982,7 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet); sk->u.sortAttributes->list = zapt->attributes; - sks->sortRelation = (int *) + sks->sortRelation = (Odr_int *) nmem_malloc(stream, sizeof(*sks->sortRelation)); if (sort_relation_value == 1) *sks->sortRelation = Z_SortKeySpec_ascending; @@ -1930,7 +1991,7 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, else *sks->sortRelation = Z_SortKeySpec_ascending; - sks->caseSensitivity = (int *) + sks->caseSensitivity = (Odr_int *) nmem_malloc(stream, sizeof(*sks->caseSensitivity)); *sks->caseSensitivity = 0; @@ -2157,14 +2218,16 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, static ZEBRA_RES rpn_search_database(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const Odr_oid *attributeSet, NMEM stream, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, Z_SortKeySpecList *sort_sequence, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc); static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const Odr_oid *attributeSet, NMEM stream, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, Z_SortKeySpecList *sort_sequence, int num_bases, const char **basenames, NMEM rset_nmem, @@ -2184,7 +2247,7 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, res = ZEBRA_FAIL; break; } - res = rpn_search_database(zh, zapt, attributeSet, stream, + 
res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream, sort_sequence, rset_nmem, rsets+i, kc); if (res != ZEBRA_OK) @@ -2211,7 +2274,8 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, static ZEBRA_RES rpn_search_database(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const Odr_oid *attributeSet, NMEM stream, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, Z_SortKeySpecList *sort_sequence, NMEM rset_nmem, RSET *rset, @@ -2284,7 +2348,8 @@ static ZEBRA_RES rpn_search_database(ZebraHandle zh, */ if (!strcmp(search_type, "phrase")) { - res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream, + res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit, + stream, index_type, complete_flag, rank_type, xpath_use, rset_nmem, @@ -2292,7 +2357,8 @@ static ZEBRA_RES rpn_search_database(ZebraHandle zh, } else if (!strcmp(search_type, "and-list")) { - res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream, + res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit, + stream, index_type, complete_flag, rank_type, xpath_use, rset_nmem, @@ -2300,7 +2366,8 @@ static ZEBRA_RES rpn_search_database(ZebraHandle zh, } else if (!strcmp(search_type, "or-list")) { - res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream, + res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit, + stream, index_type, complete_flag, rank_type, xpath_use, rset_nmem, @@ -2333,7 +2400,8 @@ static ZEBRA_RES rpn_search_database(ZebraHandle zh, } static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, - const Odr_oid *attributeSet, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, int num_bases, const char **basenames, @@ -2374,6 +2442,7 @@ ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs, ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs, const Odr_oid *attributeSet, + 
zint hits_limit, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, int num_bases, const char **basenames, @@ -2384,7 +2453,7 @@ ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs, ZEBRA_RES res; struct rset_key_control *kc = zebra_key_control_create(zh); - res = rpn_search_structure(zh, zs, attributeSet, + res = rpn_search_structure(zh, zs, attributeSet, hits_limit, stream, rset_nmem, sort_sequence, num_bases, basenames, @@ -2410,7 +2479,7 @@ ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs, } ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, - const Odr_oid *attributeSet, + const Odr_oid *attributeSet, zint hits_limit, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, int num_bases, const char **basenames, @@ -2429,7 +2498,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, int num_result_sets_r = 0; res = rpn_search_structure(zh, zs->u.complex->s1, - attributeSet, stream, rset_nmem, + attributeSet, hits_limit, stream, rset_nmem, sort_sequence, num_bases, basenames, &result_sets_l, &num_result_sets_l, @@ -2442,7 +2511,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, return res; } res = rpn_search_structure(zh, zs->u.complex->s2, - attributeSet, stream, rset_nmem, + attributeSet, hits_limit, stream, rset_nmem, sort_sequence, num_bases, basenames, &result_sets_r, &num_result_sets_r, @@ -2537,7 +2606,8 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, { yaz_log(YLOG_DEBUG, "rpn_search_APT"); res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm, - attributeSet, stream, sort_sequence, + attributeSet, hits_limit, + stream, sort_sequence, num_bases, basenames, rset_nmem, &rset, kc); if (res != ZEBRA_OK) @@ -2579,6 +2649,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab