X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Frpnsearch.c;h=47c1c234c84500c4dda68aab85c7a8301fc3cba5;hp=b399a49fcec944432b4e8c0205aaf9a257b14e1a;hb=c5971ebf8a88865ed9a1f7c8cf9daa22544f07be;hpb=e199777080c6fa0963d51b7df1763fd5286ca9a4 diff --git a/index/rpnsearch.c b/index/rpnsearch.c index b399a49..47c1c23 100644 --- a/index/rpnsearch.c +++ b/index/rpnsearch.c @@ -1,8 +1,5 @@ -/* $Id: rpnsearch.c,v 1.19 2007-10-31 16:56:14 adam Exp $ - Copyright (C) 1995-2007 - Index Data ApS - -This file is part of the Zebra server. +/* This file is part of the Zebra server. + Copyright (C) Index Data Zebra is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -20,6 +17,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#if HAVE_CONFIG_H +#include +#endif #include #include #ifdef WIN32 @@ -65,23 +65,26 @@ void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm, struct rpn_char_map_info *map_info) { map_info->zm = zm; - dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler); + if (zebra_maps_is_icu(zm)) + dict_grep_cmap(reg->dict, 0, 0); + else + dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler); } -#define TERM_COUNT - -struct grep_info { -#ifdef TERM_COUNT - int *term_no; -#endif +#define TERM_COUNT + +struct grep_info { +#ifdef TERM_COUNT + int *term_no; +#endif ISAM_P *isam_p_buf; - int isam_p_size; + int isam_p_size; int isam_p_indx; int trunc_max; ZebraHandle zh; const char *index_type; ZebraSet termset; -}; +}; static int add_isam_p(const char *name, const char *info, struct grep_info *p) @@ -99,8 +102,8 @@ static int add_isam_p(const char *name, const char *info, if (p->isam_p_indx == p->isam_p_size) { ISAM_P *new_isam_p_buf; -#ifdef TERM_COUNT - int *new_term_no; +#ifdef TERM_COUNT + int *new_term_no; #endif p->isam_p_size = 2*p->isam_p_size + 100; new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) * @@ -108,7 +111,7 @@ static int add_isam_p(const char *name, const char *info, if (p->isam_p_buf) { memcpy(new_isam_p_buf, p->isam_p_buf, - p->isam_p_indx * sizeof(*p->isam_p_buf)); + p->isam_p_indx * sizeof(*p->isam_p_buf)); xfree(p->isam_p_buf); } p->isam_p_buf = new_isam_p_buf; @@ -118,7 +121,7 @@ static int add_isam_p(const char *name, const char *info, if (p->term_no) { memcpy(new_term_no, p->isam_p_buf, - p->isam_p_indx * sizeof(*p->term_no)); + p->isam_p_indx * sizeof(*p->term_no)); xfree(p->term_no); } p->term_no = new_term_no; @@ -134,13 +137,13 @@ static int add_isam_p(const char *name, const char *info, int ord = 0; const char *index_name; int len = key_SU_decode(&ord, (const unsigned char *) name); - + zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len); yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp); zebraExplain_lookup_ord(p->zh->reg->zei, ord, 0 /* index_type */, &db, &index_name); yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name); - + resultSetAddTerm(p->zh, p->termset, name[len], db, index_name, term_tmp); } @@ -154,7 +157,7 @@ static int grep_handle(char *name, const char *info, void *p) } static int term_pre(zebra_map_t zm, const char **src, - const char *ct1, const char *ct2, int first) + const char *ct1, int first) { const char *s1, *s0 = *src; const char **map; @@ -164,8 +167,6 @@ static int term_pre(zebra_map_t zm, const char **src, { if (ct1 && strchr(ct1, *s0)) break; - if (ct2 && strchr(ct2, *s0)) - break; s1 = s0; map = zebra_maps_input(zm, &s1, strlen(s1), first); if (**map != *CHR_SPACE) @@ -186,7 +187,7 @@ static void esc_str(char *out_buf, size_t out_size, assert(in_buf); assert(out_size > 20); *out_buf = '\0'; - for (k = 0; k 0 && buf[--i] != '\x01') /* skip length */ + ; + while (i > 0 && buf[--i] != '\x01') /* skip accents */ + ; + return i; /* only basechars left */ +} + +static int term_102_icu(zebra_map_t zm, + const char **src, WRBUF term_dict, int space_split, + WRBUF display_term) +{ + int no_terms = 0; + const char *s0 = *src, *s1; + while (*s0 == ' ') + s0++; + s1 = s0; + for (;;) + { + if (*s1 == ' ' && space_split) + break; + else if (*s1 && !strchr(REGEX_CHARS "-", *s1)) + s1++; + else + { + /* EOF or regex reserved char */ + if (s0 != s1) + { + const char *res_buf = 0; + size_t res_len = 0; + const char *display_buf; + size_t display_len; + + zebra_map_tokenize_start(zm, s0, s1 - s0); + + if (zebra_map_tokenize_next(zm, &res_buf, &res_len, + &display_buf, &display_len)) + { + size_t i; + res_len = icu_basechars(res_buf, res_len); + for (i = 0; i < res_len; i++) + { + if (strchr(REGEX_CHARS "\\", res_buf[i])) + wrbuf_putc(term_dict, '\\'); + if (res_buf[i] < 32) + wrbuf_putc(term_dict, '\x01'); + + wrbuf_putc(term_dict, res_buf[i]); + } + wrbuf_write(display_term, display_buf, display_len); + + no_terms++; + } + } + if (*s1 == '\0') + break; + + wrbuf_putc(term_dict, *s1); + wrbuf_putc(display_term, *s1); + + s1++; + s0 = s1; + } + } + if (no_terms) + wrbuf_puts(term_dict, "\x01\x01.*"); + *src = s1; + return no_terms; +} + +static int term_100_icu(zebra_map_t zm, + const char **src, WRBUF term_dict, + WRBUF display_term, + int mode, + size_t token_number) +{ + size_t i; + const char *res_buf = 0; + size_t res_len = 0; + const char *display_buf; + size_t display_len; + + zebra_map_tokenize_start(zm, *src, strlen(*src)); + for (i = 0; i <= token_number; i++) + { + if (!zebra_map_tokenize_next(zm, &res_buf, &res_len, + &display_buf, &display_len)) + return 0; + } + wrbuf_write(display_term, display_buf, display_len); + if (mode) + { + res_len = icu_basechars(res_buf, res_len); + } + if (mode & 2) + wrbuf_puts(term_dict, ".*"); + for (i = 0; i < res_len; i++) + { + if (strchr(REGEX_CHARS "\\", res_buf[i])) + wrbuf_putc(term_dict, '\\'); + if (res_buf[i] < 32) + wrbuf_putc(term_dict, '\x01'); + + wrbuf_putc(term_dict, res_buf[i]); + } + if (mode & 1) + wrbuf_puts(term_dict, ".*"); + else if (mode) + wrbuf_puts(term_dict, "\x01\x01.*"); + return 1; +} + /* term_100: handle term, where trunc = none(no operators at all) */ static int term_100(zebra_map_t zm, const char **src, WRBUF term_dict, int space_split, - char *dst_term) + WRBUF display_term) { const char *s0; const char **map; int i = 0; - int j = 0; const char *space_start = 0; const char *space_end = 0; - if (!term_pre(zm, src, NULL, NULL, !space_split)) + if (!term_pre(zm, src, 0, !space_split)) return 0; s0 = *src; while (*s0) @@ -272,10 +391,10 @@ static int term_100(zebra_map_t zm, { if (strchr(REGEX_CHARS, *space_start)) wrbuf_putc(term_dict, '\\'); - dst_term[j++] = *space_start; + wrbuf_putc(display_term, *space_start); wrbuf_putc(term_dict, *space_start); space_start++; - + } /* and reset */ space_start = space_end = 0; @@ -283,10 +402,8 @@ static int term_100(zebra_map_t zm, } i++; - add_non_space(s1, s0, term_dict, dst_term, &j, - map, q_map_match); + add_non_space(s1, s0, term_dict, display_term, map, q_map_match); } - dst_term[j] = '\0'; *src = s0; return i; } @@ -294,14 +411,13 @@ static int term_100(zebra_map_t zm, /* term_101: handle term, where trunc = Process # */ static int term_101(zebra_map_t zm, const char **src, WRBUF term_dict, int space_split, - char *dst_term) + WRBUF display_term) { const char *s0; const char **map; int i = 0; - int j = 0; - if (!term_pre(zm, src, "#", "#", !space_split)) + if (!term_pre(zm, src, "#", !space_split)) return 0; s0 = *src; while (*s0) @@ -310,7 +426,8 @@ static int term_101(zebra_map_t zm, { i++; wrbuf_puts(term_dict, ".*"); - dst_term[j++] = *s0++; + wrbuf_putc(display_term, *s0); + s0++; } else { @@ -321,11 +438,9 @@ static int term_101(zebra_map_t zm, break; i++; - add_non_space(s1, s0, term_dict, dst_term, &j, - map, q_map_match); + add_non_space(s1, s0, term_dict, display_term, map, q_map_match); } } - dst_term[j++] = '\0'; *src = s0; return i; } @@ -333,14 +448,13 @@ static int term_101(zebra_map_t zm, /* term_103: handle term, where trunc = re-2 (regular expressions) */ static int term_103(zebra_map_t zm, const char **src, WRBUF term_dict, int *errors, int space_split, - char *dst_term) + WRBUF display_term) { int i = 0; - int j = 0; const char *s0; const char **map; - if (!term_pre(zm, src, "^\\()[].*+?|", "(", !space_split)) + if (!term_pre(zm, src, "^\\()[].*+?|", !space_split)) return 0; s0 = *src; if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] && @@ -355,7 +469,7 @@ static int term_103(zebra_map_t zm, const char **src, { if (strchr("^\\()[].*+?|-", *s0)) { - dst_term[j++] = *s0; + wrbuf_putc(display_term, *s0); wrbuf_putc(term_dict, *s0); s0++; i++; @@ -369,34 +483,31 @@ static int term_103(zebra_map_t zm, const char **src, break; i++; - add_non_space(s1, s0, term_dict, dst_term, &j, - map, q_map_match); + add_non_space(s1, s0, term_dict, display_term, map, q_map_match); } } - dst_term[j] = '\0'; *src = s0; - + return i; } /* term_103: handle term, where trunc = re-1 (regular expressions) */ static int term_102(zebra_map_t zm, const char **src, - WRBUF term_dict, int space_split, char *dst_term) + WRBUF term_dict, int space_split, WRBUF display_term) { - return term_103(zm, src, term_dict, NULL, space_split, dst_term); + return term_103(zm, src, term_dict, NULL, space_split, display_term); } -/* term_104: handle term, process # and ! */ -static int term_104(zebra_map_t zm, const char **src, - WRBUF term_dict, int space_split, char *dst_term) +/* term_104: handle term, process ?n * # */ +static int term_104(zebra_map_t zm, const char **src, + WRBUF term_dict, int space_split, WRBUF display_term) { const char *s0; const char **map; int i = 0; - int j = 0; - if (!term_pre(zm, src, "?*#", "?*#", !space_split)) + if (!term_pre(zm, src, "?*#", !space_split)) return 0; s0 = *src; while (*s0) @@ -404,14 +515,16 @@ static int term_104(zebra_map_t zm, const char **src, if (*s0 == '?') { i++; - dst_term[j++] = *s0++; + wrbuf_putc(display_term, *s0); + s0++; if (*s0 >= '0' && *s0 <= '9') { int limit = 0; while (*s0 >= '0' && *s0 <= '9') { limit = limit * 10 + (*s0 - '0'); - dst_term[j++] = *s0++; + wrbuf_putc(display_term, *s0); + s0++; } if (limit > 20) limit = 20; @@ -429,13 +542,15 @@ static int term_104(zebra_map_t zm, const char **src, { i++; wrbuf_puts(term_dict, ".*"); - dst_term[j++] = *s0++; + wrbuf_putc(display_term, *s0); + s0++; } else if (*s0 == '#') { i++; wrbuf_puts(term_dict, "."); - dst_term[j++] = *s0++; + wrbuf_putc(display_term, *s0); + s0++; } else { @@ -446,26 +561,23 @@ static int term_104(zebra_map_t zm, const char **src, break; i++; - add_non_space(s1, s0, term_dict, dst_term, &j, - map, q_map_match); + add_non_space(s1, s0, term_dict, display_term, map, q_map_match); } } - dst_term[j++] = '\0'; *src = s0; return i; } -/* term_105/106: handle term, where trunc = Process * and ! and right trunc */ -static int term_105(zebra_map_t zm, const char **src, +/* term_105/106: handle term, process * ! and possibly right_truncate */ +static int term_105(zebra_map_t zm, const char **src, WRBUF term_dict, int space_split, - char *dst_term, int right_truncate) + WRBUF display_term, int right_truncate) { const char *s0; const char **map; int i = 0; - int j = 0; - if (!term_pre(zm, src, "*!", "*!", !space_split)) + if (!term_pre(zm, src, "\\*!", !space_split)) return 0; s0 = *src; while (*s0) @@ -474,13 +586,22 @@ static int term_105(zebra_map_t zm, const char **src, { i++; wrbuf_puts(term_dict, ".*"); - dst_term[j++] = *s0++; + wrbuf_putc(display_term, *s0); + s0++; } else if (*s0 == '!') { i++; wrbuf_putc(term_dict, '.'); - dst_term[j++] = *s0++; + wrbuf_putc(display_term, *s0); + s0++; + } + else if (*s0 == '\\') + { + i++; + wrbuf_puts(term_dict, "\\\\"); + wrbuf_putc(display_term, *s0); + s0++; } else { @@ -491,13 +612,11 @@ static int term_105(zebra_map_t zm, const char **src, break; i++; - add_non_space(s1, s0, term_dict, dst_term, &j, - map, q_map_match); + add_non_space(s1, s0, term_dict, display_term, map, q_map_match); } } if (right_truncate) wrbuf_puts(term_dict, ".*"); - dst_term[j++] = '\0'; *src = s0; return i; } @@ -523,7 +642,7 @@ static void gen_regular_rel(WRBUF term_dict, int val, int islt) strcpy(dst, "(-[0-9]+|("); else strcpy(dst, "(("); - } + } else { if (!islt) @@ -550,7 +669,7 @@ static void gen_regular_rel(WRBUF term_dict, int val, int islt) if (d == '0') continue; d--; - } + } else { if (d == '9') @@ -558,7 +677,7 @@ static void gen_regular_rel(WRBUF term_dict, int val, int islt) d++; } } - + strcpy(dst + dst_p, numstr); dst_p = strlen(dst) - pos - 1; @@ -578,7 +697,7 @@ static void gen_regular_rel(WRBUF term_dict, int val, int islt) else { if (d != '9') - { + { dst[dst_p++] = '['; dst[dst_p++] = d; dst[dst_p++] = '-'; @@ -588,7 +707,7 @@ static void gen_regular_rel(WRBUF term_dict, int val, int islt) else dst[dst_p++] = d; } - for (i = 0; iapprox_limit; - } - else - { - /* no counting if term_ref is not present */ - *hits_limit_value = 0; - } - } - else if (*hits_limit_value == 0) - { - /* 0 is the same as global limit */ - *hits_limit_value = zh->approx_limit; - } yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT, *term_ref_id_str ? *term_ref_id_str : "none", *hits_limit_value); return ZEBRA_OK; } -static ZEBRA_RES term_trunc(ZebraHandle zh, - Z_AttributesPlusTerm *zapt, - const char **term_sub, - const Odr_oid *attributeSet, NMEM stream, - struct grep_info *grep_info, - const char *index_type, int complete_flag, - char *term_dst, - const char *rank_type, - const char *xpath_use, - NMEM rset_nmem, - RSET *rset, - struct rset_key_control *kc) +/** \brief search for term (which may be truncated) + */ +static ZEBRA_RES search_term(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const char **term_sub, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, + struct grep_info *grep_info, + const char *index_type, int complete_flag, + const char *rank_type, + const char *xpath_use, + NMEM rset_nmem, + RSET *rset, + struct rset_key_control *kc, + zebra_map_t zm, + size_t token_number) { ZEBRA_RES res; struct ord_list *ol; - zint hits_limit_value; + zint hits_limit_value = hits_limit; const char *term_ref_id_str = 0; WRBUF term_dict = wrbuf_alloc(); - + WRBUF display_term = wrbuf_alloc(); *rset = 0; - term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream); + zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, + stream); grep_info->isam_p_indx = 0; res = string_term(zh, zapt, term_sub, term_dict, attributeSet, stream, grep_info, index_type, complete_flag, - term_dst, xpath_use, &ol); + display_term, xpath_use, &ol, zm, token_number); wrbuf_destroy(term_dict); - if (res != ZEBRA_OK) - return res; - if (!*term_sub) /* no more terms ? */ - return res; - yaz_log(log_level_rpn, "term: %s", term_dst); - *rset = rset_trunc(zh, grep_info->isam_p_buf, - grep_info->isam_p_indx, term_dst, - strlen(term_dst), rank_type, 1 /* preserve pos */, - zapt->term->which, rset_nmem, - kc, kc->scope, ol, index_type, hits_limit_value, - term_ref_id_str); - if (!*rset) - return ZEBRA_FAIL; - return ZEBRA_OK; + if (res == ZEBRA_OK && *term_sub) + { + yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term)); + *rset = rset_trunc(zh, grep_info->isam_p_buf, + grep_info->isam_p_indx, wrbuf_buf(display_term), + wrbuf_len(display_term), rank_type, + 1 /* preserve pos */, + zapt->term->which, rset_nmem, + kc, kc->scope, ol, index_type, hits_limit_value, + term_ref_id_str); + if (!*rset) + res = ZEBRA_FAIL; + } + wrbuf_destroy(display_term); + return res; } static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const char **term_sub, + const char **term_sub, WRBUF term_dict, const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, const char *index_type, int complete_flag, - char *term_dst, + WRBUF display_term, const char *xpath_use, - struct ord_list **ol) + struct ord_list **ol, + zebra_map_t zm, size_t token_number) { int r; AttrType truncation; @@ -948,8 +1060,7 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, int relation_error; char ord_buf[32]; int ord_len, i; - zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, index_type); - + *ol = ord_list_create(stream); rpn_char_map_prepare(zh->reg, zm, &rcmi); @@ -958,136 +1069,205 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, yaz_log(log_level_rpn, "truncation value %d", truncation_value); termp = *term_sub; /* start of term for each database */ - + if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use, attributeSet, &ord) != ZEBRA_OK) { *term_sub = 0; return ZEBRA_FAIL; } - + wrbuf_rewind(term_dict); /* new dictionary regexp term */ - + *ol = ord_list_append(stream, *ol, ord); ord_len = key_SU_encode(ord, ord_buf); - + wrbuf_putc(term_dict, '('); - - for (i = 0; ireg->dict, wrbuf_cstr(term_dict), regex_range, - grep_info, &max_pos, + grep_info, &max_pos, ord_len /* number of "exact" chars */, grep_handle); if (r == 1) @@ -1163,12 +1348,12 @@ static ZEBRA_RES grep_info_prepare(ZebraHandle zh, const char *termset_name = 0; if (termset_value_numeric != -2) { - + sprintf(resname, "%d", termset_value_numeric); termset_name = resname; } else - termset_name = termset_value_string; + termset_name = termset_value_string; yaz_log(log_level_rpn, "creating termset set %s", termset_name); grep_info->termset = resultSetAdd(zh, termset_name, 1); if (!grep_info->termset) @@ -1181,65 +1366,50 @@ static ZEBRA_RES grep_info_prepare(ZebraHandle zh, } return ZEBRA_OK; } - -/** - \brief Create result set(s) for list of terms - \param zh Zebra Handle - \param zapt Attributes Plust Term (RPN leaf) - \param termz term as used in query but converted to UTF-8 - \param attributeSet default attribute set - \param stream memory for result - \param index_type register type ("w", "p",..) - \param complete_flag whether it's phrases or not - \param rank_type term flags for ranking - \param xpath_use use attribute for X-Path (-1 for no X-path) - \param rset_nmem memory for result sets - \param result_sets output result set for each term in list (output) - \param num_result_sets number of output result sets - \param kc rset key control to be used for created result sets -*/ -static ZEBRA_RES term_list_trunc(ZebraHandle zh, - Z_AttributesPlusTerm *zapt, - const char *termz, - const Odr_oid *attributeSet, - NMEM stream, - const char *index_type, int complete_flag, - const char *rank_type, - const char *xpath_use, - NMEM rset_nmem, - RSET **result_sets, int *num_result_sets, - struct rset_key_control *kc) + +static ZEBRA_RES search_terms_chrmap(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const char *termz, + const Odr_oid *attributeSet, + zint hits_limit, + NMEM stream, + const char *index_type, int complete_flag, + const char *rank_type, + const char *xpath_use, + NMEM rset_nmem, + RSET **result_sets, int *num_result_sets, + struct rset_key_control *kc, + zebra_map_t zm) { - char term_dst[IT_MAX_WORD+1]; struct grep_info grep_info; const char *termp = termz; int alloc_sets = 0; *num_result_sets = 0; - *term_dst = 0; if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL) return ZEBRA_FAIL; - while(1) - { + while (1) + { ZEBRA_RES res; if (alloc_sets == *num_result_sets) { int add = 10; - RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * + RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * sizeof(*rnew)); if (alloc_sets) memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew)); alloc_sets = alloc_sets + add; *result_sets = rnew; } - res = term_trunc(zh, zapt, &termp, attributeSet, - stream, &grep_info, - index_type, complete_flag, - term_dst, rank_type, - xpath_use, rset_nmem, - &(*result_sets)[*num_result_sets], - kc); + res = search_term(zh, zapt, &termp, attributeSet, hits_limit, + stream, &grep_info, + index_type, complete_flag, + rank_type, + xpath_use, rset_nmem, + &(*result_sets)[*num_result_sets], + kc, zm, + *num_result_sets); if (res != ZEBRA_OK) { int i; @@ -1259,13 +1429,53 @@ static ZEBRA_RES term_list_trunc(ZebraHandle zh, return ZEBRA_OK; } -static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh, - Z_AttributesPlusTerm *zapt, - const Odr_oid *attributeSet, - const char *index_type, - NMEM rset_nmem, - RSET *rset, - struct rset_key_control *kc) +/** + \brief Create result set(s) for list of terms + \param zh Zebra Handle + \param zapt Attributes Plust Term (RPN leaf) + \param termz term as used in query but converted to UTF-8 + \param attributeSet default attribute set + \param stream memory for result + \param index_type register type ("w", "p",..) + \param complete_flag whether it's phrases or not + \param rank_type term flags for ranking + \param xpath_use use attribute for X-Path (-1 for no X-path) + \param rset_nmem memory for result sets + \param result_sets output result set for each term in list (output) + \param num_result_sets number of output result sets + \param kc rset key control to be used for created result sets +*/ +static ZEBRA_RES search_terms_list(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const char *termz, + const Odr_oid *attributeSet, + zint hits_limit, + NMEM stream, + const char *index_type, int complete_flag, + const char *rank_type, + const char *xpath_use, + NMEM rset_nmem, + RSET **result_sets, int *num_result_sets, + struct rset_key_control *kc) +{ + zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type); + return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit, + stream, index_type, complete_flag, + rank_type, xpath_use, + rset_nmem, result_sets, num_result_sets, + kc, zm); +} + + +/** \brief limit a search by position - returns result set + */ +static ZEBRA_RES search_position(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const Odr_oid *attributeSet, + const char *index_type, + NMEM rset_nmem, + RSET *rset, + struct rset_key_control *kc) { int position_value; AttrType position; @@ -1275,8 +1485,8 @@ static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh, int ord_len; char *val; ISAM_P isam_p; - zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, index_type); - + zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type); + attr_init_APT(&position, zapt, 3); position_value = attr_find(&position, NULL); switch(position_value) @@ -1301,13 +1511,6 @@ static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh, return ZEBRA_FAIL; } - if (!zh->reg->isamb && !zh->reg->isamc) - { - zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE, - position_value); - return ZEBRA_FAIL; - } - if (zebra_apt_get_ord(zh, zapt, index_type, 0, attributeSet, &ord) != ZEBRA_OK) { @@ -1321,23 +1524,23 @@ static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh, { assert(*val == sizeof(ISAM_P)); memcpy(&isam_p, val+1, sizeof(isam_p)); - - if (zh->reg->isamb) - *rset = rsisamb_create(rset_nmem, kc, kc->scope, - zh->reg->isamb, isam_p, 0); - else if (zh->reg->isamc) - *rset = rsisamc_create(rset_nmem, kc, kc->scope, - zh->reg->isamc, isam_p, 0); + + *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope, + isam_p, 0); } return ZEBRA_OK; } - + +/** \brief returns result set for phrase search + */ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, - const char *index_type, int complete_flag, + const char *index_type, + int complete_flag, const char *rank_type, const char *xpath_use, NMEM rset_nmem, @@ -1347,11 +1550,11 @@ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, RSET *result_sets = 0; int num_result_sets = 0; ZEBRA_RES res = - term_list_trunc(zh, zapt, termz_org, attributeSet, - stream, index_type, complete_flag, - rank_type, xpath_use, - rset_nmem, - &result_sets, &num_result_sets, kc); + search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit, + stream, index_type, complete_flag, + rank_type, xpath_use, + rset_nmem, + &result_sets, &num_result_sets, kc); if (res != ZEBRA_OK) return res; @@ -1359,12 +1562,17 @@ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, if (num_result_sets > 0) { RSET first_set = 0; - res = rpn_search_APT_position(zh, zapt, attributeSet, - index_type, - rset_nmem, &first_set, - kc); + res = search_position(zh, zapt, attributeSet, + index_type, + rset_nmem, &first_set, + kc); if (res != ZEBRA_OK) + { + int i; + for (i = 0; i < num_result_sets; i++) + rset_delete(result_sets[i]); return res; + } if (first_set) { RSET *nsets = nmem_malloc(stream, @@ -1376,7 +1584,7 @@ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, } } if (num_result_sets == 0) - *rset = rset_create_null(rset_nmem, kc, 0); + *rset = rset_create_null(rset_nmem, kc, 0); else if (num_result_sets == 1) *rset = result_sets[0]; else @@ -1389,12 +1597,15 @@ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, return ZEBRA_OK; } +/** \brief returns result set for or-list search + */ static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, - const char *index_type, + const char *index_type, int complete_flag, const char *rank_type, const char *xpath_use, @@ -1406,24 +1617,24 @@ static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh, int num_result_sets = 0; int i; ZEBRA_RES res = - term_list_trunc(zh, zapt, termz_org, attributeSet, - stream, index_type, complete_flag, - rank_type, xpath_use, - rset_nmem, - &result_sets, &num_result_sets, kc); + search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit, + stream, index_type, complete_flag, + rank_type, xpath_use, + rset_nmem, + &result_sets, &num_result_sets, kc); if (res != ZEBRA_OK) return res; - for (i = 0; iscope, 2, tmp_set, @@ -1443,7 +1654,7 @@ static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh, } } if (num_result_sets == 0) - *rset = rset_create_null(rset_nmem, kc, 0); + *rset = rset_create_null(rset_nmem, kc, 0); else if (num_result_sets == 1) *rset = result_sets[0]; else @@ -1454,14 +1665,17 @@ static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh, return ZEBRA_OK; } +/** \brief returns result set for and-list search + */ static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, - const char *index_type, + const char *index_type, int complete_flag, - const char *rank_type, + const char *rank_type, const char *xpath_use, NMEM rset_nmem, RSET *rset, @@ -1471,24 +1685,24 @@ static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh, int num_result_sets = 0; int i; ZEBRA_RES res = - term_list_trunc(zh, zapt, termz_org, attributeSet, - stream, index_type, complete_flag, - rank_type, xpath_use, - rset_nmem, - &result_sets, &num_result_sets, - kc); + search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit, + stream, index_type, complete_flag, + rank_type, xpath_use, + rset_nmem, + &result_sets, &num_result_sets, + kc); if (res != ZEBRA_OK) return res; - for (i = 0; iscope, 2, tmp_set, @@ -1510,12 +1724,12 @@ static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh, if (num_result_sets == 0) - *rset = rset_create_null(rset_nmem, kc, 0); + *rset = rset_create_null(rset_nmem, kc, 0); else if (num_result_sets == 1) *rset = result_sets[0]; else *rset = rset_create_and(rset_nmem, kc, kc->scope, - num_result_sets, result_sets); + num_result_sets, result_sets); if (!*rset) return ZEBRA_FAIL; return ZEBRA_OK; @@ -1528,7 +1742,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, struct grep_info *grep_info, int *max_pos, zebra_map_t zm, - char *term_dst, + WRBUF display_term, int *error_code) { AttrType relation; @@ -1547,8 +1761,8 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, { case 1: yaz_log(log_level_rpn, "Relation <"); - if (!term_100(zm, term_sub, term_num, 1, term_dst)) - { + if (!term_100(zm, term_sub, term_num, 1, display_term)) + { wrbuf_destroy(term_num); return 0; } @@ -1557,7 +1771,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, break; case 2: yaz_log(log_level_rpn, "Relation <="); - if (!term_100(zm, term_sub, term_num, 1, term_dst)) + if (!term_100(zm, term_sub, term_num, 1, display_term)) { wrbuf_destroy(term_num); return 0; @@ -1567,7 +1781,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, break; case 4: yaz_log(log_level_rpn, "Relation >="); - if (!term_100(zm, term_sub, term_num, 1, term_dst)) + if (!term_100(zm, term_sub, term_num, 1, display_term)) { wrbuf_destroy(term_num); return 0; @@ -1577,7 +1791,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, break; case 5: yaz_log(log_level_rpn, "Relation >"); - if (!term_100(zm, term_sub, term_num, 1, term_dst)) + if (!term_100(zm, term_sub, term_num, 1, display_term)) { wrbuf_destroy(term_num); return 0; @@ -1586,12 +1800,13 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, gen_regular_rel(term_dict, term_value+1, 0); break; case -1: + case 102: case 3: yaz_log(log_level_rpn, "Relation ="); - if (!term_100(zm, term_sub, term_num, 1, term_dst)) + if (!term_100(zm, term_sub, term_num, 1, display_term)) { wrbuf_destroy(term_num); - return 0; + return 0; } term_value = atoi(wrbuf_cstr(term_num)); wrbuf_printf(term_dict, "(0*%d)", term_value); @@ -1603,10 +1818,10 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, break; default: *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE; - wrbuf_destroy(term_num); + wrbuf_destroy(term_num); return 0; } - r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), + r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0, grep_info, max_pos, 0, grep_handle); if (r == 1) @@ -1619,12 +1834,12 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, } static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const char **term_sub, + const char **term_sub, WRBUF term_dict, const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, const char *index_type, int complete_flag, - char *term_dst, + WRBUF display_term, const char *xpath_use, struct ord_list **ol) { @@ -1634,26 +1849,26 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, int relation_error = 0; int ord, ord_len, i; char ord_buf[32]; - zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, index_type); - + zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type); + *ol = ord_list_create(stream); rpn_char_map_prepare(zh->reg, zm, &rcmi); termp = *term_sub; - + if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use, attributeSet, &ord) != ZEBRA_OK) { return ZEBRA_FAIL; } - + wrbuf_rewind(term_dict); - + *ol = ord_list_append(stream, *ol, ord); - + ord_len = key_SU_encode(ord, ord_buf); - + wrbuf_putc(term_dict, '('); for (i = 0; i < ord_len; i++) { @@ -1661,10 +1876,10 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, wrbuf_putc(term_dict, ord_buf[i]); } wrbuf_putc(term_dict, ')'); - + if (!numeric_relation(zh, zapt, &termp, term_dict, attributeSet, grep_info, &max_pos, zm, - term_dst, &relation_error)) + display_term, &relation_error)) { if (relation_error) { @@ -1679,43 +1894,45 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, return ZEBRA_OK; } - + static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz, const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, - const char *index_type, + const char *index_type, int complete_flag, - const char *rank_type, + const char *rank_type, const char *xpath_use, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc) { - char term_dst[IT_MAX_WORD+1]; const char *termp = termz; RSET *result_sets = 0; int num_result_sets = 0; ZEBRA_RES res; struct grep_info grep_info; int alloc_sets = 0; - zint hits_limit_value; + zint hits_limit_value = hits_limit; const char *term_ref_id_str = 0; - term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream); + zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, + stream); yaz_log(log_level_rpn, "APT_numeric t='%s'", termz); if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL) return ZEBRA_FAIL; while (1) - { + { struct ord_list *ol; WRBUF term_dict = wrbuf_alloc(); + WRBUF display_term = wrbuf_alloc(); if (alloc_sets == num_result_sets) { int add = 10; - RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * + RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * sizeof(*rnew)); if (alloc_sets) memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew)); @@ -1727,20 +1944,24 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, res = numeric_term(zh, zapt, &termp, term_dict, attributeSet, stream, &grep_info, index_type, complete_flag, - term_dst, xpath_use, &ol); + display_term, xpath_use, &ol); wrbuf_destroy(term_dict); if (res == ZEBRA_FAIL || termp == 0) + { + wrbuf_destroy(display_term); break; - yaz_log(YLOG_DEBUG, "term: %s", term_dst); + } + yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term)); result_sets[num_result_sets] = rset_trunc(zh, grep_info.isam_p_buf, - grep_info.isam_p_indx, term_dst, - strlen(term_dst), rank_type, + grep_info.isam_p_indx, wrbuf_buf(display_term), + wrbuf_len(display_term), rank_type, 0 /* preserve position */, - zapt->term->which, rset_nmem, + zapt->term->which, rset_nmem, kc, kc->scope, ol, index_type, hits_limit_value, term_ref_id_str); + wrbuf_destroy(display_term); if (!result_sets[num_result_sets]) break; num_result_sets++; @@ -1774,7 +1995,7 @@ static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh, { Record rec; zint sysno = atozint(termz); - + if (sysno <= 0) sysno = 0; rec = rec_get(zh->reg->records, sysno); @@ -1794,7 +2015,7 @@ static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh, *rset = rset_create_temp(rset_nmem, kc, kc->scope, res_get(zh->res, "setTmpDir"), 0); rsfd = rset_open(*rset, RSETF_WRITE); - + key.mem[0] = sysno; key.mem[1] = 1; key.len = 2; @@ -1818,7 +2039,7 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, Z_SortKeySpec *sks; Z_SortKey *sk; char termz[20]; - + attr_init_APT(&sort_relation_type, zapt, 7); sort_relation_value = attr_find(&sort_relation_type, &attributeSet); @@ -1827,15 +2048,15 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, sort_sequence->num_specs = 10; sort_sequence->specs = (Z_SortKeySpec **) nmem_malloc(stream, sort_sequence->num_specs * - sizeof(*sort_sequence->specs)); - for (i = 0; inum_specs; i++) + sizeof(*sort_sequence->specs)); + for (i = 0; i < sort_sequence->num_specs; i++) sort_sequence->specs[i] = 0; } if (zapt->term->which != Z_Term_general) i = 0; else i = atoi_n((char *) zapt->term->u.general->buf, - zapt->term->u.general->len); + zapt->term->u.general->len); if (i >= sort_sequence->num_specs) i = 0; sprintf(termz, "%d", i); @@ -1853,16 +2074,16 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet); sk->u.sortAttributes->list = zapt->attributes; - sks->sortRelation = (int *) + sks->sortRelation = (Odr_int *) nmem_malloc(stream, sizeof(*sks->sortRelation)); if (sort_relation_value == 1) *sks->sortRelation = Z_SortKeySpec_ascending; else if (sort_relation_value == 2) *sks->sortRelation = Z_SortKeySpec_descending; - else + else *sks->sortRelation = Z_SortKeySpec_ascending; - sks->caseSensitivity = (int *) + sks->caseSensitivity = (Odr_int *) nmem_malloc(stream, sizeof(*sks->caseSensitivity)); *sks->caseSensitivity = 0; @@ -1882,7 +2103,7 @@ static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const Odr_oid *curAttributeSet = attributeSet; AttrType use; const char *use_string = 0; - + attr_init_APT(&use, zapt, 1); attr_find_ex(&use, &curAttributeSet, &use_string); @@ -1891,27 +2112,27 @@ static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt, return zebra_parse_xpath_str(use_string, xpath, max, mem); } - - + + static RSET xpath_trunc(ZebraHandle zh, NMEM stream, - const char *index_type, const char *term, + const char *index_type, const char *term, const char *xpath_use, NMEM rset_nmem, struct rset_key_control *kc) { struct grep_info grep_info; - int ord = zebraExplain_lookup_attr_str(zh->reg->zei, + int ord = zebraExplain_lookup_attr_str(zh->reg->zei, zinfo_index_category_index, index_type, xpath_use); if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL) return rset_create_null(rset_nmem, kc, 0); - + if (ord < 0) return rset_create_null(rset_nmem, kc, 0); else { - int i, r, max_pos; + int i, max_pos; char ord_buf[32]; RSET rset; WRBUF term_dict = wrbuf_alloc(); @@ -1920,17 +2141,17 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream, const char *flags = "void"; wrbuf_putc(term_dict, '('); - for (i = 0; ireg->dict, wrbuf_cstr(term_dict), 0, - &grep_info, &max_pos, 0, grep_handle); + dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0, + &grep_info, &max_pos, 0, grep_handle); yaz_log(YLOG_DEBUG, "%s %d positions", term, grep_info.isam_p_indx); rset = rset_trunc(zh, grep_info.isam_p_buf, @@ -1962,37 +2183,37 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, } yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len); - for (i = 0; i a/.* - //a/b -> b/a/.* - /a -> a/ - /a/b -> b/a/ - - / -> none - - a[@attr = value]/b[@other = othervalue] - - /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/) - /a/b val range(b/a/,freetext(w,1016,val),b/a/) - /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/) - /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y) - /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y) - /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x) - + //a -> a/.* + //a/b -> b/a/.* + /a -> a/ + /a/b -> b/a/ + + / -> none + + a[@attr = value]/b[@other = othervalue] + + /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/) + /a/b val range(b/a/,freetext(w,1016,val),b/a/) + /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/) + /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y) + /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y) + /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x) + */ dict_grep_cmap(zh->reg->dict, 0, 0); - + { int level = xpath_len; int first_path = 1; - + while (--level >= 0) { WRBUF xpath_rev = wrbuf_alloc(); @@ -2032,7 +2253,7 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, { const char *cp = xpath[level].predicate->u.relation.value; wrbuf_putc(wbuf, '='); - + while (*cp) { if (strchr(REGEX_CHARS, *cp)) @@ -2042,12 +2263,12 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, } } rset_attr = xpath_trunc( - zh, stream, "0", wrbuf_cstr(wbuf), - ZEBRA_XPATH_ATTR_NAME, + zh, stream, "0", wrbuf_cstr(wbuf), + ZEBRA_XPATH_ATTR_NAME, rset_nmem, kc); wrbuf_destroy(wbuf); - } - else + } + else { if (!first_path) { @@ -2055,23 +2276,23 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, continue; } } - yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, + yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, wrbuf_cstr(xpath_rev)); if (wrbuf_len(xpath_rev)) { - rset_start_tag = xpath_trunc(zh, stream, "0", + rset_start_tag = xpath_trunc(zh, stream, "0", wrbuf_cstr(xpath_rev), - ZEBRA_XPATH_ELM_BEGIN, + ZEBRA_XPATH_ELM_BEGIN, rset_nmem, kc); if (always_matches) rset = rset_start_tag; else { - rset_end_tag = xpath_trunc(zh, stream, "0", + rset_end_tag = xpath_trunc(zh, stream, "0", wrbuf_cstr(xpath_rev), - ZEBRA_XPATH_ELM_END, + ZEBRA_XPATH_ELM_END, rset_nmem, kc); - + rset = rset_create_between(rset_nmem, kc, kc->scope, rset_start_tag, rset, rset_end_tag, rset_attr); @@ -2087,18 +2308,20 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, #define MAX_XPATH_STEPS 10 -static ZEBRA_RES rpn_search_database(ZebraHandle zh, +static ZEBRA_RES rpn_search_database(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const Odr_oid *attributeSet, NMEM stream, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, Z_SortKeySpecList *sort_sequence, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc); static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const Odr_oid *attributeSet, NMEM stream, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, Z_SortKeySpecList *sort_sequence, - int num_bases, char **basenames, + int num_bases, const char **basenames, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc) @@ -2116,7 +2339,7 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, res = ZEBRA_FAIL; break; } - res = rpn_search_database(zh, zapt, attributeSet, stream, + res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream, sort_sequence, rset_nmem, rsets+i, kc); if (res != ZEBRA_OK) @@ -2128,12 +2351,12 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, rset_delete(rsets[i]); *rset = 0; } - else + else { if (num_bases == 1) *rset = rsets[0]; else if (num_bases == 0) - *rset = rset_create_null(rset_nmem, kc, 0); + *rset = rset_create_null(rset_nmem, kc, 0); else *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */, num_bases, rsets); @@ -2141,9 +2364,10 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, return res; } -static ZEBRA_RES rpn_search_database(ZebraHandle zh, +static ZEBRA_RES rpn_search_database(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const Odr_oid *attributeSet, NMEM stream, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, Z_SortKeySpecList *sort_sequence, NMEM rset_nmem, RSET *rset, @@ -2167,7 +2391,7 @@ static ZEBRA_RES rpn_search_database(ZebraHandle zh, } zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type, rank_type, &complete_flag, &sort_flag); - + yaz_log(YLOG_DEBUG, "index_type=%s", index_type); yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag); yaz_log(YLOG_DEBUG, "search_type=%s", search_type); @@ -2180,14 +2404,14 @@ static ZEBRA_RES rpn_search_database(ZebraHandle zh, return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence, rank_type, rset_nmem, rset, kc); /* consider if an X-Path query is used */ - xpath_len = rpn_check_xpath(zh, zapt, attributeSet, + xpath_len = rpn_check_xpath(zh, zapt, attributeSet, xpath, MAX_XPATH_STEPS, stream); if (xpath_len >= 0) { - if (xpath[xpath_len-1].part[0] == '@') + if (xpath[xpath_len-1].part[0] == '@') xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */ else - xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */ + xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */ if (1) { @@ -2200,7 +2424,7 @@ static ZEBRA_RES rpn_search_database(ZebraHandle zh, if (relation_value == 103) /* alwaysmatches */ { *rset = 0; /* signal no "term" set */ - return rpn_search_xpath(zh, stream, rank_type, *rset, + return rpn_search_xpath(zh, stream, rank_type, *rset, xpath_len, xpath, rset_nmem, rset, kc); } } @@ -2208,7 +2432,7 @@ static ZEBRA_RES rpn_search_database(ZebraHandle zh, /* search using one of the various search type strategies termz is our UTF-8 search term - attributeSet is top-level default attribute set + attributeSet is top-level default attribute set stream is ODR for search reg_id is the register type complete_flag is 1 for complete subfield, 0 for incomplete @@ -2216,7 +2440,8 @@ static ZEBRA_RES rpn_search_database(ZebraHandle zh, */ if (!strcmp(search_type, "phrase")) { - res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream, + res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit, + stream, index_type, complete_flag, rank_type, xpath_use, rset_nmem, @@ -2224,7 +2449,8 @@ static ZEBRA_RES rpn_search_database(ZebraHandle zh, } else if (!strcmp(search_type, "and-list")) { - res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream, + res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit, + stream, index_type, complete_flag, rank_type, xpath_use, rset_nmem, @@ -2232,7 +2458,8 @@ static ZEBRA_RES rpn_search_database(ZebraHandle zh, } else if (!strcmp(search_type, "or-list")) { - res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream, + res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit, + stream, index_type, complete_flag, rank_type, xpath_use, rset_nmem, @@ -2245,7 +2472,8 @@ static ZEBRA_RES rpn_search_database(ZebraHandle zh, } else if (!strcmp(search_type, "numeric")) { - res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream, + res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, hits_limit, + stream, index_type, complete_flag, rank_type, xpath_use, rset_nmem, @@ -2260,15 +2488,16 @@ static ZEBRA_RES rpn_search_database(ZebraHandle zh, return res; if (!*rset) return ZEBRA_FAIL; - return rpn_search_xpath(zh, stream, rank_type, *rset, + return rpn_search_xpath(zh, stream, rank_type, *rset, xpath_len, xpath, rset_nmem, rset, kc); } static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, - const Odr_oid *attributeSet, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, - int num_bases, char **basenames, + int num_bases, const char **basenames, RSET **result_sets, int *num_result_sets, Z_Operator *parent_op, struct rset_key_control *kc); @@ -2293,9 +2522,9 @@ ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs, Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm; AttrType global_hits_limit_attr; int l; - + attr_init_APT(&global_hits_limit_attr, zapt, 12); - + l = attr_find(&global_hits_limit_attr, NULL); if (l != -1) *approx_limit = l; @@ -2305,10 +2534,11 @@ ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs, } ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs, - const Odr_oid *attributeSet, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, - int num_bases, char **basenames, + int num_bases, const char **basenames, RSET *result_set) { RSET *result_sets = 0; @@ -2316,9 +2546,9 @@ ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs, ZEBRA_RES res; struct rset_key_control *kc = zebra_key_control_create(zh); - res = rpn_search_structure(zh, zs, attributeSet, + res = rpn_search_structure(zh, zs, attributeSet, hits_limit, stream, rset_nmem, - sort_sequence, + sort_sequence, num_bases, basenames, &result_sets, &num_result_sets, 0 /* no parent op */, @@ -2326,7 +2556,7 @@ ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs, if (res != ZEBRA_OK) { int i; - for (i = 0; iu.complex->s1, - attributeSet, stream, rset_nmem, + attributeSet, hits_limit, stream, rset_nmem, sort_sequence, num_bases, basenames, &result_sets_l, &num_result_sets_l, @@ -2369,12 +2599,12 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, if (res != ZEBRA_OK) { int i; - for (i = 0; iu.complex->s2, - attributeSet, stream, rset_nmem, + attributeSet, hits_limit, stream, rset_nmem, sort_sequence, num_bases, basenames, &result_sets_r, &num_result_sets_r, @@ -2382,20 +2612,20 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, if (res != ZEBRA_OK) { int i; - for (i = 0; iwhich != zop->which @@ -2426,7 +2656,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, case Z_Operator_prox: if (zop->u.prox->which != Z_ProximityOperator_known) { - zebra_setError(zh, + zebra_setError(zh, YAZ_BIB1_UNSUPP_PROX_UNIT_CODE, 0); return ZEBRA_FAIL; @@ -2442,9 +2672,9 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, { rset = rset_create_prox(rset_nmem, kc, kc->scope, - *num_result_sets, *result_sets, + *num_result_sets, *result_sets, *zop->u.prox->ordered, - (!zop->u.prox->exclusion ? + (!zop->u.prox->exclusion ? 0 : *zop->u.prox->exclusion), *zop->u.prox->relationType, *zop->u.prox->distance ); @@ -2455,7 +2685,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, return ZEBRA_FAIL; } *num_result_sets = 1; - *result_sets = nmem_malloc(stream, *num_result_sets * + *result_sets = nmem_malloc(stream, *num_result_sets * sizeof(**result_sets)); (*result_sets)[0] = rset; } @@ -2469,7 +2699,8 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, { yaz_log(YLOG_DEBUG, "rpn_search_APT"); res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm, - attributeSet, stream, sort_sequence, + attributeSet, hits_limit, + stream, sort_sequence, num_bases, basenames, rset_nmem, &rset, kc); if (res != ZEBRA_OK) @@ -2481,7 +2712,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, rset = resultSetRef(zh, zs->u.simple->u.resultSetId); if (!rset) { - zebra_setError(zh, + zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST, zs->u.simple->u.resultSetId); return ZEBRA_FAIL; @@ -2494,7 +2725,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, return ZEBRA_FAIL; } *num_result_sets = 1; - *result_sets = nmem_malloc(stream, *num_result_sets * + *result_sets = nmem_malloc(stream, *num_result_sets * sizeof(**result_sets)); (*result_sets)[0] = rset; } @@ -2511,6 +2742,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab