X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Frpnsearch.c;h=7733a5d9e651b306ded58c3ca15adf9539b2503a;hp=ca44a8139d0b9d0d53ddabac07f3ca49ad35b9b2;hb=f5eb6612606c3365834ea5aad430b364902d42a3;hpb=6f7dfe3c3f09f7104c1ae7616c9d207edeab308d diff --git a/index/rpnsearch.c b/index/rpnsearch.c index ca44a81..7733a5d 100644 --- a/index/rpnsearch.c +++ b/index/rpnsearch.c @@ -1,8 +1,5 @@ -/* $Id: rpnsearch.c,v 1.18 2007-10-30 19:17:15 adam Exp $ - Copyright (C) 1995-2007 - Index Data ApS - -This file is part of the Zebra server. +/* This file is part of the Zebra server. + Copyright (C) 1994-2011 Index Data Zebra is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -20,6 +17,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#if HAVE_CONFIG_H +#include +#endif #include #include #ifdef WIN32 @@ -65,23 +65,26 @@ void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm, struct rpn_char_map_info *map_info) { map_info->zm = zm; - dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler); + if (zebra_maps_is_icu(zm)) + dict_grep_cmap(reg->dict, 0, 0); + else + dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler); } -#define TERM_COUNT - -struct grep_info { -#ifdef TERM_COUNT - int *term_no; -#endif +#define TERM_COUNT + +struct grep_info { +#ifdef TERM_COUNT + int *term_no; +#endif ISAM_P *isam_p_buf; - int isam_p_size; + int isam_p_size; int isam_p_indx; int trunc_max; ZebraHandle zh; - int reg_type; + const char *index_type; ZebraSet termset; -}; +}; static int add_isam_p(const char *name, const char *info, struct grep_info *p) @@ -99,8 +102,8 @@ static int add_isam_p(const char *name, const char *info, if (p->isam_p_indx == p->isam_p_size) { ISAM_P *new_isam_p_buf; -#ifdef TERM_COUNT - int *new_term_no; +#ifdef TERM_COUNT + int *new_term_no; #endif p->isam_p_size = 2*p->isam_p_size + 100; new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) * @@ -108,7 +111,7 @@ static int add_isam_p(const char *name, const char *info, if (p->isam_p_buf) { memcpy(new_isam_p_buf, p->isam_p_buf, - p->isam_p_indx * sizeof(*p->isam_p_buf)); + p->isam_p_indx * sizeof(*p->isam_p_buf)); xfree(p->isam_p_buf); } p->isam_p_buf = new_isam_p_buf; @@ -118,7 +121,7 @@ static int add_isam_p(const char *name, const char *info, if (p->term_no) { memcpy(new_term_no, p->isam_p_buf, - p->isam_p_indx * sizeof(*p->term_no)); + p->isam_p_indx * sizeof(*p->term_no)); xfree(p->term_no); } p->term_no = new_term_no; @@ -134,13 +137,13 @@ static int add_isam_p(const char *name, const char *info, int ord = 0; const char *index_name; int len = key_SU_decode(&ord, (const unsigned char *) name); - - zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len); + + zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len); yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp); zebraExplain_lookup_ord(p->zh->reg->zei, ord, 0 /* index_type */, &db, &index_name); yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name); - + resultSetAddTerm(p->zh, p->termset, name[len], db, index_name, term_tmp); } @@ -154,7 +157,7 @@ static int grep_handle(char *name, const char *info, void *p) } static int term_pre(zebra_map_t zm, const char **src, - const char *ct1, const char *ct2, int first) + const char *ct1, int first) { const char *s1, *s0 = *src; const char **map; @@ -164,8 +167,6 @@ static int term_pre(zebra_map_t zm, const char **src, { if (ct1 && strchr(ct1, *s0)) break; - if (ct2 && strchr(ct2, *s0)) - break; s1 = s0; map = zebra_maps_input(zm, &s1, strlen(s1), first); if (**map != *CHR_SPACE) @@ -203,16 +204,16 @@ static void esc_str(char *out_buf, size_t out_size, } } -#define REGEX_CHARS " []()|.*+?!" +#define REGEX_CHARS " ^[]()|.*+?!\"$\\" static void add_non_space(const char *start, const char *end, WRBUF term_dict, - char *dst_term, int *dst_ptr, + WRBUF display_term, const char **map, int q_map_match) { size_t sz = end - start; - memcpy(dst_term + *dst_ptr, start, sz); - (*dst_ptr) += sz; + + wrbuf_write(display_term, start, sz); if (!q_map_match) { while (start < end) @@ -227,25 +228,170 @@ static void add_non_space(const char *start, const char *end, { char tmpbuf[80]; esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); - + wrbuf_puts(term_dict, map[0]); } } + +static int term_102_icu(zebra_map_t zm, + const char **src, WRBUF term_dict, int space_split, + WRBUF display_term) +{ + int no_terms = 0; + const char *s0 = *src, *s1; + while (*s0 == ' ') + s0++; + s1 = s0; + for (;;) + { + if (*s1 == ' ' && space_split) + break; + else if (*s1 && !strchr(REGEX_CHARS "-", *s1)) + s1++; + else + { + /* EOF or regex reserved char */ + if (s0 != s1) + { + const char *res_buf = 0; + size_t res_len = 0; + const char *display_buf; + size_t display_len; + + zebra_map_tokenize_start(zm, s0, s1 - s0); + + if (zebra_map_tokenize_next(zm, &res_buf, &res_len, + &display_buf, &display_len)) + { + size_t i = res_len; + while (--i >= 0 && res_buf[i] != '\x01') + ; + if (i > 0) + { + while (--i >= 0 && res_buf[i] != '\x01') + ; + } + res_len = i; /* reduce res_len */ + for (i = 0; i < res_len; i++) + { + if (strchr(REGEX_CHARS "\\", res_buf[i])) + wrbuf_putc(term_dict, '\\'); + if (res_buf[i] < 32) + wrbuf_putc(term_dict, '\x01'); + + wrbuf_putc(term_dict, res_buf[i]); + } + wrbuf_write(display_term, display_buf, display_len); + + no_terms++; + } + } + if (*s1 == '\0') + break; + + wrbuf_putc(term_dict, *s1); + wrbuf_putc(display_term, *s1); + + s1++; + s0 = s1; + } + } + if (no_terms) + wrbuf_puts(term_dict, "\x01\x01.*"); + *src = s1; + return no_terms; +} + +static int term_100_icu(zebra_map_t zm, + const char **src, WRBUF term_dict, int space_split, + WRBUF display_term, + int mode) +{ + size_t i; + const char *res_buf = 0; + size_t res_len = 0; + const char *display_buf; + size_t display_len; + const char *s0 = *src, *s1; + + while (*s0 == ' ') + s0++; + + if (*s0 == '\0') + return 0; + + if (space_split) + { + s1 = s0; + while (*s1 && *s1 != ' ') + s1++; + } + else + s1 = s0 + strlen(s0); + + *src = s1; + + zebra_map_tokenize_start(zm, s0, s1 - s0); + + if (!zebra_map_tokenize_next(zm, &res_buf, &res_len, + &display_buf, &display_len)) + { + return 0; + } + wrbuf_write(display_term, display_buf, display_len); + if (mode) + { + /* ICU sort keys seem to be of the form + basechars \x01 accents \x01 length + For now we'll just right truncate from basechars . This + may give false hits due to accents not being used. + */ + i = res_len; + while (--i >= 0 && res_buf[i] != '\x01') + ; + if (i > 0) + { + while (--i >= 0 && res_buf[i] != '\x01') + ; + } + if (i == 0) + { /* did not find base chars at all. Throw error */ + return -1; + } + res_len = i; /* reduce res_len */ + } + if (mode & 2) + wrbuf_puts(term_dict, ".*"); + for (i = 0; i < res_len; i++) + { + if (strchr(REGEX_CHARS "\\", res_buf[i])) + wrbuf_putc(term_dict, '\\'); + if (res_buf[i] < 32) + wrbuf_putc(term_dict, '\x01'); + + wrbuf_putc(term_dict, res_buf[i]); + } + if (mode & 1) + wrbuf_puts(term_dict, ".*"); + else if (mode) + wrbuf_puts(term_dict, "\x01\x01.*"); + return 1; +} + /* term_100: handle term, where trunc = none(no operators at all) */ static int term_100(zebra_map_t zm, const char **src, WRBUF term_dict, int space_split, - char *dst_term) + WRBUF display_term) { const char *s0; const char **map; int i = 0; - int j = 0; const char *space_start = 0; const char *space_end = 0; - if (!term_pre(zm, src, NULL, NULL, !space_split)) + if (!term_pre(zm, src, 0, !space_split)) return 0; s0 = *src; while (*s0) @@ -272,10 +418,10 @@ static int term_100(zebra_map_t zm, { if (strchr(REGEX_CHARS, *space_start)) wrbuf_putc(term_dict, '\\'); - dst_term[j++] = *space_start; + wrbuf_putc(display_term, *space_start); wrbuf_putc(term_dict, *space_start); space_start++; - + } /* and reset */ space_start = space_end = 0; @@ -283,10 +429,8 @@ static int term_100(zebra_map_t zm, } i++; - add_non_space(s1, s0, term_dict, dst_term, &j, - map, q_map_match); + add_non_space(s1, s0, term_dict, display_term, map, q_map_match); } - dst_term[j] = '\0'; *src = s0; return i; } @@ -294,14 +438,13 @@ static int term_100(zebra_map_t zm, /* term_101: handle term, where trunc = Process # */ static int term_101(zebra_map_t zm, const char **src, WRBUF term_dict, int space_split, - char *dst_term) + WRBUF display_term) { const char *s0; const char **map; int i = 0; - int j = 0; - if (!term_pre(zm, src, "#", "#", !space_split)) + if (!term_pre(zm, src, "#", !space_split)) return 0; s0 = *src; while (*s0) @@ -310,7 +453,8 @@ static int term_101(zebra_map_t zm, { i++; wrbuf_puts(term_dict, ".*"); - dst_term[j++] = *s0++; + wrbuf_putc(display_term, *s0); + s0++; } else { @@ -321,11 +465,9 @@ static int term_101(zebra_map_t zm, break; i++; - add_non_space(s1, s0, term_dict, dst_term, &j, - map, q_map_match); + add_non_space(s1, s0, term_dict, display_term, map, q_map_match); } } - dst_term[j++] = '\0'; *src = s0; return i; } @@ -333,14 +475,13 @@ static int term_101(zebra_map_t zm, /* term_103: handle term, where trunc = re-2 (regular expressions) */ static int term_103(zebra_map_t zm, const char **src, WRBUF term_dict, int *errors, int space_split, - char *dst_term) + WRBUF display_term) { int i = 0; - int j = 0; const char *s0; const char **map; - if (!term_pre(zm, src, "^\\()[].*+?|", "(", !space_split)) + if (!term_pre(zm, src, "^\\()[].*+?|", !space_split)) return 0; s0 = *src; if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] && @@ -355,7 +496,7 @@ static int term_103(zebra_map_t zm, const char **src, { if (strchr("^\\()[].*+?|-", *s0)) { - dst_term[j++] = *s0; + wrbuf_putc(display_term, *s0); wrbuf_putc(term_dict, *s0); s0++; i++; @@ -369,34 +510,31 @@ static int term_103(zebra_map_t zm, const char **src, break; i++; - add_non_space(s1, s0, term_dict, dst_term, &j, - map, q_map_match); + add_non_space(s1, s0, term_dict, display_term, map, q_map_match); } } - dst_term[j] = '\0'; *src = s0; - + return i; } /* term_103: handle term, where trunc = re-1 (regular expressions) */ static int term_102(zebra_map_t zm, const char **src, - WRBUF term_dict, int space_split, char *dst_term) + WRBUF term_dict, int space_split, WRBUF display_term) { - return term_103(zm, src, term_dict, NULL, space_split, dst_term); + return term_103(zm, src, term_dict, NULL, space_split, display_term); } -/* term_104: handle term, process # and ! */ -static int term_104(zebra_map_t zm, const char **src, - WRBUF term_dict, int space_split, char *dst_term) +/* term_104: handle term, process ?n * # */ +static int term_104(zebra_map_t zm, const char **src, + WRBUF term_dict, int space_split, WRBUF display_term) { const char *s0; const char **map; int i = 0; - int j = 0; - if (!term_pre(zm, src, "?*#", "?*#", !space_split)) + if (!term_pre(zm, src, "?*#", !space_split)) return 0; s0 = *src; while (*s0) @@ -404,14 +542,16 @@ static int term_104(zebra_map_t zm, const char **src, if (*s0 == '?') { i++; - dst_term[j++] = *s0++; + wrbuf_putc(display_term, *s0); + s0++; if (*s0 >= '0' && *s0 <= '9') { int limit = 0; while (*s0 >= '0' && *s0 <= '9') { limit = limit * 10 + (*s0 - '0'); - dst_term[j++] = *s0++; + wrbuf_putc(display_term, *s0); + s0++; } if (limit > 20) limit = 20; @@ -429,13 +569,15 @@ static int term_104(zebra_map_t zm, const char **src, { i++; wrbuf_puts(term_dict, ".*"); - dst_term[j++] = *s0++; + wrbuf_putc(display_term, *s0); + s0++; } else if (*s0 == '#') { i++; wrbuf_puts(term_dict, "."); - dst_term[j++] = *s0++; + wrbuf_putc(display_term, *s0); + s0++; } else { @@ -446,26 +588,23 @@ static int term_104(zebra_map_t zm, const char **src, break; i++; - add_non_space(s1, s0, term_dict, dst_term, &j, - map, q_map_match); + add_non_space(s1, s0, term_dict, display_term, map, q_map_match); } } - dst_term[j++] = '\0'; *src = s0; return i; } -/* term_105/106: handle term, where trunc = Process * and ! and right trunc */ -static int term_105(zebra_map_t zm, const char **src, +/* term_105/106: handle term, process * ! and possibly right_truncate */ +static int term_105(zebra_map_t zm, const char **src, WRBUF term_dict, int space_split, - char *dst_term, int right_truncate) + WRBUF display_term, int right_truncate) { const char *s0; const char **map; int i = 0; - int j = 0; - if (!term_pre(zm, src, "*!", "*!", !space_split)) + if (!term_pre(zm, src, "\\*!", !space_split)) return 0; s0 = *src; while (*s0) @@ -474,13 +613,22 @@ static int term_105(zebra_map_t zm, const char **src, { i++; wrbuf_puts(term_dict, ".*"); - dst_term[j++] = *s0++; + wrbuf_putc(display_term, *s0); + s0++; } else if (*s0 == '!') { i++; wrbuf_putc(term_dict, '.'); - dst_term[j++] = *s0++; + wrbuf_putc(display_term, *s0); + s0++; + } + else if (*s0 == '\\') + { + i++; + wrbuf_puts(term_dict, "\\\\"); + wrbuf_putc(display_term, *s0); + s0++; } else { @@ -491,13 +639,11 @@ static int term_105(zebra_map_t zm, const char **src, break; i++; - add_non_space(s1, s0, term_dict, dst_term, &j, - map, q_map_match); + add_non_space(s1, s0, term_dict, display_term, map, q_map_match); } } if (right_truncate) wrbuf_puts(term_dict, ".*"); - dst_term[j++] = '\0'; *src = s0; return i; } @@ -523,7 +669,7 @@ static void gen_regular_rel(WRBUF term_dict, int val, int islt) strcpy(dst, "(-[0-9]+|("); else strcpy(dst, "(("); - } + } else { if (!islt) @@ -550,7 +696,7 @@ static void gen_regular_rel(WRBUF term_dict, int val, int islt) if (d == '0') continue; d--; - } + } else { if (d == '9') @@ -558,7 +704,7 @@ static void gen_regular_rel(WRBUF term_dict, int val, int islt) d++; } } - + strcpy(dst + dst_p, numstr); dst_p = strlen(dst) - pos - 1; @@ -578,7 +724,7 @@ static void gen_regular_rel(WRBUF term_dict, int val, int islt) else { if (d != '9') - { + { dst[dst_p++] = '['; dst[dst_p++] = d; dst[dst_p++] = '-'; @@ -642,7 +788,8 @@ void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx) static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, WRBUF term_dict, const Odr_oid *attributeSet, - zebra_map_t zm, int space_split, char *term_dst, + zebra_map_t zm, int space_split, + WRBUF display_term, int *error_code) { AttrType relation; @@ -658,18 +805,18 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, switch (relation_value) { case 1: - if (!term_100(zm, term_sub, term_component, space_split, term_dst)) + if (!term_100(zm, term_sub, term_component, space_split, display_term)) { wrbuf_destroy(term_component); return 0; } yaz_log(log_level_rpn, "Relation <"); - + wrbuf_putc(term_dict, '('); for (i = 0; i < wrbuf_len(term_component); ) { int j = 0; - + if (i) wrbuf_putc(term_dict, '|'); while (j < i) @@ -678,13 +825,13 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, wrbuf_putc(term_dict, '['); wrbuf_putc(term_dict, '^'); - + wrbuf_putc(term_dict, 1); wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR); - + string_rel_add_char(term_dict, term_component, &i); wrbuf_putc(term_dict, '-'); - + wrbuf_putc(term_dict, ']'); wrbuf_putc(term_dict, '.'); wrbuf_putc(term_dict, '*'); @@ -692,7 +839,7 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, wrbuf_putc(term_dict, ')'); break; case 2: - if (!term_100(zm, term_sub, term_component, space_split, term_dst)) + if (!term_100(zm, term_sub, term_component, space_split, display_term)) { wrbuf_destroy(term_component); return 0; @@ -727,7 +874,7 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, wrbuf_putc(term_dict, ')'); break; case 5: - if (!term_100(zm, term_sub, term_component, space_split, term_dst)) + if (!term_100(zm, term_sub, term_component, space_split, display_term)) { wrbuf_destroy(term_component); return 0; @@ -742,7 +889,7 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, while (j < i) string_rel_add_char(term_dict, term_component, &j); wrbuf_putc(term_dict, '['); - + wrbuf_putc(term_dict, '^'); wrbuf_putc(term_dict, '-'); string_rel_add_char(term_dict, term_component, &i); @@ -760,7 +907,7 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, wrbuf_putc(term_dict, ')'); break; case 4: - if (!term_100(zm, term_sub, term_component, space_split, term_dst)) + if (!term_100(zm, term_sub, term_component, space_split, display_term)) { wrbuf_destroy(term_component); return 0; @@ -801,7 +948,7 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, if (!**term_sub) return 1; yaz_log(log_level_rpn, "Relation ="); - if (!term_100(zm, term_sub, term_component, space_split, term_dst)) + if (!term_100(zm, term_sub, term_component, space_split, display_term)) { wrbuf_destroy(term_component); return 0; @@ -826,27 +973,29 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, } static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const char **term_sub, + const char **term_sub, WRBUF term_dict, const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, const char *index_type, int complete_flag, - char *term_dst, + WRBUF display_term, const char *xpath_use, - struct ord_list **ol); - -static ZEBRA_RES term_limits_APT(ZebraHandle zh, - Z_AttributesPlusTerm *zapt, - zint *hits_limit_value, - const char **term_ref_id_str, - NMEM nmem) + struct ord_list **ol, + zebra_map_t zm); + +ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + zint *hits_limit_value, + const char **term_ref_id_str, + NMEM nmem) { AttrType term_ref_id_attr; AttrType hits_limit_attr; int term_ref_id_int; - + zint hits_limit_from_attr; + attr_init_APT(&hits_limit_attr, zapt, 11); - *hits_limit_value = attr_find(&hits_limit_attr, NULL); + hits_limit_from_attr = attr_find(&hits_limit_attr, NULL); attr_init_APT(&term_ref_id_attr, zapt, 10); term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str); @@ -856,84 +1005,73 @@ static ZEBRA_RES term_limits_APT(ZebraHandle zh, sprintf(res, "%d", term_ref_id_int); *term_ref_id_str = res; } + if (hits_limit_from_attr != -1) + *hits_limit_value = hits_limit_from_attr; - /* no limit given ? */ - if (*hits_limit_value == -1) - { - if (*term_ref_id_str) - { - /* use global if term_ref is present */ - *hits_limit_value = zh->approx_limit; - } - else - { - /* no counting if term_ref is not present */ - *hits_limit_value = 0; - } - } - else if (*hits_limit_value == 0) - { - /* 0 is the same as global limit */ - *hits_limit_value = zh->approx_limit; - } yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT, *term_ref_id_str ? *term_ref_id_str : "none", *hits_limit_value); return ZEBRA_OK; } -static ZEBRA_RES term_trunc(ZebraHandle zh, - Z_AttributesPlusTerm *zapt, - const char **term_sub, - const Odr_oid *attributeSet, NMEM stream, - struct grep_info *grep_info, - const char *index_type, int complete_flag, - char *term_dst, - const char *rank_type, - const char *xpath_use, - NMEM rset_nmem, - RSET *rset, - struct rset_key_control *kc) +/** \brief search for term (which may be truncated) + */ +static ZEBRA_RES search_term(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const char **term_sub, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, + struct grep_info *grep_info, + const char *index_type, int complete_flag, + const char *rank_type, + const char *xpath_use, + NMEM rset_nmem, + RSET *rset, + struct rset_key_control *kc, + zebra_map_t zm) { ZEBRA_RES res; struct ord_list *ol; - zint hits_limit_value; + zint hits_limit_value = hits_limit; const char *term_ref_id_str = 0; WRBUF term_dict = wrbuf_alloc(); - + WRBUF display_term = wrbuf_alloc(); *rset = 0; - term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream); + zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, + stream); grep_info->isam_p_indx = 0; res = string_term(zh, zapt, term_sub, term_dict, attributeSet, stream, grep_info, index_type, complete_flag, - term_dst, xpath_use, &ol); + display_term, xpath_use, &ol, zm); wrbuf_destroy(term_dict); - if (res != ZEBRA_OK) - return res; - if (!*term_sub) /* no more terms ? */ - return res; - yaz_log(log_level_rpn, "term: %s", term_dst); - *rset = rset_trunc(zh, grep_info->isam_p_buf, - grep_info->isam_p_indx, term_dst, - strlen(term_dst), rank_type, 1 /* preserve pos */, - zapt->term->which, rset_nmem, - kc, kc->scope, ol, index_type, hits_limit_value, - term_ref_id_str); - if (!*rset) - return ZEBRA_FAIL; - return ZEBRA_OK; + if (res == ZEBRA_OK && *term_sub) + { + yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term)); + *rset = rset_trunc(zh, grep_info->isam_p_buf, + grep_info->isam_p_indx, wrbuf_buf(display_term), + wrbuf_len(display_term), rank_type, + 1 /* preserve pos */, + zapt->term->which, rset_nmem, + kc, kc->scope, ol, index_type, hits_limit_value, + term_ref_id_str); + if (!*rset) + res = ZEBRA_FAIL; + } + wrbuf_destroy(display_term); + return res; } static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const char **term_sub, + const char **term_sub, WRBUF term_dict, const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, const char *index_type, int complete_flag, - char *term_dst, + WRBUF display_term, const char *xpath_use, - struct ord_list **ol) + struct ord_list **ol, + zebra_map_t zm) { int r; AttrType truncation; @@ -948,8 +1086,7 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, int relation_error; char ord_buf[32]; int ord_len, i; - zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, *index_type); - + *ol = ord_list_create(stream); rpn_char_map_prepare(zh->reg, zm, &rcmi); @@ -958,136 +1095,205 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, yaz_log(log_level_rpn, "truncation value %d", truncation_value); termp = *term_sub; /* start of term for each database */ - + if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use, attributeSet, &ord) != ZEBRA_OK) { *term_sub = 0; return ZEBRA_FAIL; } - + wrbuf_rewind(term_dict); /* new dictionary regexp term */ - + *ol = ord_list_append(stream, *ol, ord); ord_len = key_SU_encode(ord, ord_buf); - + wrbuf_putc(term_dict, '('); - + for (i = 0; ireg->dict, wrbuf_cstr(term_dict), regex_range, - grep_info, &max_pos, + grep_info, &max_pos, ord_len /* number of "exact" chars */, grep_handle); if (r == 1) @@ -1123,7 +1334,7 @@ static void grep_info_delete(struct grep_info *grep_info) static ZEBRA_RES grep_info_prepare(ZebraHandle zh, Z_AttributesPlusTerm *zapt, struct grep_info *grep_info, - int reg_type) + const char *index_type) { #ifdef TERM_COUNT grep_info->term_no = 0; @@ -1132,7 +1343,7 @@ static ZEBRA_RES grep_info_prepare(ZebraHandle zh, grep_info->isam_p_size = 0; grep_info->isam_p_buf = NULL; grep_info->zh = zh; - grep_info->reg_type = reg_type; + grep_info->index_type = index_type; grep_info->termset = 0; if (zapt) { @@ -1163,12 +1374,12 @@ static ZEBRA_RES grep_info_prepare(ZebraHandle zh, const char *termset_name = 0; if (termset_value_numeric != -2) { - + sprintf(resname, "%d", termset_value_numeric); termset_name = resname; } else - termset_name = termset_value_string; + termset_name = termset_value_string; yaz_log(log_level_rpn, "creating termset set %s", termset_name); grep_info->termset = resultSetAdd(zh, termset_name, 1); if (!grep_info->termset) @@ -1181,65 +1392,49 @@ static ZEBRA_RES grep_info_prepare(ZebraHandle zh, } return ZEBRA_OK; } - -/** - \brief Create result set(s) for list of terms - \param zh Zebra Handle - \param zapt Attributes Plust Term (RPN leaf) - \param termz term as used in query but converted to UTF-8 - \param attributeSet default attribute set - \param stream memory for result - \param index_type register type ("w", "p",..) - \param complete_flag whether it's phrases or not - \param rank_type term flags for ranking - \param xpath_use use attribute for X-Path (-1 for no X-path) - \param rset_nmem memory for result sets - \param result_sets output result set for each term in list (output) - \param num_result_sets number of output result sets - \param kc rset key control to be used for created result sets -*/ -static ZEBRA_RES term_list_trunc(ZebraHandle zh, - Z_AttributesPlusTerm *zapt, - const char *termz, - const Odr_oid *attributeSet, - NMEM stream, - const char *index_type, int complete_flag, - const char *rank_type, - const char *xpath_use, - NMEM rset_nmem, - RSET **result_sets, int *num_result_sets, - struct rset_key_control *kc) + +static ZEBRA_RES search_terms_chrmap(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const char *termz, + const Odr_oid *attributeSet, + zint hits_limit, + NMEM stream, + const char *index_type, int complete_flag, + const char *rank_type, + const char *xpath_use, + NMEM rset_nmem, + RSET **result_sets, int *num_result_sets, + struct rset_key_control *kc, + zebra_map_t zm) { - char term_dst[IT_MAX_WORD+1]; struct grep_info grep_info; const char *termp = termz; int alloc_sets = 0; *num_result_sets = 0; - *term_dst = 0; - if (grep_info_prepare(zh, zapt, &grep_info, *index_type) == ZEBRA_FAIL) + if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL) return ZEBRA_FAIL; - while(1) - { + while (1) + { ZEBRA_RES res; if (alloc_sets == *num_result_sets) { int add = 10; - RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * + RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * sizeof(*rnew)); if (alloc_sets) memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew)); alloc_sets = alloc_sets + add; *result_sets = rnew; } - res = term_trunc(zh, zapt, &termp, attributeSet, - stream, &grep_info, - index_type, complete_flag, - term_dst, rank_type, - xpath_use, rset_nmem, - &(*result_sets)[*num_result_sets], - kc); + res = search_term(zh, zapt, &termp, attributeSet, hits_limit, + stream, &grep_info, + index_type, complete_flag, + rank_type, + xpath_use, rset_nmem, + &(*result_sets)[*num_result_sets], + kc, zm); if (res != ZEBRA_OK) { int i; @@ -1259,13 +1454,53 @@ static ZEBRA_RES term_list_trunc(ZebraHandle zh, return ZEBRA_OK; } -static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh, - Z_AttributesPlusTerm *zapt, - const Odr_oid *attributeSet, - const char *index_type, - NMEM rset_nmem, - RSET *rset, - struct rset_key_control *kc) +/** + \brief Create result set(s) for list of terms + \param zh Zebra Handle + \param zapt Attributes Plust Term (RPN leaf) + \param termz term as used in query but converted to UTF-8 + \param attributeSet default attribute set + \param stream memory for result + \param index_type register type ("w", "p",..) + \param complete_flag whether it's phrases or not + \param rank_type term flags for ranking + \param xpath_use use attribute for X-Path (-1 for no X-path) + \param rset_nmem memory for result sets + \param result_sets output result set for each term in list (output) + \param num_result_sets number of output result sets + \param kc rset key control to be used for created result sets +*/ +static ZEBRA_RES search_terms_list(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const char *termz, + const Odr_oid *attributeSet, + zint hits_limit, + NMEM stream, + const char *index_type, int complete_flag, + const char *rank_type, + const char *xpath_use, + NMEM rset_nmem, + RSET **result_sets, int *num_result_sets, + struct rset_key_control *kc) +{ + zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type); + return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit, + stream, index_type, complete_flag, + rank_type, xpath_use, + rset_nmem, result_sets, num_result_sets, + kc, zm); +} + + +/** \brief limit a search by position - returns result set + */ +static ZEBRA_RES search_position(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const Odr_oid *attributeSet, + const char *index_type, + NMEM rset_nmem, + RSET *rset, + struct rset_key_control *kc) { int position_value; AttrType position; @@ -1275,8 +1510,8 @@ static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh, int ord_len; char *val; ISAM_P isam_p; - zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, *index_type); - + zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type); + attr_init_APT(&position, zapt, 3); position_value = attr_find(&position, NULL); switch(position_value) @@ -1301,13 +1536,6 @@ static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh, return ZEBRA_FAIL; } - if (!zh->reg->isamb && !zh->reg->isamc) - { - zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE, - position_value); - return ZEBRA_FAIL; - } - if (zebra_apt_get_ord(zh, zapt, index_type, 0, attributeSet, &ord) != ZEBRA_OK) { @@ -1321,23 +1549,23 @@ static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh, { assert(*val == sizeof(ISAM_P)); memcpy(&isam_p, val+1, sizeof(isam_p)); - - if (zh->reg->isamb) - *rset = rsisamb_create(rset_nmem, kc, kc->scope, - zh->reg->isamb, isam_p, 0); - else if (zh->reg->isamc) - *rset = rsisamc_create(rset_nmem, kc, kc->scope, - zh->reg->isamc, isam_p, 0); + + *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope, + isam_p, 0); } return ZEBRA_OK; } - + +/** \brief returns result set for phrase search + */ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, - const char *index_type, int complete_flag, + const char *index_type, + int complete_flag, const char *rank_type, const char *xpath_use, NMEM rset_nmem, @@ -1347,11 +1575,11 @@ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, RSET *result_sets = 0; int num_result_sets = 0; ZEBRA_RES res = - term_list_trunc(zh, zapt, termz_org, attributeSet, - stream, index_type, complete_flag, - rank_type, xpath_use, - rset_nmem, - &result_sets, &num_result_sets, kc); + search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit, + stream, index_type, complete_flag, + rank_type, xpath_use, + rset_nmem, + &result_sets, &num_result_sets, kc); if (res != ZEBRA_OK) return res; @@ -1359,12 +1587,17 @@ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, if (num_result_sets > 0) { RSET first_set = 0; - res = rpn_search_APT_position(zh, zapt, attributeSet, - index_type, - rset_nmem, &first_set, - kc); + res = search_position(zh, zapt, attributeSet, + index_type, + rset_nmem, &first_set, + kc); if (res != ZEBRA_OK) + { + int i; + for (i = 0; iscope, 2, tmp_set, @@ -1443,7 +1679,7 @@ static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh, } } if (num_result_sets == 0) - *rset = rset_create_null(rset_nmem, kc, 0); + *rset = rset_create_null(rset_nmem, kc, 0); else if (num_result_sets == 1) *rset = result_sets[0]; else @@ -1454,14 +1690,17 @@ static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh, return ZEBRA_OK; } +/** \brief returns result set for and-list search + */ static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, - const char *index_type, + const char *index_type, int complete_flag, - const char *rank_type, + const char *rank_type, const char *xpath_use, NMEM rset_nmem, RSET *rset, @@ -1471,21 +1710,21 @@ static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh, int num_result_sets = 0; int i; ZEBRA_RES res = - term_list_trunc(zh, zapt, termz_org, attributeSet, - stream, index_type, complete_flag, - rank_type, xpath_use, - rset_nmem, - &result_sets, &num_result_sets, - kc); + search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit, + stream, index_type, complete_flag, + rank_type, xpath_use, + rset_nmem, + &result_sets, &num_result_sets, + kc); if (res != ZEBRA_OK) return res; for (i = 0; iscope, 2, tmp_set, @@ -1510,12 +1749,12 @@ static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh, if (num_result_sets == 0) - *rset = rset_create_null(rset_nmem, kc, 0); + *rset = rset_create_null(rset_nmem, kc, 0); else if (num_result_sets == 1) *rset = result_sets[0]; else *rset = rset_create_and(rset_nmem, kc, kc->scope, - num_result_sets, result_sets); + num_result_sets, result_sets); if (!*rset) return ZEBRA_FAIL; return ZEBRA_OK; @@ -1528,7 +1767,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, struct grep_info *grep_info, int *max_pos, zebra_map_t zm, - char *term_dst, + WRBUF display_term, int *error_code) { AttrType relation; @@ -1547,8 +1786,8 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, { case 1: yaz_log(log_level_rpn, "Relation <"); - if (!term_100(zm, term_sub, term_num, 1, term_dst)) - { + if (!term_100(zm, term_sub, term_num, 1, display_term)) + { wrbuf_destroy(term_num); return 0; } @@ -1557,7 +1796,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, break; case 2: yaz_log(log_level_rpn, "Relation <="); - if (!term_100(zm, term_sub, term_num, 1, term_dst)) + if (!term_100(zm, term_sub, term_num, 1, display_term)) { wrbuf_destroy(term_num); return 0; @@ -1567,7 +1806,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, break; case 4: yaz_log(log_level_rpn, "Relation >="); - if (!term_100(zm, term_sub, term_num, 1, term_dst)) + if (!term_100(zm, term_sub, term_num, 1, display_term)) { wrbuf_destroy(term_num); return 0; @@ -1577,7 +1816,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, break; case 5: yaz_log(log_level_rpn, "Relation >"); - if (!term_100(zm, term_sub, term_num, 1, term_dst)) + if (!term_100(zm, term_sub, term_num, 1, display_term)) { wrbuf_destroy(term_num); return 0; @@ -1588,10 +1827,10 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, case -1: case 3: yaz_log(log_level_rpn, "Relation ="); - if (!term_100(zm, term_sub, term_num, 1, term_dst)) + if (!term_100(zm, term_sub, term_num, 1, display_term)) { wrbuf_destroy(term_num); - return 0; + return 0; } term_value = atoi(wrbuf_cstr(term_num)); wrbuf_printf(term_dict, "(0*%d)", term_value); @@ -1603,10 +1842,10 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, break; default: *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE; - wrbuf_destroy(term_num); + wrbuf_destroy(term_num); return 0; } - r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), + r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0, grep_info, max_pos, 0, grep_handle); if (r == 1) @@ -1619,12 +1858,12 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, } static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const char **term_sub, + const char **term_sub, WRBUF term_dict, const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, const char *index_type, int complete_flag, - char *term_dst, + WRBUF display_term, const char *xpath_use, struct ord_list **ol) { @@ -1634,26 +1873,26 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, int relation_error = 0; int ord, ord_len, i; char ord_buf[32]; - zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, *index_type); - + zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type); + *ol = ord_list_create(stream); rpn_char_map_prepare(zh->reg, zm, &rcmi); termp = *term_sub; - + if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use, attributeSet, &ord) != ZEBRA_OK) { return ZEBRA_FAIL; } - + wrbuf_rewind(term_dict); - + *ol = ord_list_append(stream, *ol, ord); - + ord_len = key_SU_encode(ord, ord_buf); - + wrbuf_putc(term_dict, '('); for (i = 0; i < ord_len; i++) { @@ -1661,10 +1900,10 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, wrbuf_putc(term_dict, ord_buf[i]); } wrbuf_putc(term_dict, ')'); - + if (!numeric_relation(zh, zapt, &termp, term_dict, attributeSet, grep_info, &max_pos, zm, - term_dst, &relation_error)) + display_term, &relation_error)) { if (relation_error) { @@ -1679,43 +1918,45 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, return ZEBRA_OK; } - + static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz, const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, - const char *index_type, + const char *index_type, int complete_flag, - const char *rank_type, + const char *rank_type, const char *xpath_use, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc) { - char term_dst[IT_MAX_WORD+1]; const char *termp = termz; RSET *result_sets = 0; int num_result_sets = 0; ZEBRA_RES res; struct grep_info grep_info; int alloc_sets = 0; - zint hits_limit_value; + zint hits_limit_value = hits_limit; const char *term_ref_id_str = 0; - term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream); + zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, + stream); yaz_log(log_level_rpn, "APT_numeric t='%s'", termz); - if (grep_info_prepare(zh, zapt, &grep_info, *index_type) == ZEBRA_FAIL) + if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL) return ZEBRA_FAIL; while (1) - { + { struct ord_list *ol; WRBUF term_dict = wrbuf_alloc(); + WRBUF display_term = wrbuf_alloc(); if (alloc_sets == num_result_sets) { int add = 10; - RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * + RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * sizeof(*rnew)); if (alloc_sets) memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew)); @@ -1727,20 +1968,24 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, res = numeric_term(zh, zapt, &termp, term_dict, attributeSet, stream, &grep_info, index_type, complete_flag, - term_dst, xpath_use, &ol); + display_term, xpath_use, &ol); wrbuf_destroy(term_dict); if (res == ZEBRA_FAIL || termp == 0) + { + wrbuf_destroy(display_term); break; - yaz_log(YLOG_DEBUG, "term: %s", term_dst); + } + yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term)); result_sets[num_result_sets] = rset_trunc(zh, grep_info.isam_p_buf, - grep_info.isam_p_indx, term_dst, - strlen(term_dst), rank_type, + grep_info.isam_p_indx, wrbuf_buf(display_term), + wrbuf_len(display_term), rank_type, 0 /* preserve position */, - zapt->term->which, rset_nmem, + zapt->term->which, rset_nmem, kc, kc->scope, ol, index_type, hits_limit_value, term_ref_id_str); + wrbuf_destroy(display_term); if (!result_sets[num_result_sets]) break; num_result_sets++; @@ -1774,7 +2019,7 @@ static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh, { Record rec; zint sysno = atozint(termz); - + if (sysno <= 0) sysno = 0; rec = rec_get(zh->reg->records, sysno); @@ -1794,7 +2039,7 @@ static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh, *rset = rset_create_temp(rset_nmem, kc, kc->scope, res_get(zh->res, "setTmpDir"), 0); rsfd = rset_open(*rset, RSETF_WRITE); - + key.mem[0] = sysno; key.mem[1] = 1; key.len = 2; @@ -1818,7 +2063,7 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, Z_SortKeySpec *sks; Z_SortKey *sk; char termz[20]; - + attr_init_APT(&sort_relation_type, zapt, 7); sort_relation_value = attr_find(&sort_relation_type, &attributeSet); @@ -1827,7 +2072,7 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, sort_sequence->num_specs = 10; sort_sequence->specs = (Z_SortKeySpec **) nmem_malloc(stream, sort_sequence->num_specs * - sizeof(*sort_sequence->specs)); + sizeof(*sort_sequence->specs)); for (i = 0; inum_specs; i++) sort_sequence->specs[i] = 0; } @@ -1835,7 +2080,7 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, i = 0; else i = atoi_n((char *) zapt->term->u.general->buf, - zapt->term->u.general->len); + zapt->term->u.general->len); if (i >= sort_sequence->num_specs) i = 0; sprintf(termz, "%d", i); @@ -1853,16 +2098,16 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet); sk->u.sortAttributes->list = zapt->attributes; - sks->sortRelation = (int *) + sks->sortRelation = (Odr_int *) nmem_malloc(stream, sizeof(*sks->sortRelation)); if (sort_relation_value == 1) *sks->sortRelation = Z_SortKeySpec_ascending; else if (sort_relation_value == 2) *sks->sortRelation = Z_SortKeySpec_descending; - else + else *sks->sortRelation = Z_SortKeySpec_ascending; - sks->caseSensitivity = (int *) + sks->caseSensitivity = (Odr_int *) nmem_malloc(stream, sizeof(*sks->caseSensitivity)); *sks->caseSensitivity = 0; @@ -1882,7 +2127,7 @@ static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const Odr_oid *curAttributeSet = attributeSet; AttrType use; const char *use_string = 0; - + attr_init_APT(&use, zapt, 1); attr_find_ex(&use, &curAttributeSet, &use_string); @@ -1891,27 +2136,27 @@ static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt, return zebra_parse_xpath_str(use_string, xpath, max, mem); } - - + + static RSET xpath_trunc(ZebraHandle zh, NMEM stream, - const char *index_type, const char *term, + const char *index_type, const char *term, const char *xpath_use, NMEM rset_nmem, struct rset_key_control *kc) { struct grep_info grep_info; - int ord = zebraExplain_lookup_attr_str(zh->reg->zei, + int ord = zebraExplain_lookup_attr_str(zh->reg->zei, zinfo_index_category_index, index_type, xpath_use); - if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL) + if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL) return rset_create_null(rset_nmem, kc, 0); - + if (ord < 0) return rset_create_null(rset_nmem, kc, 0); else { - int i, r, max_pos; + int i, max_pos; char ord_buf[32]; RSET rset; WRBUF term_dict = wrbuf_alloc(); @@ -1927,10 +2172,10 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream, } wrbuf_putc(term_dict, ')'); wrbuf_puts(term_dict, term); - + grep_info.isam_p_indx = 0; - r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0, - &grep_info, &max_pos, 0, grep_handle); + dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0, + &grep_info, &max_pos, 0, grep_handle); yaz_log(YLOG_DEBUG, "%s %d positions", term, grep_info.isam_p_indx); rset = rset_trunc(zh, grep_info.isam_p_buf, @@ -1969,30 +2214,30 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, } /* - //a -> a/.* - //a/b -> b/a/.* - /a -> a/ - /a/b -> b/a/ - - / -> none - - a[@attr = value]/b[@other = othervalue] - - /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/) - /a/b val range(b/a/,freetext(w,1016,val),b/a/) - /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/) - /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y) - /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y) - /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x) - + //a -> a/.* + //a/b -> b/a/.* + /a -> a/ + /a/b -> b/a/ + + / -> none + + a[@attr = value]/b[@other = othervalue] + + /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/) + /a/b val range(b/a/,freetext(w,1016,val),b/a/) + /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/) + /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y) + /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y) + /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x) + */ dict_grep_cmap(zh->reg->dict, 0, 0); - + { int level = xpath_len; int first_path = 1; - + while (--level >= 0) { WRBUF xpath_rev = wrbuf_alloc(); @@ -2032,7 +2277,7 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, { const char *cp = xpath[level].predicate->u.relation.value; wrbuf_putc(wbuf, '='); - + while (*cp) { if (strchr(REGEX_CHARS, *cp)) @@ -2042,12 +2287,12 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, } } rset_attr = xpath_trunc( - zh, stream, "0", wrbuf_cstr(wbuf), - ZEBRA_XPATH_ATTR_NAME, + zh, stream, "0", wrbuf_cstr(wbuf), + ZEBRA_XPATH_ATTR_NAME, rset_nmem, kc); wrbuf_destroy(wbuf); - } - else + } + else { if (!first_path) { @@ -2055,23 +2300,23 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, continue; } } - yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, + yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, wrbuf_cstr(xpath_rev)); if (wrbuf_len(xpath_rev)) { - rset_start_tag = xpath_trunc(zh, stream, "0", + rset_start_tag = xpath_trunc(zh, stream, "0", wrbuf_cstr(xpath_rev), - ZEBRA_XPATH_ELM_BEGIN, + ZEBRA_XPATH_ELM_BEGIN, rset_nmem, kc); if (always_matches) rset = rset_start_tag; else { - rset_end_tag = xpath_trunc(zh, stream, "0", + rset_end_tag = xpath_trunc(zh, stream, "0", wrbuf_cstr(xpath_rev), - ZEBRA_XPATH_ELM_END, + ZEBRA_XPATH_ELM_END, rset_nmem, kc); - + rset = rset_create_between(rset_nmem, kc, kc->scope, rset_start_tag, rset, rset_end_tag, rset_attr); @@ -2087,18 +2332,20 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, #define MAX_XPATH_STEPS 10 -static ZEBRA_RES rpn_search_database(ZebraHandle zh, +static ZEBRA_RES rpn_search_database(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const Odr_oid *attributeSet, NMEM stream, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, Z_SortKeySpecList *sort_sequence, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc); static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const Odr_oid *attributeSet, NMEM stream, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, Z_SortKeySpecList *sort_sequence, - int num_bases, char **basenames, + int num_bases, const char **basenames, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc) @@ -2116,7 +2363,7 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, res = ZEBRA_FAIL; break; } - res = rpn_search_database(zh, zapt, attributeSet, stream, + res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream, sort_sequence, rset_nmem, rsets+i, kc); if (res != ZEBRA_OK) @@ -2128,12 +2375,12 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, rset_delete(rsets[i]); *rset = 0; } - else + else { if (num_bases == 1) *rset = rsets[0]; else if (num_bases == 0) - *rset = rset_create_null(rset_nmem, kc, 0); + *rset = rset_create_null(rset_nmem, kc, 0); else *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */, num_bases, rsets); @@ -2141,9 +2388,10 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, return res; } -static ZEBRA_RES rpn_search_database(ZebraHandle zh, +static ZEBRA_RES rpn_search_database(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const Odr_oid *attributeSet, NMEM stream, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, Z_SortKeySpecList *sort_sequence, NMEM rset_nmem, RSET *rset, @@ -2167,7 +2415,7 @@ static ZEBRA_RES rpn_search_database(ZebraHandle zh, } zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type, rank_type, &complete_flag, &sort_flag); - + yaz_log(YLOG_DEBUG, "index_type=%s", index_type); yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag); yaz_log(YLOG_DEBUG, "search_type=%s", search_type); @@ -2180,14 +2428,14 @@ static ZEBRA_RES rpn_search_database(ZebraHandle zh, return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence, rank_type, rset_nmem, rset, kc); /* consider if an X-Path query is used */ - xpath_len = rpn_check_xpath(zh, zapt, attributeSet, + xpath_len = rpn_check_xpath(zh, zapt, attributeSet, xpath, MAX_XPATH_STEPS, stream); if (xpath_len >= 0) { - if (xpath[xpath_len-1].part[0] == '@') + if (xpath[xpath_len-1].part[0] == '@') xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */ else - xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */ + xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */ if (1) { @@ -2200,7 +2448,7 @@ static ZEBRA_RES rpn_search_database(ZebraHandle zh, if (relation_value == 103) /* alwaysmatches */ { *rset = 0; /* signal no "term" set */ - return rpn_search_xpath(zh, stream, rank_type, *rset, + return rpn_search_xpath(zh, stream, rank_type, *rset, xpath_len, xpath, rset_nmem, rset, kc); } } @@ -2208,7 +2456,7 @@ static ZEBRA_RES rpn_search_database(ZebraHandle zh, /* search using one of the various search type strategies termz is our UTF-8 search term - attributeSet is top-level default attribute set + attributeSet is top-level default attribute set stream is ODR for search reg_id is the register type complete_flag is 1 for complete subfield, 0 for incomplete @@ -2216,7 +2464,8 @@ static ZEBRA_RES rpn_search_database(ZebraHandle zh, */ if (!strcmp(search_type, "phrase")) { - res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream, + res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit, + stream, index_type, complete_flag, rank_type, xpath_use, rset_nmem, @@ -2224,7 +2473,8 @@ static ZEBRA_RES rpn_search_database(ZebraHandle zh, } else if (!strcmp(search_type, "and-list")) { - res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream, + res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit, + stream, index_type, complete_flag, rank_type, xpath_use, rset_nmem, @@ -2232,7 +2482,8 @@ static ZEBRA_RES rpn_search_database(ZebraHandle zh, } else if (!strcmp(search_type, "or-list")) { - res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream, + res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit, + stream, index_type, complete_flag, rank_type, xpath_use, rset_nmem, @@ -2245,7 +2496,8 @@ static ZEBRA_RES rpn_search_database(ZebraHandle zh, } else if (!strcmp(search_type, "numeric")) { - res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream, + res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, hits_limit, + stream, index_type, complete_flag, rank_type, xpath_use, rset_nmem, @@ -2260,15 +2512,16 @@ static ZEBRA_RES rpn_search_database(ZebraHandle zh, return res; if (!*rset) return ZEBRA_FAIL; - return rpn_search_xpath(zh, stream, rank_type, *rset, + return rpn_search_xpath(zh, stream, rank_type, *rset, xpath_len, xpath, rset_nmem, rset, kc); } static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, - const Odr_oid *attributeSet, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, - int num_bases, char **basenames, + int num_bases, const char **basenames, RSET **result_sets, int *num_result_sets, Z_Operator *parent_op, struct rset_key_control *kc); @@ -2293,9 +2546,9 @@ ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs, Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm; AttrType global_hits_limit_attr; int l; - + attr_init_APT(&global_hits_limit_attr, zapt, 12); - + l = attr_find(&global_hits_limit_attr, NULL); if (l != -1) *approx_limit = l; @@ -2305,10 +2558,11 @@ ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs, } ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs, - const Odr_oid *attributeSet, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, - int num_bases, char **basenames, + int num_bases, const char **basenames, RSET *result_set) { RSET *result_sets = 0; @@ -2316,9 +2570,9 @@ ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs, ZEBRA_RES res; struct rset_key_control *kc = zebra_key_control_create(zh); - res = rpn_search_structure(zh, zs, attributeSet, + res = rpn_search_structure(zh, zs, attributeSet, hits_limit, stream, rset_nmem, - sort_sequence, + sort_sequence, num_bases, basenames, &result_sets, &num_result_sets, 0 /* no parent op */, @@ -2342,10 +2596,10 @@ ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs, } ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, - const Odr_oid *attributeSet, + const Odr_oid *attributeSet, zint hits_limit, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, - int num_bases, char **basenames, + int num_bases, const char **basenames, RSET **result_sets, int *num_result_sets, Z_Operator *parent_op, struct rset_key_control *kc) @@ -2361,7 +2615,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, int num_result_sets_r = 0; res = rpn_search_structure(zh, zs->u.complex->s1, - attributeSet, stream, rset_nmem, + attributeSet, hits_limit, stream, rset_nmem, sort_sequence, num_bases, basenames, &result_sets_l, &num_result_sets_l, @@ -2374,7 +2628,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, return res; } res = rpn_search_structure(zh, zs->u.complex->s2, - attributeSet, stream, rset_nmem, + attributeSet, hits_limit, stream, rset_nmem, sort_sequence, num_bases, basenames, &result_sets_r, &num_result_sets_r, @@ -2391,11 +2645,11 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, /* make a new list of result for all children */ *num_result_sets = num_result_sets_l + num_result_sets_r; - *result_sets = nmem_malloc(stream, *num_result_sets * + *result_sets = nmem_malloc(stream, *num_result_sets * sizeof(**result_sets)); - memcpy(*result_sets, result_sets_l, + memcpy(*result_sets, result_sets_l, num_result_sets_l * sizeof(**result_sets)); - memcpy(*result_sets + num_result_sets_l, result_sets_r, + memcpy(*result_sets + num_result_sets_l, result_sets_r, num_result_sets_r * sizeof(**result_sets)); if (!parent_op || parent_op->which != zop->which @@ -2426,7 +2680,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, case Z_Operator_prox: if (zop->u.prox->which != Z_ProximityOperator_known) { - zebra_setError(zh, + zebra_setError(zh, YAZ_BIB1_UNSUPP_PROX_UNIT_CODE, 0); return ZEBRA_FAIL; @@ -2442,9 +2696,9 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, { rset = rset_create_prox(rset_nmem, kc, kc->scope, - *num_result_sets, *result_sets, + *num_result_sets, *result_sets, *zop->u.prox->ordered, - (!zop->u.prox->exclusion ? + (!zop->u.prox->exclusion ? 0 : *zop->u.prox->exclusion), *zop->u.prox->relationType, *zop->u.prox->distance ); @@ -2455,7 +2709,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, return ZEBRA_FAIL; } *num_result_sets = 1; - *result_sets = nmem_malloc(stream, *num_result_sets * + *result_sets = nmem_malloc(stream, *num_result_sets * sizeof(**result_sets)); (*result_sets)[0] = rset; } @@ -2469,7 +2723,8 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, { yaz_log(YLOG_DEBUG, "rpn_search_APT"); res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm, - attributeSet, stream, sort_sequence, + attributeSet, hits_limit, + stream, sort_sequence, num_bases, basenames, rset_nmem, &rset, kc); if (res != ZEBRA_OK) @@ -2481,7 +2736,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, rset = resultSetRef(zh, zs->u.simple->u.resultSetId); if (!rset) { - zebra_setError(zh, + zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST, zs->u.simple->u.resultSetId); return ZEBRA_FAIL; @@ -2494,7 +2749,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, return ZEBRA_FAIL; } *num_result_sets = 1; - *result_sets = nmem_malloc(stream, *num_result_sets * + *result_sets = nmem_malloc(stream, *num_result_sets * sizeof(**result_sets)); (*result_sets)[0] = rset; } @@ -2511,6 +2766,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab