X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Frpnsearch.c;h=5fa197fe929bc6f30bf9820ee162ab79c181250f;hp=28a6670aecabf8e6af277f65abc4b0e7c31d0985;hb=03419e1f6a4ae8a5b255e7c215da40678c30bb25;hpb=1872e3fc60b482771bbd1cb4b0290b8d6a9ef5d0 diff --git a/index/rpnsearch.c b/index/rpnsearch.c index 28a6670..5fa197f 100644 --- a/index/rpnsearch.c +++ b/index/rpnsearch.c @@ -1,8 +1,5 @@ -/* $Id: rpnsearch.c,v 1.11 2007-04-16 08:44:32 adam Exp $ - Copyright (C) 1995-2007 - Index Data ApS - -This file is part of the Zebra server. +/* This file is part of the Zebra server. + Copyright (C) 1994-2010 Index Data Zebra is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -45,7 +42,7 @@ static int log_level_rpn = 0; static const char **rpn_char_map_handler(void *vp, const char **from, int len) { struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp; - const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0); + const char **out = zebra_maps_input(p->zm, from, len, 0); #if 0 if (out && *out) { @@ -61,12 +58,14 @@ static const char **rpn_char_map_handler(void *vp, const char **from, int len) return out; } -void rpn_char_map_prepare(struct zebra_register *reg, int reg_type, +void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm, struct rpn_char_map_info *map_info) { - map_info->zm = reg->zebra_maps; - map_info->reg_type = reg_type; - dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler); + map_info->zm = zm; + if (zebra_maps_is_icu(zm)) + dict_grep_cmap(reg->dict, 0, 0); + else + dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler); } #define TERM_COUNT @@ -80,7 +79,7 @@ struct grep_info { int isam_p_indx; int trunc_max; ZebraHandle zh; - int reg_type; + const char *index_type; ZebraSet termset; }; @@ -109,7 +108,7 @@ static int add_isam_p(const char *name, const char *info, if (p->isam_p_buf) { 
memcpy(new_isam_p_buf, p->isam_p_buf, - p->isam_p_indx * sizeof(*p->isam_p_buf)); + p->isam_p_indx * sizeof(*p->isam_p_buf)); xfree(p->isam_p_buf); } p->isam_p_buf = new_isam_p_buf; @@ -119,7 +118,7 @@ static int add_isam_p(const char *name, const char *info, if (p->term_no) { memcpy(new_term_no, p->isam_p_buf, - p->isam_p_indx * sizeof(*p->term_no)); + p->isam_p_indx * sizeof(*p->term_no)); xfree(p->term_no); } p->term_no = new_term_no; @@ -134,9 +133,9 @@ static int add_isam_p(const char *name, const char *info, char term_tmp[IT_MAX_WORD]; int ord = 0; const char *index_name; - int len = key_SU_decode (&ord, (const unsigned char *) name); + int len = key_SU_decode(&ord, (const unsigned char *) name); - zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len); + zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len); yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp); zebraExplain_lookup_ord(p->zh->reg->zei, ord, 0 /* index_type */, &db, &index_name); @@ -154,8 +153,8 @@ static int grep_handle(char *name, const char *info, void *p) return add_isam_p(name, info, (struct grep_info *) p); } -static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src, - const char *ct1, const char *ct2, int first) +static int term_pre(zebra_map_t zm, const char **src, + const char *ct1, int first) { const char *s1, *s0 = *src; const char **map; @@ -165,10 +164,8 @@ static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src, { if (ct1 && strchr(ct1, *s0)) break; - if (ct2 && strchr(ct2, *s0)) - break; s1 = s0; - map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first); + map = zebra_maps_input(zm, &s1, strlen(s1), first); if (**map != *CHR_SPACE) break; s0 = s1; @@ -204,30 +201,108 @@ static void esc_str(char *out_buf, size_t out_size, } } -#define REGEX_CHARS " []()|.*+?!" 
+#define REGEX_CHARS " ^[]()|.*+?!\"$" + +static void add_non_space(const char *start, const char *end, + WRBUF term_dict, + WRBUF display_term, + const char **map, int q_map_match) +{ + size_t sz = end - start; + + wrbuf_write(display_term, start, sz); + if (!q_map_match) + { + while (start < end) + { + if (strchr(REGEX_CHARS, *start)) + wrbuf_putc(term_dict, '\\'); + wrbuf_putc(term_dict, *start); + start++; + } + } + else + { + char tmpbuf[80]; + esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); + + wrbuf_puts(term_dict, map[0]); + } +} + + +static int term_100_icu(zebra_map_t zm, + const char **src, WRBUF term_dict, int space_split, + WRBUF display_term, + int right_trunc) +{ + int i; + const char *res_buf = 0; + size_t res_len = 0; + const char *display_buf; + size_t display_len; + if (!zebra_map_tokenize_next(zm, &res_buf, &res_len, + &display_buf, &display_len)) + { + *src += strlen(*src); + return 0; + } + wrbuf_write(display_term, display_buf, display_len); + if (right_trunc) + { + /* ICU sort keys seem to be of the form + basechars \x01 accents \x01 length + For now we'll just right truncate from basechars . This + may give false hits due to accents not being used. + */ + i = res_len; + while (--i >= 0 && res_buf[i] != '\x01') + ; + if (i > 0) + { + while (--i >= 0 && res_buf[i] != '\x01') + ; + } + if (i == 0) + { /* did not find base chars at all. 
Throw error */ + return -1; + } + res_len = i; /* reduce res_len */ + } + for (i = 0; i < res_len; i++) + { + if (strchr(REGEX_CHARS "\\", res_buf[i])) + wrbuf_putc(term_dict, '\\'); + if (res_buf[i] < 32) + wrbuf_putc(term_dict, 1); + + wrbuf_putc(term_dict, res_buf[i]); + } + if (right_trunc) + wrbuf_puts(term_dict, ".*"); + return 1; +} /* term_100: handle term, where trunc = none(no operators at all) */ -static int term_100(ZebraMaps zebra_maps, int reg_type, - const char **src, char *dst, int space_split, - char *dst_term) +static int term_100(zebra_map_t zm, + const char **src, WRBUF term_dict, int space_split, + WRBUF display_term) { const char *s0; const char **map; int i = 0; - int j = 0; const char *space_start = 0; const char *space_end = 0; - if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split)) + if (!term_pre(zm, src, 0, !space_split)) return 0; s0 = *src; while (*s0) { const char *s1 = s0; int q_map_match = 0; - map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), - &q_map_match); + map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match); if (space_split) { if (**map == *CHR_SPACE) @@ -246,110 +321,71 @@ static int term_100(ZebraMaps zebra_maps, int reg_type, while (space_start < space_end) { if (strchr(REGEX_CHARS, *space_start)) - dst[i++] = '\\'; - dst_term[j++] = *space_start; - dst[i++] = *space_start++; + wrbuf_putc(term_dict, '\\'); + wrbuf_putc(display_term, *space_start); + wrbuf_putc(term_dict, *space_start); + space_start++; + } /* and reset */ space_start = space_end = 0; } } - /* add non-space char */ - memcpy(dst_term+j, s1, s0 - s1); - j += (s0 - s1); - if (!q_map_match) - { - while (s1 < s0) - { - if (strchr(REGEX_CHARS, *s1)) - dst[i++] = '\\'; - dst[i++] = *s1++; - } - } - else - { - char tmpbuf[80]; - esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); - - strcpy(dst + i, map[0]); - i += strlen(map[0]); - } + i++; + + add_non_space(s1, s0, term_dict, display_term, map, q_map_match); } - dst[i] = 
'\0'; - dst_term[j] = '\0'; *src = s0; return i; } /* term_101: handle term, where trunc = Process # */ -static int term_101(ZebraMaps zebra_maps, int reg_type, - const char **src, char *dst, int space_split, - char *dst_term) +static int term_101(zebra_map_t zm, + const char **src, WRBUF term_dict, int space_split, + WRBUF display_term) { const char *s0; const char **map; int i = 0; - int j = 0; - if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split)) + if (!term_pre(zm, src, "#", !space_split)) return 0; s0 = *src; while (*s0) { if (*s0 == '#') { - dst[i++] = '.'; - dst[i++] = '*'; - dst_term[j++] = *s0++; + i++; + wrbuf_puts(term_dict, ".*"); + wrbuf_putc(display_term, *s0); + s0++; } else { const char *s1 = s0; int q_map_match = 0; - map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), - &q_map_match); + map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match); if (space_split && **map == *CHR_SPACE) break; - /* add non-space char */ - memcpy(dst_term+j, s1, s0 - s1); - j += (s0 - s1); - if (!q_map_match) - { - while (s1 < s0) - { - if (strchr(REGEX_CHARS, *s1)) - dst[i++] = '\\'; - dst[i++] = *s1++; - } - } - else - { - char tmpbuf[80]; - esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); - - strcpy(dst + i, map[0]); - i += strlen(map[0]); - } + i++; + add_non_space(s1, s0, term_dict, display_term, map, q_map_match); } } - dst[i] = '\0'; - dst_term[j++] = '\0'; *src = s0; return i; } /* term_103: handle term, where trunc = re-2 (regular expressions) */ -static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src, - char *dst, int *errors, int space_split, - char *dst_term) +static int term_103(zebra_map_t zm, const char **src, + WRBUF term_dict, int *errors, int space_split, + WRBUF display_term) { int i = 0; - int j = 0; const char *s0; const char **map; - if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split)) + if (!term_pre(zm, src, "^\\()[].*+?|", !space_split)) return 0; s0 = *src; if (errors 
&& *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] && @@ -364,209 +400,154 @@ static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src, { if (strchr("^\\()[].*+?|-", *s0)) { - dst_term[j++] = *s0; - dst[i++] = *s0++; + wrbuf_putc(display_term, *s0); + wrbuf_putc(term_dict, *s0); + s0++; + i++; } else { const char *s1 = s0; int q_map_match = 0; - map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), - &q_map_match); + map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match); if (space_split && **map == *CHR_SPACE) break; - /* add non-space char */ - memcpy(dst_term+j, s1, s0 - s1); - j += (s0 - s1); - if (!q_map_match) - { - while (s1 < s0) - { - if (strchr(REGEX_CHARS, *s1)) - dst[i++] = '\\'; - dst[i++] = *s1++; - } - } - else - { - char tmpbuf[80]; - esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); - - strcpy(dst + i, map[0]); - i += strlen(map[0]); - } + i++; + add_non_space(s1, s0, term_dict, display_term, map, q_map_match); } } - dst[i] = '\0'; - dst_term[j] = '\0'; *src = s0; return i; } /* term_103: handle term, where trunc = re-1 (regular expressions) */ -static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src, - char *dst, int space_split, char *dst_term) +static int term_102(zebra_map_t zm, const char **src, + WRBUF term_dict, int space_split, WRBUF display_term) { - return term_103(zebra_maps, reg_type, src, dst, NULL, space_split, - dst_term); + return term_103(zm, src, term_dict, NULL, space_split, display_term); } -/* term_104: handle term, where trunc = Process # and ! */ -static int term_104(ZebraMaps zebra_maps, int reg_type, - const char **src, char *dst, int space_split, - char *dst_term) +/* term_104: handle term, process # and ! 
*/ +static int term_104(zebra_map_t zm, const char **src, + WRBUF term_dict, int space_split, WRBUF display_term) { const char *s0; const char **map; int i = 0; - int j = 0; - if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split)) + if (!term_pre(zm, src, "?*#", !space_split)) return 0; s0 = *src; while (*s0) { if (*s0 == '?') { - dst_term[j++] = *s0++; + i++; + wrbuf_putc(display_term, *s0); + s0++; if (*s0 >= '0' && *s0 <= '9') { int limit = 0; while (*s0 >= '0' && *s0 <= '9') { limit = limit * 10 + (*s0 - '0'); - dst_term[j++] = *s0++; + wrbuf_putc(display_term, *s0); + s0++; } if (limit > 20) limit = 20; while (--limit >= 0) { - dst[i++] = '.'; - dst[i++] = '?'; + wrbuf_puts(term_dict, ".?"); } } else { - dst[i++] = '.'; - dst[i++] = '*'; + wrbuf_puts(term_dict, ".*"); } } else if (*s0 == '*') { - dst[i++] = '.'; - dst[i++] = '*'; - dst_term[j++] = *s0++; + i++; + wrbuf_puts(term_dict, ".*"); + wrbuf_putc(display_term, *s0); + s0++; } else if (*s0 == '#') { - dst[i++] = '.'; - dst_term[j++] = *s0++; + i++; + wrbuf_puts(term_dict, "."); + wrbuf_putc(display_term, *s0); + s0++; } else { const char *s1 = s0; int q_map_match = 0; - map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), - &q_map_match); + map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match); if (space_split && **map == *CHR_SPACE) break; - /* add non-space char */ - memcpy(dst_term+j, s1, s0 - s1); - j += (s0 - s1); - if (!q_map_match) - { - while (s1 < s0) - { - if (strchr(REGEX_CHARS, *s1)) - dst[i++] = '\\'; - dst[i++] = *s1++; - } - } - else - { - char tmpbuf[80]; - esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); - - strcpy(dst + i, map[0]); - i += strlen(map[0]); - } + i++; + add_non_space(s1, s0, term_dict, display_term, map, q_map_match); } } - dst[i] = '\0'; - dst_term[j++] = '\0'; *src = s0; return i; } /* term_105/106: handle term, where trunc = Process * and ! 
and right trunc */ -static int term_105(ZebraMaps zebra_maps, int reg_type, - const char **src, char *dst, int space_split, - char *dst_term, int right_truncate) +static int term_105(zebra_map_t zm, const char **src, + WRBUF term_dict, int space_split, + WRBUF display_term, int right_truncate) { const char *s0; const char **map; int i = 0; - int j = 0; - if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split)) + if (!term_pre(zm, src, "\\*!", !space_split)) return 0; s0 = *src; while (*s0) { if (*s0 == '*') { - dst[i++] = '.'; - dst[i++] = '*'; - dst_term[j++] = *s0++; + i++; + wrbuf_puts(term_dict, ".*"); + wrbuf_putc(display_term, *s0); + s0++; } else if (*s0 == '!') { - dst[i++] = '.'; - dst_term[j++] = *s0++; + i++; + wrbuf_putc(term_dict, '.'); + wrbuf_putc(display_term, *s0); + s0++; + } + else if (*s0 == '\\') + { + i++; + wrbuf_puts(term_dict, "\\\\"); + wrbuf_putc(display_term, *s0); + s0++; } else { const char *s1 = s0; int q_map_match = 0; - map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), - &q_map_match); + map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match); if (space_split && **map == *CHR_SPACE) break; - /* add non-space char */ - memcpy(dst_term+j, s1, s0 - s1); - j += (s0 - s1); - if (!q_map_match) - { - while (s1 < s0) - { - if (strchr(REGEX_CHARS, *s1)) - dst[i++] = '\\'; - dst[i++] = *s1++; - } - } - else - { - char tmpbuf[80]; - esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); - - strcpy(dst + i, map[0]); - i += strlen(map[0]); - } + i++; + add_non_space(s1, s0, term_dict, display_term, map, q_map_match); } } if (right_truncate) - { - dst[i++] = '.'; - dst[i++] = '*'; - } - dst[i] = '\0'; - - dst_term[j++] = '\0'; + wrbuf_puts(term_dict, ".*"); *src = s0; return i; } @@ -576,8 +557,10 @@ static int term_105(ZebraMaps zebra_maps, int reg_type, * val: border value (inclusive) * islt: 1 if <=; 0 if >=. 
*/ -static void gen_regular_rel(char *dst, int val, int islt) +static void gen_regular_rel(WRBUF term_dict, int val, int islt) { + char dst_buf[20*5*20]; /* assuming enough for expansion */ + char *dst = dst_buf; int dst_p; int w, d, i; int pos = 0; @@ -596,7 +579,6 @@ static void gen_regular_rel(char *dst, int val, int islt) if (!islt) { strcpy(dst, "([0-9]+|-("); - dst_p = strlen(dst); islt = 1; } else @@ -682,13 +664,19 @@ static void gen_regular_rel(char *dst, int val, int islt) strcat(dst, "[0-9]*"); } strcat(dst, "))"); + wrbuf_puts(term_dict, dst); } -void string_rel_add_char(char **term_p, const char *src, int *indx) +void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx) { + const char *src = wrbuf_cstr(wsrc); if (src[*indx] == '\\') - *(*term_p)++ = src[(*indx)++]; - *(*term_p)++ = src[(*indx)++]; + { + wrbuf_putc(term_p, src[*indx]); + (*indx)++; + } + wrbuf_putc(term_p, src[*indx]); + (*indx)++; } /* @@ -702,16 +690,16 @@ void string_rel_add_char(char **term_p, const char *src, int *indx) * ([^a-].*|a[^b-].*|ab[^c-].*|abc) */ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const char **term_sub, char *term_dict, - const int *attributeSet, - int reg_type, int space_split, char *term_dst, + const char **term_sub, WRBUF term_dict, + const Odr_oid *attributeSet, + zebra_map_t zm, int space_split, + WRBUF display_term, int *error_code) { AttrType relation; int relation_value; int i; - char *term_tmp = term_dict + strlen(term_dict); - char term_component[2*IT_MAX_WORD+20]; + WRBUF term_component = wrbuf_alloc(); attr_init_APT(&relation, zapt, 2); relation_value = attr_find(&relation, NULL); @@ -721,153 +709,142 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, switch (relation_value) { case 1: - if (!term_100(zh->reg->zebra_maps, reg_type, - term_sub, term_component, - space_split, term_dst)) + if (!term_100(zm, term_sub, term_component, space_split, display_term)) + { + wrbuf_destroy(term_component); 
return 0; + } yaz_log(log_level_rpn, "Relation <"); - *term_tmp++ = '('; - for (i = 0; term_component[i]; ) + wrbuf_putc(term_dict, '('); + for (i = 0; i < wrbuf_len(term_component); ) { int j = 0; - + if (i) - *term_tmp++ = '|'; + wrbuf_putc(term_dict, '|'); while (j < i) - string_rel_add_char(&term_tmp, term_component, &j); - - *term_tmp++ = '['; - - *term_tmp++ = '^'; - - *term_tmp++ = 1; - *term_tmp++ = FIRST_IN_FIELD_CHAR; + string_rel_add_char(term_dict, term_component, &j); - string_rel_add_char(&term_tmp, term_component, &i); - *term_tmp++ = '-'; + wrbuf_putc(term_dict, '['); - *term_tmp++ = ']'; - *term_tmp++ = '.'; - *term_tmp++ = '*'; - - if ((term_tmp - term_dict) > IT_MAX_WORD) - break; + wrbuf_putc(term_dict, '^'); + + wrbuf_putc(term_dict, 1); + wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR); + + string_rel_add_char(term_dict, term_component, &i); + wrbuf_putc(term_dict, '-'); + + wrbuf_putc(term_dict, ']'); + wrbuf_putc(term_dict, '.'); + wrbuf_putc(term_dict, '*'); } - *term_tmp++ = ')'; - *term_tmp = '\0'; - yaz_log(YLOG_LOG, "term_dict=%s", term_dict); + wrbuf_putc(term_dict, ')'); break; case 2: - if (!term_100(zh->reg->zebra_maps, reg_type, - term_sub, term_component, - space_split, term_dst)) + if (!term_100(zm, term_sub, term_component, space_split, display_term)) + { + wrbuf_destroy(term_component); return 0; + } yaz_log(log_level_rpn, "Relation <="); - *term_tmp++ = '('; - for (i = 0; term_component[i]; ) + wrbuf_putc(term_dict, '('); + for (i = 0; i < wrbuf_len(term_component); ) { int j = 0; while (j < i) - string_rel_add_char(&term_tmp, term_component, &j); - *term_tmp++ = '['; - - *term_tmp++ = '^'; + string_rel_add_char(term_dict, term_component, &j); + wrbuf_putc(term_dict, '['); - *term_tmp++ = 1; - *term_tmp++ = FIRST_IN_FIELD_CHAR; + wrbuf_putc(term_dict, '^'); - string_rel_add_char(&term_tmp, term_component, &i); - *term_tmp++ = '-'; + wrbuf_putc(term_dict, 1); + wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR); - *term_tmp++ = ']'; - 
*term_tmp++ = '.'; - *term_tmp++ = '*'; + string_rel_add_char(term_dict, term_component, &i); + wrbuf_putc(term_dict, '-'); - *term_tmp++ = '|'; + wrbuf_putc(term_dict, ']'); + wrbuf_putc(term_dict, '.'); + wrbuf_putc(term_dict, '*'); - if ((term_tmp - term_dict) > IT_MAX_WORD) - break; + wrbuf_putc(term_dict, '|'); } - for (i = 0; term_component[i]; ) - string_rel_add_char(&term_tmp, term_component, &i); - *term_tmp++ = ')'; - *term_tmp = '\0'; + for (i = 0; i < wrbuf_len(term_component); ) + string_rel_add_char(term_dict, term_component, &i); + wrbuf_putc(term_dict, ')'); break; case 5: - if (!term_100 (zh->reg->zebra_maps, reg_type, - term_sub, term_component, space_split, term_dst)) + if (!term_100(zm, term_sub, term_component, space_split, display_term)) + { + wrbuf_destroy(term_component); return 0; + } yaz_log(log_level_rpn, "Relation >"); - *term_tmp++ = '('; - for (i = 0; term_component[i];) + wrbuf_putc(term_dict, '('); + for (i = 0; i < wrbuf_len(term_component); ) { int j = 0; while (j < i) - string_rel_add_char(&term_tmp, term_component, &j); - *term_tmp++ = '['; + string_rel_add_char(term_dict, term_component, &j); + wrbuf_putc(term_dict, '['); - *term_tmp++ = '^'; - *term_tmp++ = '-'; - string_rel_add_char(&term_tmp, term_component, &i); + wrbuf_putc(term_dict, '^'); + wrbuf_putc(term_dict, '-'); + string_rel_add_char(term_dict, term_component, &i); - *term_tmp++ = ']'; - *term_tmp++ = '.'; - *term_tmp++ = '*'; + wrbuf_putc(term_dict, ']'); + wrbuf_putc(term_dict, '.'); + wrbuf_putc(term_dict, '*'); - *term_tmp++ = '|'; - - if ((term_tmp - term_dict) > IT_MAX_WORD) - break; + wrbuf_putc(term_dict, '|'); } - for (i = 0; term_component[i];) - string_rel_add_char(&term_tmp, term_component, &i); - *term_tmp++ = '.'; - *term_tmp++ = '+'; - *term_tmp++ = ')'; - *term_tmp = '\0'; + for (i = 0; i < wrbuf_len(term_component); ) + string_rel_add_char(term_dict, term_component, &i); + wrbuf_putc(term_dict, '.'); + wrbuf_putc(term_dict, '+'); + 
wrbuf_putc(term_dict, ')'); break; case 4: - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, - term_component, space_split, term_dst)) + if (!term_100(zm, term_sub, term_component, space_split, display_term)) + { + wrbuf_destroy(term_component); return 0; + } yaz_log(log_level_rpn, "Relation >="); - *term_tmp++ = '('; - for (i = 0; term_component[i];) + wrbuf_putc(term_dict, '('); + for (i = 0; i < wrbuf_len(term_component); ) { int j = 0; if (i) - *term_tmp++ = '|'; + wrbuf_putc(term_dict, '|'); while (j < i) - string_rel_add_char(&term_tmp, term_component, &j); - *term_tmp++ = '['; + string_rel_add_char(term_dict, term_component, &j); + wrbuf_putc(term_dict, '['); - if (term_component[i+1]) + if (i < wrbuf_len(term_component)-1) { - *term_tmp++ = '^'; - *term_tmp++ = '-'; - string_rel_add_char(&term_tmp, term_component, &i); + wrbuf_putc(term_dict, '^'); + wrbuf_putc(term_dict, '-'); + string_rel_add_char(term_dict, term_component, &i); } else { - string_rel_add_char(&term_tmp, term_component, &i); - *term_tmp++ = '-'; + string_rel_add_char(term_dict, term_component, &i); + wrbuf_putc(term_dict, '-'); } - *term_tmp++ = ']'; - *term_tmp++ = '.'; - *term_tmp++ = '*'; - - if ((term_tmp - term_dict) > IT_MAX_WORD) - break; + wrbuf_putc(term_dict, ']'); + wrbuf_putc(term_dict, '.'); + wrbuf_putc(term_dict, '*'); } - *term_tmp++ = ')'; - *term_tmp = '\0'; + wrbuf_putc(term_dict, ')'); break; case 3: case 102: @@ -875,12 +852,14 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, if (!**term_sub) return 1; yaz_log(log_level_rpn, "Relation ="); - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, - term_component, space_split, term_dst)) + if (!term_100(zm, term_sub, term_component, space_split, display_term)) + { + wrbuf_destroy(term_component); return 0; - strcat(term_tmp, "("); - strcat(term_tmp, term_component); - strcat(term_tmp, ")"); + } + wrbuf_puts(term_dict, "("); + wrbuf_puts(term_dict, wrbuf_cstr(term_component)); + 
wrbuf_puts(term_dict, ")"); break; case 103: yaz_log(log_level_rpn, "Relation always matches"); @@ -890,33 +869,37 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, break; default: *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE; + wrbuf_destroy(term_component); return 0; } + wrbuf_destroy(term_component); return 1; } static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, - const int *attributeSet, NMEM stream, + WRBUF term_dict, + const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, - int reg_type, int complete_flag, - int num_bases, char **basenames, - char *term_dst, + const char *index_type, int complete_flag, + WRBUF display_term, const char *xpath_use, - struct ord_list **ol); - -static ZEBRA_RES term_limits_APT(ZebraHandle zh, - Z_AttributesPlusTerm *zapt, - zint *hits_limit_value, - const char **term_ref_id_str, - NMEM nmem) + struct ord_list **ol, + zebra_map_t zm); + +ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + zint *hits_limit_value, + const char **term_ref_id_str, + NMEM nmem) { AttrType term_ref_id_attr; AttrType hits_limit_attr; int term_ref_id_int; + zint hits_limit_from_attr; attr_init_APT(&hits_limit_attr, zapt, 11); - *hits_limit_value = attr_find(&hits_limit_attr, NULL); + hits_limit_from_attr = attr_find(&hits_limit_attr, NULL); attr_init_APT(&term_ref_id_attr, zapt, 10); term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str); @@ -926,270 +909,296 @@ static ZEBRA_RES term_limits_APT(ZebraHandle zh, sprintf(res, "%d", term_ref_id_int); *term_ref_id_str = res; } + if (hits_limit_from_attr != -1) + *hits_limit_value = hits_limit_from_attr; - /* no limit given ? 
*/ - if (*hits_limit_value == -1) - { - if (*term_ref_id_str) - { - /* use global if term_ref is present */ - *hits_limit_value = zh->approx_limit; - } - else - { - /* no counting if term_ref is not present */ - *hits_limit_value = 0; - } - } - else if (*hits_limit_value == 0) - { - /* 0 is the same as global limit */ - *hits_limit_value = zh->approx_limit; - } yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT, *term_ref_id_str ? *term_ref_id_str : "none", *hits_limit_value); return ZEBRA_OK; } -static ZEBRA_RES term_trunc(ZebraHandle zh, - Z_AttributesPlusTerm *zapt, - const char **term_sub, - const int *attributeSet, NMEM stream, - struct grep_info *grep_info, - int reg_type, int complete_flag, - int num_bases, char **basenames, - char *term_dst, - const char *rank_type, - const char *xpath_use, - NMEM rset_nmem, - RSET *rset, - struct rset_key_control *kc) +/** \brief search for term (which may be truncated) + */ +static ZEBRA_RES search_term(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const char **term_sub, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, + struct grep_info *grep_info, + const char *index_type, int complete_flag, + const char *rank_type, + const char *xpath_use, + NMEM rset_nmem, + RSET *rset, + struct rset_key_control *kc, + zebra_map_t zm) { ZEBRA_RES res; struct ord_list *ol; - zint hits_limit_value; + zint hits_limit_value = hits_limit; const char *term_ref_id_str = 0; + WRBUF term_dict = wrbuf_alloc(); + WRBUF display_term = wrbuf_alloc(); *rset = 0; - - term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream); + zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, + stream); grep_info->isam_p_indx = 0; - res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info, - reg_type, complete_flag, num_bases, basenames, - term_dst, xpath_use, &ol); - if (res != ZEBRA_OK) - return res; - if (!*term_sub) /* no more terms ? 
*/ - return res; - yaz_log(log_level_rpn, "term: %s", term_dst); - *rset = rset_trunc(zh, grep_info->isam_p_buf, - grep_info->isam_p_indx, term_dst, - strlen(term_dst), rank_type, 1 /* preserve pos */, - zapt->term->which, rset_nmem, - kc, kc->scope, ol, reg_type, hits_limit_value, - term_ref_id_str); - if (!*rset) - return ZEBRA_FAIL; - return ZEBRA_OK; + res = string_term(zh, zapt, term_sub, term_dict, + attributeSet, stream, grep_info, + index_type, complete_flag, + display_term, xpath_use, &ol, zm); + wrbuf_destroy(term_dict); + if (res == ZEBRA_OK && *term_sub) + { + yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term)); + *rset = rset_trunc(zh, grep_info->isam_p_buf, + grep_info->isam_p_indx, wrbuf_buf(display_term), + wrbuf_len(display_term), rank_type, + 1 /* preserve pos */, + zapt->term->which, rset_nmem, + kc, kc->scope, ol, index_type, hits_limit_value, + term_ref_id_str); + if (!*rset) + res = ZEBRA_FAIL; + } + wrbuf_destroy(display_term); + return res; } static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, - const int *attributeSet, NMEM stream, + WRBUF term_dict, + const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, - int reg_type, int complete_flag, - int num_bases, char **basenames, - char *term_dst, + const char *index_type, int complete_flag, + WRBUF display_term, const char *xpath_use, - struct ord_list **ol) + struct ord_list **ol, + zebra_map_t zm) { - char term_dict[2*IT_MAX_WORD+4000]; - int j, r, base_no; + int r; AttrType truncation; int truncation_value; const char *termp; struct rpn_char_map_info rcmi; - int space_split = complete_flag ? 0 : 1; - int bases_ok = 0; /* no of databases with OK attribute */ + int space_split = complete_flag ? 
0 : 1; + int ord = -1; + int regex_range = 0; + int max_pos, prefix_len = 0; + int relation_error; + char ord_buf[32]; + int ord_len, i; *ol = ord_list_create(stream); - rpn_char_map_prepare (zh->reg, reg_type, &rcmi); + rpn_char_map_prepare(zh->reg, zm, &rcmi); attr_init_APT(&truncation, zapt, 5); truncation_value = attr_find(&truncation, NULL); yaz_log(log_level_rpn, "truncation value %d", truncation_value); - for (base_no = 0; base_no < num_bases; base_no++) + termp = *term_sub; /* start of term for each database */ + + if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use, + attributeSet, &ord) != ZEBRA_OK) { - int ord = -1; - int regex_range = 0; - int max_pos, prefix_len = 0; - int relation_error; - char ord_buf[32]; - int ord_len, i; - - termp = *term_sub; /* start of term for each database */ + *term_sub = 0; + return ZEBRA_FAIL; + } + + wrbuf_rewind(term_dict); /* new dictionary regexp term */ + + *ol = ord_list_append(stream, *ol, ord); + ord_len = key_SU_encode(ord, ord_buf); + + wrbuf_putc(term_dict, '('); + + for (i = 0; ireg->zei, basenames[base_no])) + if (zebra_maps_is_icu(zm)) + { + int relation_value; + AttrType relation; + + attr_init_APT(&relation, zapt, 2); + relation_value = attr_find(&relation, NULL); + if (relation_value == 103) /* always matches */ + termp += strlen(termp); /* move to end of term */ + else if (relation_value == 3 || relation_value == 102 || relation_value == -1) { - zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE, - basenames[base_no]); - return ZEBRA_FAIL; + /* ICU case */ + switch (truncation_value) + { + case -1: /* not specified */ + case 100: /* do not truncate */ + if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0)) + { + *term_sub = 0; + return ZEBRA_OK; + } + break; + case 1: /* right truncation */ + if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1)) + { + *term_sub = 0; + return ZEBRA_OK; + } + break; + default: + zebra_setError_zint(zh, + 
YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE, + truncation_value); + return ZEBRA_FAIL; + } } - - if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use, - attributeSet, &ord) != ZEBRA_OK) - continue; - - bases_ok++; - - *ol = ord_list_append(stream, *ol, ord); - ord_len = key_SU_encode (ord, ord_buf); - - term_dict[prefix_len++] = '('; - for (i = 0; ireg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst)) - { - *term_sub = 0; - return ZEBRA_OK; - } - strcat(term_dict, ".*)"); + wrbuf_putc(term_dict, '('); + if (!term_100(zm, &termp, term_dict, space_split, display_term)) + { + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_puts(term_dict, ".*)"); break; - case 2: /* keft truncation */ - term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*'; - if (!term_100(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst)) - { - *term_sub = 0; - return ZEBRA_OK; - } - strcat(term_dict, ")"); + case 2: /* left truncation */ + wrbuf_puts(term_dict, "(.*"); + if (!term_100(zm, &termp, term_dict, space_split, display_term)) + { + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_putc(term_dict, ')'); break; case 3: /* left&right truncation */ - term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*'; - if (!term_100(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst)) - { - *term_sub = 0; - return ZEBRA_OK; - } - strcat(term_dict, ".*)"); + wrbuf_puts(term_dict, "(.*"); + if (!term_100(zm, &termp, term_dict, space_split, display_term)) + { + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_puts(term_dict, ".*)"); break; case 101: /* process # in term */ - term_dict[j++] = '('; - if (!term_101(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst)) - { - *term_sub = 0; + wrbuf_putc(term_dict, '('); + if (!term_101(zm, &termp, term_dict, space_split, display_term)) + { + *term_sub = 0; return ZEBRA_OK; - } - strcat(term_dict, ")"); + } + wrbuf_puts(term_dict, ")"); break; case 102: /* 
Regexp-1 */ - term_dict[j++] = '('; - if (!term_102(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst)) - { - *term_sub = 0; + wrbuf_putc(term_dict, '('); + if (!term_102(zm, &termp, term_dict, space_split, display_term)) + { + *term_sub = 0; return ZEBRA_OK; - } - strcat(term_dict, ")"); + } + wrbuf_putc(term_dict, ')'); break; case 103: /* Regexp-2 */ regex_range = 1; - term_dict[j++] = '('; - if (!term_103(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, &regex_range, - space_split, term_dst)) - { - *term_sub = 0; + wrbuf_putc(term_dict, '('); + if (!term_103(zm, &termp, term_dict, &regex_range, + space_split, display_term)) + { + *term_sub = 0; return ZEBRA_OK; - } - strcat(term_dict, ")"); - break; + } + wrbuf_putc(term_dict, ')'); + break; case 104: /* process # and ! in term */ - term_dict[j++] = '('; - if (!term_104(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst)) - { - *term_sub = 0; + wrbuf_putc(term_dict, '('); + if (!term_104(zm, &termp, term_dict, space_split, display_term)) + { + *term_sub = 0; return ZEBRA_OK; - } - strcat(term_dict, ")"); + } + wrbuf_putc(term_dict, ')'); break; case 105: /* process * and ! in term */ - term_dict[j++] = '('; - if (!term_105(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst, 1)) - { - *term_sub = 0; + wrbuf_putc(term_dict, '('); + if (!term_105(zm, &termp, term_dict, space_split, display_term, 1)) + { + *term_sub = 0; return ZEBRA_OK; - } - strcat(term_dict, ")"); + } + wrbuf_putc(term_dict, ')'); break; case 106: /* process * and ! 
in term */ - term_dict[j++] = '('; - if (!term_105(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst, 0)) - { - *term_sub = 0; + wrbuf_putc(term_dict, '('); + if (!term_105(zm, &termp, term_dict, space_split, display_term, 0)) + { + *term_sub = 0; return ZEBRA_OK; - } - strcat(term_dict, ")"); + } + wrbuf_putc(term_dict, ')'); break; - default: - zebra_setError_zint(zh, - YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE, - truncation_value); - return ZEBRA_FAIL; + default: + zebra_setError_zint(zh, + YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE, + truncation_value); + return ZEBRA_FAIL; } - if (1) - { - char buf[80]; - const char *input = term_dict + prefix_len; - esc_str(buf, sizeof(buf), input, strlen(input)); - } - yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len); - r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range, - grep_info, &max_pos, - ord_len /* number of "exact" chars */, - grep_handle); - if (r == 1) - zebra_set_partial_result(zh); - else if (r) - yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r); - } - if (!bases_ok) - return ZEBRA_FAIL; + } + if (1) + { + char buf[1000]; + const char *input = wrbuf_cstr(term_dict) + prefix_len; + esc_str(buf, sizeof(buf), input, strlen(input)); + } + { + WRBUF pr_wr = wrbuf_alloc(); + + wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict)); + yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr)); + wrbuf_destroy(pr_wr); + } + r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range, + grep_info, &max_pos, + ord_len /* number of "exact" chars */, + grep_handle); + if (r == 1) + zebra_set_partial_result(zh); + else if (r) + yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r); *term_sub = termp; yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx); return ZEBRA_OK; @@ -1208,7 +1217,7 @@ static void grep_info_delete(struct grep_info *grep_info) static ZEBRA_RES grep_info_prepare(ZebraHandle zh, Z_AttributesPlusTerm *zapt, struct grep_info 
*grep_info, - int reg_type) + const char *index_type) { #ifdef TERM_COUNT grep_info->term_no = 0; @@ -1217,7 +1226,7 @@ static ZEBRA_RES grep_info_prepare(ZebraHandle zh, grep_info->isam_p_size = 0; grep_info->isam_p_buf = NULL; grep_info->zh = zh; - grep_info->reg_type = reg_type; + grep_info->index_type = index_type; grep_info->termset = 0; if (zapt) { @@ -1253,7 +1262,7 @@ static ZEBRA_RES grep_info_prepare(ZebraHandle zh, termset_name = resname; } else - termset_name = termset_value_string; + termset_name = termset_value_string; yaz_log(log_level_rpn, "creating termset set %s", termset_name); grep_info->termset = resultSetAdd(zh, termset_name, 1); if (!grep_info->termset) @@ -1266,48 +1275,29 @@ static ZEBRA_RES grep_info_prepare(ZebraHandle zh, } return ZEBRA_OK; } - -/** - \brief Create result set(s) for list of terms - \param zh Zebra Handle - \param zapt Attributes Plust Term (RPN leaf) - \param termz term as used in query but converted to UTF-8 - \param attributeSet default attribute set - \param stream memory for result - \param reg_type register type ('w', 'p',..) 
- \param complete_flag whether it's phrases or not - \param rank_type term flags for ranking - \param xpath_use use attribute for X-Path (-1 for no X-path) - \param num_bases number of databases - \param basenames array of databases - \param rset_nmem memory for result sets - \param result_sets output result set for each term in list (output) - \param num_result_sets number of output result sets - \param kc rset key control to be used for created result sets -*/ -static ZEBRA_RES term_list_trunc(ZebraHandle zh, - Z_AttributesPlusTerm *zapt, - const char *termz, - const int *attributeSet, - NMEM stream, - int reg_type, int complete_flag, - const char *rank_type, - const char *xpath_use, - int num_bases, char **basenames, - NMEM rset_nmem, - RSET **result_sets, int *num_result_sets, - struct rset_key_control *kc) + +static ZEBRA_RES search_terms_chrmap(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const char *termz, + const Odr_oid *attributeSet, + zint hits_limit, + NMEM stream, + const char *index_type, int complete_flag, + const char *rank_type, + const char *xpath_use, + NMEM rset_nmem, + RSET **result_sets, int *num_result_sets, + struct rset_key_control *kc, + zebra_map_t zm) { - char term_dst[IT_MAX_WORD+1]; struct grep_info grep_info; const char *termp = termz; int alloc_sets = 0; - + *num_result_sets = 0; - *term_dst = 0; - if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL) + if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL) return ZEBRA_FAIL; - while(1) + while (1) { ZEBRA_RES res; @@ -1321,20 +1311,19 @@ static ZEBRA_RES term_list_trunc(ZebraHandle zh, alloc_sets = alloc_sets + add; *result_sets = rnew; } - res = term_trunc(zh, zapt, &termp, attributeSet, - stream, &grep_info, - reg_type, complete_flag, - num_bases, basenames, - term_dst, rank_type, - xpath_use, rset_nmem, - &(*result_sets)[*num_result_sets], - kc); + res = search_term(zh, zapt, &termp, attributeSet, hits_limit, + stream, &grep_info, + index_type, 
complete_flag, + rank_type, + xpath_use, rset_nmem, + &(*result_sets)[*num_result_sets], + kc, zm); if (res != ZEBRA_OK) { int i; for (i = 0; i < *num_result_sets; i++) rset_delete((*result_sets)[i]); - grep_info_delete (&grep_info); + grep_info_delete(&grep_info); return res; } if ((*result_sets)[*num_result_sets] == 0) @@ -1347,22 +1336,67 @@ static ZEBRA_RES term_list_trunc(ZebraHandle zh, grep_info_delete(&grep_info); return ZEBRA_OK; } + +/** + \brief Create result set(s) for list of terms + \param zh Zebra Handle + \param zapt Attributes Plust Term (RPN leaf) + \param termz term as used in query but converted to UTF-8 + \param attributeSet default attribute set + \param stream memory for result + \param index_type register type ("w", "p",..) + \param complete_flag whether it's phrases or not + \param rank_type term flags for ranking + \param xpath_use use attribute for X-Path (-1 for no X-path) + \param rset_nmem memory for result sets + \param result_sets output result set for each term in list (output) + \param num_result_sets number of output result sets + \param kc rset key control to be used for created result sets +*/ +static ZEBRA_RES search_terms_list(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const char *termz, + const Odr_oid *attributeSet, + zint hits_limit, + NMEM stream, + const char *index_type, int complete_flag, + const char *rank_type, + const char *xpath_use, + NMEM rset_nmem, + RSET **result_sets, int *num_result_sets, + struct rset_key_control *kc) +{ + zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type); + if (zebra_maps_is_icu(zm)) + zebra_map_tokenize_start(zm, termz, strlen(termz)); + return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit, + stream, index_type, complete_flag, + rank_type, xpath_use, + rset_nmem, result_sets, num_result_sets, + kc, zm); +} -static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh, - Z_AttributesPlusTerm *zapt, - const int *attributeSet, - int reg_type, - int 
num_bases, char **basenames, - NMEM rset_nmem, - RSET *rset, - struct rset_key_control *kc) + +/** \brief limit a search by position - returns result set + */ +static ZEBRA_RES search_position(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const Odr_oid *attributeSet, + const char *index_type, + NMEM rset_nmem, + RSET *rset, + struct rset_key_control *kc) { - RSET *f_set; - int base_no; int position_value; - int num_sets = 0; AttrType position; - + int ord = -1; + char ord_buf[32]; + char term_dict[100]; + int ord_len; + char *val; + ISAM_P isam_p; + zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type); + attr_init_APT(&position, zapt, 3); position_value = attr_find(&position, NULL); switch(position_value) @@ -1379,75 +1413,46 @@ static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh, return ZEBRA_FAIL; } - if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, reg_type)) + + if (!zebra_maps_is_first_in_field(zm)) { zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE, position_value); return ZEBRA_FAIL; } - if (!zh->reg->isamb && !zh->reg->isamc) + if (zebra_apt_get_ord(zh, zapt, index_type, 0, + attributeSet, &ord) != ZEBRA_OK) { - zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE, - position_value); return ZEBRA_FAIL; } - f_set = xmalloc(sizeof(RSET) * num_bases); - for (base_no = 0; base_no < num_bases; base_no++) + ord_len = key_SU_encode(ord, ord_buf); + memcpy(term_dict, ord_buf, ord_len); + strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR); + val = dict_lookup(zh->reg->dict, term_dict); + if (val) { - int ord = -1; - char ord_buf[32]; - char term_dict[100]; - int ord_len; - char *val; - ISAM_P isam_p; - - if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no])) - { - zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE, - basenames[base_no]); - return ZEBRA_FAIL; - } - - if (zebra_apt_get_ord(zh, zapt, reg_type, 0, - attributeSet, &ord) != ZEBRA_OK) - continue; - - ord_len = key_SU_encode (ord, ord_buf); - 
memcpy(term_dict, ord_buf, ord_len); - strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR); - val = dict_lookup(zh->reg->dict, term_dict); - if (!val) - continue; assert(*val == sizeof(ISAM_P)); memcpy(&isam_p, val+1, sizeof(isam_p)); - - if (zh->reg->isamb) - f_set[num_sets++] = rsisamb_create(rset_nmem, kc, kc->scope, - zh->reg->isamb, isam_p, 0); - else if (zh->reg->isamc) - f_set[num_sets++] = rsisamc_create(rset_nmem, kc, kc->scope, - zh->reg->isamc, isam_p, 0); + *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope, + isam_p, 0); } - if (num_sets) - { - *rset = rset_create_or(rset_nmem, kc, kc->scope, - 0 /* termid */, num_sets, f_set); - } - xfree(f_set); return ZEBRA_OK; } - + +/** \brief returns result set for phrase search + */ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, - const int *attributeSet, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, - int reg_type, int complete_flag, + const char *index_type, + int complete_flag, const char *rank_type, const char *xpath_use, - int num_bases, char **basenames, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc) @@ -1455,26 +1460,29 @@ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, RSET *result_sets = 0; int num_result_sets = 0; ZEBRA_RES res = - term_list_trunc(zh, zapt, termz_org, attributeSet, - stream, reg_type, complete_flag, - rank_type, xpath_use, - num_bases, basenames, - rset_nmem, - &result_sets, &num_result_sets, kc); - + search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit, + stream, index_type, complete_flag, + rank_type, xpath_use, + rset_nmem, + &result_sets, &num_result_sets, kc); + if (res != ZEBRA_OK) return res; if (num_result_sets > 0) { RSET first_set = 0; - res = rpn_search_APT_position(zh, zapt, attributeSet, - reg_type, - num_bases, basenames, - rset_nmem, &first_set, - kc); + res = search_position(zh, zapt, attributeSet, + index_type, + rset_nmem, &first_set, + kc); if (res != ZEBRA_OK) 
+ { + int i; + for (i = 0; ireg->zebra_maps, reg_type, term_sub, term_tmp, 1, - term_dst)) + if (!term_100(zm, term_sub, term_num, 1, display_term)) + { + wrbuf_destroy(term_num); return 0; - term_value = atoi (term_tmp); - gen_regular_rel(term_tmp, term_value-1, 1); + } + term_value = atoi(wrbuf_cstr(term_num)); + gen_regular_rel(term_dict, term_value-1, 1); break; case 2: yaz_log(log_level_rpn, "Relation <="); - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1, - term_dst)) + if (!term_100(zm, term_sub, term_num, 1, display_term)) + { + wrbuf_destroy(term_num); return 0; - term_value = atoi (term_tmp); - gen_regular_rel(term_tmp, term_value, 1); + } + term_value = atoi(wrbuf_cstr(term_num)); + gen_regular_rel(term_dict, term_value, 1); break; case 4: yaz_log(log_level_rpn, "Relation >="); - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1, - term_dst)) + if (!term_100(zm, term_sub, term_num, 1, display_term)) + { + wrbuf_destroy(term_num); return 0; - term_value = atoi (term_tmp); - gen_regular_rel(term_tmp, term_value, 0); + } + term_value = atoi(wrbuf_cstr(term_num)); + gen_regular_rel(term_dict, term_value, 0); break; case 5: yaz_log(log_level_rpn, "Relation >"); - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1, - term_dst)) + if (!term_100(zm, term_sub, term_num, 1, display_term)) + { + wrbuf_destroy(term_num); return 0; - term_value = atoi (term_tmp); - gen_regular_rel(term_tmp, term_value+1, 0); + } + term_value = atoi(wrbuf_cstr(term_num)); + gen_regular_rel(term_dict, term_value+1, 0); break; case -1: case 3: yaz_log(log_level_rpn, "Relation ="); - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1, - term_dst)) - return 0; - term_value = atoi (term_tmp); - sprintf(term_tmp, "(0*%d)", term_value); + if (!term_100(zm, term_sub, term_num, 1, display_term)) + { + wrbuf_destroy(term_num); + return 0; + } + term_value = atoi(wrbuf_cstr(term_num)); + wrbuf_printf(term_dict, "(0*%d)", 
term_value); break; case 103: /* term_tmp untouched.. */ @@ -1707,90 +1727,77 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, break; default: *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE; - return 0; + wrbuf_destroy(term_num); + return 0; } - yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp); - r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos, - 0, grep_handle); + r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), + 0, grep_info, max_pos, 0, grep_handle); if (r == 1) zebra_set_partial_result(zh); else if (r) yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r); yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx); + wrbuf_destroy(term_num); return 1; } static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, - const int *attributeSet, NMEM stream, + WRBUF term_dict, + const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, - int reg_type, int complete_flag, - int num_bases, char **basenames, - char *term_dst, + const char *index_type, int complete_flag, + WRBUF display_term, const char *xpath_use, struct ord_list **ol) { - char term_dict[2*IT_MAX_WORD+2]; - int base_no; const char *termp; struct rpn_char_map_info rcmi; - - int bases_ok = 0; /* no of databases with OK attribute */ - + int max_pos; + int relation_error = 0; + int ord, ord_len, i; + char ord_buf[32]; + zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type); + *ol = ord_list_create(stream); - rpn_char_map_prepare (zh->reg, reg_type, &rcmi); + rpn_char_map_prepare(zh->reg, zm, &rcmi); - for (base_no = 0; base_no < num_bases; base_no++) + termp = *term_sub; + + if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use, + attributeSet, &ord) != ZEBRA_OK) { - int max_pos, prefix_len = 0; - int relation_error = 0; - int ord, ord_len, i; - char ord_buf[32]; - - termp = *term_sub; - - if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no])) + return 
ZEBRA_FAIL; + } + + wrbuf_rewind(term_dict); + + *ol = ord_list_append(stream, *ol, ord); + + ord_len = key_SU_encode(ord, ord_buf); + + wrbuf_putc(term_dict, '('); + for (i = 0; i < ord_len; i++) + { + wrbuf_putc(term_dict, 1); + wrbuf_putc(term_dict, ord_buf[i]); + } + wrbuf_putc(term_dict, ')'); + + if (!numeric_relation(zh, zapt, &termp, term_dict, + attributeSet, grep_info, &max_pos, zm, + display_term, &relation_error)) + { + if (relation_error) { - zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE, - basenames[base_no]); + zebra_setError(zh, relation_error, 0); return ZEBRA_FAIL; } - - if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use, - attributeSet, &ord) != ZEBRA_OK) - continue; - bases_ok++; - - *ol = ord_list_append(stream, *ol, ord); - - ord_len = key_SU_encode (ord, ord_buf); - - term_dict[prefix_len++] = '('; - for (i = 0; i < ord_len; i++) - { - term_dict[prefix_len++] = 1; - term_dict[prefix_len++] = ord_buf[i]; - } - term_dict[prefix_len++] = ')'; - term_dict[prefix_len] = '\0'; - - if (!numeric_relation(zh, zapt, &termp, term_dict, - attributeSet, grep_info, &max_pos, reg_type, - term_dst, &relation_error)) - { - if (relation_error) - { - zebra_setError(zh, relation_error, 0); - return ZEBRA_FAIL; - } - *term_sub = 0; - return ZEBRA_OK; - } + *term_sub = 0; + return ZEBRA_OK; } - if (!bases_ok) - return ZEBRA_FAIL; *term_sub = termp; yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx); return ZEBRA_OK; @@ -1800,17 +1807,16 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz, - const int *attributeSet, + const Odr_oid *attributeSet, NMEM stream, - int reg_type, int complete_flag, + const char *index_type, + int complete_flag, const char *rank_type, const char *xpath_use, - int num_bases, char **basenames, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc) { - char term_dst[IT_MAX_WORD+1]; const char *termp = 
termz; RSET *result_sets = 0; int num_result_sets = 0; @@ -1820,14 +1826,17 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, zint hits_limit_value; const char *term_ref_id_str = 0; - term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream); + zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, + stream); yaz_log(log_level_rpn, "APT_numeric t='%s'", termz); - if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL) + if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL) return ZEBRA_FAIL; while (1) { struct ord_list *ol; + WRBUF term_dict = wrbuf_alloc(); + WRBUF display_term = wrbuf_alloc(); if (alloc_sets == num_result_sets) { int add = 10; @@ -1840,21 +1849,27 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, } yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp); grep_info.isam_p_indx = 0; - res = numeric_term(zh, zapt, &termp, attributeSet, stream, &grep_info, - reg_type, complete_flag, num_bases, basenames, - term_dst, xpath_use, &ol); + res = numeric_term(zh, zapt, &termp, term_dict, + attributeSet, stream, &grep_info, + index_type, complete_flag, + display_term, xpath_use, &ol); + wrbuf_destroy(term_dict); if (res == ZEBRA_FAIL || termp == 0) + { + wrbuf_destroy(display_term); break; - yaz_log(YLOG_DEBUG, "term: %s", term_dst); + } + yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term)); result_sets[num_result_sets] = rset_trunc(zh, grep_info.isam_p_buf, - grep_info.isam_p_indx, term_dst, - strlen(term_dst), rank_type, + grep_info.isam_p_indx, wrbuf_buf(display_term), + wrbuf_len(display_term), rank_type, 0 /* preserve position */, zapt->term->which, rset_nmem, - kc, kc->scope, ol, reg_type, + kc, kc->scope, ol, index_type, hits_limit_value, term_ref_id_str); + wrbuf_destroy(display_term); if (!result_sets[num_result_sets]) break; num_result_sets++; @@ -1880,7 +1895,7 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh, 
Z_AttributesPlusTerm *zapt, const char *termz, - const int *attributeSet, + const Odr_oid *attributeSet, NMEM stream, const char *rank_type, NMEM rset_nmem, RSET *rset, @@ -1919,7 +1934,7 @@ static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh, } static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const int *attributeSet, NMEM stream, + const Odr_oid *attributeSet, NMEM stream, Z_SortKeySpecList *sort_sequence, const char *rank_type, NMEM rset_nmem, @@ -1941,15 +1956,15 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, sort_sequence->num_specs = 10; sort_sequence->specs = (Z_SortKeySpec **) nmem_malloc(stream, sort_sequence->num_specs * - sizeof(*sort_sequence->specs)); + sizeof(*sort_sequence->specs)); for (i = 0; i<sort_sequence->num_specs; i++) sort_sequence->specs[i] = 0; } if (zapt->term->which != Z_Term_general) i = 0; else - i = atoi_n ((char *) zapt->term->u.general->buf, - zapt->term->u.general->len); + i = atoi_n((char *) zapt->term->u.general->buf, + zapt->term->u.general->len); if (i >= sort_sequence->num_specs) i = 0; sprintf(termz, "%d", i); @@ -1967,7 +1982,7 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet); sk->u.sortAttributes->list = zapt->attributes; - sks->sortRelation = (int *) + sks->sortRelation = (Odr_int *) nmem_malloc(stream, sizeof(*sks->sortRelation)); if (sort_relation_value == 1) *sks->sortRelation = Z_SortKeySpec_ascending; @@ -1976,7 +1991,7 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, else *sks->sortRelation = Z_SortKeySpec_ascending; - sks->caseSensitivity = (int *) + sks->caseSensitivity = (Odr_int *) nmem_malloc(stream, sizeof(*sks->caseSensitivity)); *sks->caseSensitivity = 0; @@ -1989,11 +2004,11 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const int 
*attributeSet, + const Odr_oid *attributeSet, struct xpath_location_step *xpath, int max, NMEM mem) { - const int *curAttributeSet = attributeSet; + const Odr_oid *curAttributeSet = attributeSet; AttrType use; const char *use_string = 0; @@ -2009,67 +2024,63 @@ static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt, static RSET xpath_trunc(ZebraHandle zh, NMEM stream, - int reg_type, const char *term, + const char *index_type, const char *term, const char *xpath_use, NMEM rset_nmem, struct rset_key_control *kc) { - RSET rset; struct grep_info grep_info; - char term_dict[2048]; - char ord_buf[32]; - int prefix_len = 0; int ord = zebraExplain_lookup_attr_str(zh->reg->zei, zinfo_index_category_index, - reg_type, - xpath_use); - int ord_len, i, r, max_pos; - int term_type = Z_Term_characterString; - const char *flags = "void"; - - if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL) + index_type, xpath_use); + if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL) return rset_create_null(rset_nmem, kc, 0); if (ord < 0) return rset_create_null(rset_nmem, kc, 0); - if (prefix_len) - term_dict[prefix_len++] = '|'; else - term_dict[prefix_len++] = '('; - - ord_len = key_SU_encode (ord, ord_buf); - for (i = 0; ireg->dict, wrbuf_cstr(term_dict), 0, + &grep_info, &max_pos, 0, grep_handle); + yaz_log(YLOG_DEBUG, "%s %d positions", term, + grep_info.isam_p_indx); + rset = rset_trunc(zh, grep_info.isam_p_buf, + grep_info.isam_p_indx, term, strlen(term), + flags, 1, term_type, rset_nmem, + kc, kc->scope, 0, index_type, 0 /* hits_limit */, + 0 /* term_ref_id_str */); + grep_info_delete(&grep_info); + wrbuf_destroy(term_dict); + return rset; } - term_dict[prefix_len++] = ')'; - strcpy(term_dict+prefix_len, term); - - grep_info.isam_p_indx = 0; - r = dict_lookup_grep(zh->reg->dict, term_dict, 0, - &grep_info, &max_pos, 0, grep_handle); - yaz_log(YLOG_DEBUG, "%s %d positions", term, - grep_info.isam_p_indx); - rset = rset_trunc(zh, 
grep_info.isam_p_buf, - grep_info.isam_p_indx, term, strlen(term), - flags, 1, term_type,rset_nmem, - kc, kc->scope, 0, reg_type, 0 /* hits_limit */, - 0 /* term_ref_id_str */); - grep_info_delete(&grep_info); - return rset; } static ZEBRA_RES rpn_search_xpath(ZebraHandle zh, - int num_bases, char **basenames, NMEM stream, const char *rank_type, RSET rset, int xpath_len, struct xpath_location_step *xpath, NMEM rset_nmem, RSET *rset_out, struct rset_key_control *kc) { - int base_no; int i; int always_matches = rset ? 0 : 1; @@ -2087,38 +2098,30 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, } /* - //a -> a/.* - //a/b -> b/a/.* - /a -> a/ - /a/b -> b/a/ + //a -> a/.* + //a/b -> b/a/.* + /a -> a/ + /a/b -> b/a/ - / -> none + / -> none - a[@attr = value]/b[@other = othervalue] + a[@attr = value]/b[@other = othervalue] - /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/) - /a/b val range(b/a/,freetext(w,1016,val),b/a/) - /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/) - /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y) - /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y) - /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x) + /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/) + /a/b val range(b/a/,freetext(w,1016,val),b/a/) + /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/) + /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y) + /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y) + /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x) */ - dict_grep_cmap (zh->reg->dict, 0, 0); - - for (base_no = 0; base_no < num_bases; base_no++) + dict_grep_cmap(zh->reg->dict, 0, 0); + { int level = xpath_len; int first_path = 1; - if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no])) - { - zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE, - basenames[base_no]); - *rset_out = rset; - return ZEBRA_FAIL; 
- } while (--level >= 0) { WRBUF xpath_rev = wrbuf_alloc(); @@ -2168,7 +2171,8 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, } } rset_attr = xpath_trunc( - zh, stream, '0', wrbuf_cstr(wbuf), ZEBRA_XPATH_ATTR_NAME, + zh, stream, "0", wrbuf_cstr(wbuf), + ZEBRA_XPATH_ATTR_NAME, rset_nmem, kc); wrbuf_destroy(wbuf); } @@ -2184,7 +2188,7 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, wrbuf_cstr(xpath_rev)); if (wrbuf_len(xpath_rev)) { - rset_start_tag = xpath_trunc(zh, stream, '0', + rset_start_tag = xpath_trunc(zh, stream, "0", wrbuf_cstr(xpath_rev), ZEBRA_XPATH_ELM_BEGIN, rset_nmem, kc); @@ -2192,7 +2196,7 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, rset = rset_start_tag; else { - rset_end_tag = xpath_trunc(zh, stream, '0', + rset_end_tag = xpath_trunc(zh, stream, "0", wrbuf_cstr(xpath_rev), ZEBRA_XPATH_ELM_END, rset_nmem, kc); @@ -2212,16 +2216,73 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, #define MAX_XPATH_STEPS 10 +static ZEBRA_RES rpn_search_database(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, + Z_SortKeySpecList *sort_sequence, + NMEM rset_nmem, + RSET *rset, + struct rset_key_control *kc); + static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const int *attributeSet, NMEM stream, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, Z_SortKeySpecList *sort_sequence, - int num_bases, char **basenames, + int num_bases, const char **basenames, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc) { + RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets)); + ZEBRA_RES res = ZEBRA_OK; + int i; + for (i = 0; i < num_bases; i++) + { + + if (zebraExplain_curDatabase(zh->reg->zei, basenames[i])) + { + zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE, + basenames[i]); + res = ZEBRA_FAIL; + break; + } + res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream, + sort_sequence, + rset_nmem, rsets+i, kc); + if (res != ZEBRA_OK) + break; + } + if (res 
!= ZEBRA_OK) + { /* must clean up the already created sets */ + while (--i >= 0) + rset_delete(rsets[i]); + *rset = 0; + } + else + { + if (num_bases == 1) + *rset = rsets[0]; + else if (num_bases == 0) + *rset = rset_create_null(rset_nmem, kc, 0); + else + *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */, + num_bases, rsets); + } + return res; +} + +static ZEBRA_RES rpn_search_database(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, + Z_SortKeySpecList *sort_sequence, + NMEM rset_nmem, + RSET *rset, + struct rset_key_control *kc) +{ ZEBRA_RES res = ZEBRA_OK; - unsigned reg_id; + const char *index_type; char *search_type = NULL; char rank_type[128]; int complete_flag; @@ -2236,10 +2297,10 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, log_level_rpn = yaz_log_module_level("rpn"); log_level_set = 1; } - zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type, + zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type, rank_type, &complete_flag, &sort_flag); - yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id); + yaz_log(YLOG_DEBUG, "index_type=%s", index_type); yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag); yaz_log(YLOG_DEBUG, "search_type=%s", search_type); yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type); @@ -2271,8 +2332,7 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, if (relation_value == 103) /* alwaysmatches */ { *rset = 0; /* signal no "term" set */ - return rpn_search_xpath(zh, num_bases, basenames, - stream, rank_type, *rset, + return rpn_search_xpath(zh, stream, rank_type, *rset, xpath_len, xpath, rset_nmem, rset, kc); } } @@ -2288,26 +2348,29 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, */ if (!strcmp(search_type, "phrase")) { - res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream, - reg_id, complete_flag, rank_type, + res = rpn_search_APT_phrase(zh, zapt, 
termz, attributeSet, hits_limit, + stream, + index_type, complete_flag, rank_type, xpath_use, - num_bases, basenames, rset_nmem, + rset_nmem, rset, kc); } else if (!strcmp(search_type, "and-list")) { - res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream, - reg_id, complete_flag, rank_type, + res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit, + stream, + index_type, complete_flag, rank_type, xpath_use, - num_bases, basenames, rset_nmem, + rset_nmem, rset, kc); } else if (!strcmp(search_type, "or-list")) { - res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream, - reg_id, complete_flag, rank_type, + res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit, + stream, + index_type, complete_flag, rank_type, xpath_use, - num_bases, basenames, rset_nmem, + rset_nmem, rset, kc); } else if (!strcmp(search_type, "local")) @@ -2318,9 +2381,9 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, else if (!strcmp(search_type, "numeric")) { res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream, - reg_id, complete_flag, rank_type, + index_type, complete_flag, rank_type, xpath_use, - num_bases, basenames, rset_nmem, + rset_nmem, rset, kc); } else @@ -2332,16 +2395,16 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, return res; if (!*rset) return ZEBRA_FAIL; - return rpn_search_xpath(zh, num_bases, basenames, - stream, rank_type, *rset, + return rpn_search_xpath(zh, stream, rank_type, *rset, xpath_len, xpath, rset_nmem, rset, kc); } static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, - const int *attributeSet, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, - int num_bases, char **basenames, + int num_bases, const char **basenames, RSET **result_sets, int *num_result_sets, Z_Operator *parent_op, struct rset_key_control *kc); @@ -2378,10 +2441,11 @@ ZEBRA_RES 
rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs, } ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs, - const int *attributeSet, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, - int num_bases, char **basenames, + int num_bases, const char **basenames, RSET *result_set) { RSET *result_sets = 0; @@ -2389,7 +2453,7 @@ ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs, ZEBRA_RES res; struct rset_key_control *kc = zebra_key_control_create(zh); - res = rpn_search_structure(zh, zs, attributeSet, + res = rpn_search_structure(zh, zs, attributeSet, hits_limit, stream, rset_nmem, sort_sequence, num_bases, basenames, @@ -2415,10 +2479,10 @@ ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs, } ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, - const int *attributeSet, + const Odr_oid *attributeSet, zint hits_limit, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, - int num_bases, char **basenames, + int num_bases, const char **basenames, RSET **result_sets, int *num_result_sets, Z_Operator *parent_op, struct rset_key_control *kc) @@ -2434,7 +2498,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, int num_result_sets_r = 0; res = rpn_search_structure(zh, zs->u.complex->s1, - attributeSet, stream, rset_nmem, + attributeSet, hits_limit, stream, rset_nmem, sort_sequence, num_bases, basenames, &result_sets_l, &num_result_sets_l, @@ -2447,7 +2511,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, return res; } res = rpn_search_structure(zh, zs->u.complex->s2, - attributeSet, stream, rset_nmem, + attributeSet, hits_limit, stream, rset_nmem, sort_sequence, num_bases, basenames, &result_sets_r, &num_result_sets_r, @@ -2542,7 +2606,8 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, { yaz_log(YLOG_DEBUG, "rpn_search_APT"); res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm, - 
attributeSet, stream, sort_sequence, + attributeSet, hits_limit, + stream, sort_sequence, num_bases, basenames, rset_nmem, &rset, kc); if (res != ZEBRA_OK) @@ -2584,6 +2649,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab