X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Frpnsearch.c;h=47c1c234c84500c4dda68aab85c7a8301fc3cba5;hp=aaae58a5a1c6c5d0369d0ca3bc977d4f2b9f59c9;hb=c5971ebf8a88865ed9a1f7c8cf9daa22544f07be;hpb=3e4a78274a6cb7a99f3e90967ea30c830ffbf8c3 diff --git a/index/rpnsearch.c b/index/rpnsearch.c index aaae58a..47c1c23 100644 --- a/index/rpnsearch.c +++ b/index/rpnsearch.c @@ -1,8 +1,5 @@ -/* $Id: rpnsearch.c,v 1.10 2007-03-19 21:50:39 adam Exp $ - Copyright (C) 1995-2007 - Index Data ApS - -This file is part of the Zebra server. +/* This file is part of the Zebra server. + Copyright (C) Index Data Zebra is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -20,6 +17,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#if HAVE_CONFIG_H +#include +#endif #include #include #ifdef WIN32 @@ -45,7 +45,7 @@ static int log_level_rpn = 0; static const char **rpn_char_map_handler(void *vp, const char **from, int len) { struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp; - const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0); + const char **out = zebra_maps_input(p->zm, from, len, 0); #if 0 if (out && *out) { @@ -61,28 +61,30 @@ static const char **rpn_char_map_handler(void *vp, const char **from, int len) return out; } -void rpn_char_map_prepare(struct zebra_register *reg, int reg_type, +void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm, struct rpn_char_map_info *map_info) { - map_info->zm = reg->zebra_maps; - map_info->reg_type = reg_type; - dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler); + map_info->zm = zm; + if (zebra_maps_is_icu(zm)) + dict_grep_cmap(reg->dict, 0, 0); + else + dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler); } -#define TERM_COUNT - -struct grep_info { -#ifdef TERM_COUNT - int *term_no; -#endif +#define TERM_COUNT + +struct grep_info { +#ifdef TERM_COUNT + int *term_no; +#endif ISAM_P *isam_p_buf; - int isam_p_size; + int isam_p_size; int isam_p_indx; int trunc_max; ZebraHandle zh; - int reg_type; + const char *index_type; ZebraSet termset; -}; +}; static int add_isam_p(const char *name, const char *info, struct grep_info *p) @@ -100,8 +102,8 @@ static int add_isam_p(const char *name, const char *info, if (p->isam_p_indx == p->isam_p_size) { ISAM_P *new_isam_p_buf; -#ifdef TERM_COUNT - int *new_term_no; +#ifdef TERM_COUNT + int *new_term_no; #endif p->isam_p_size = 2*p->isam_p_size + 100; new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) * @@ -109,7 +111,7 @@ static int add_isam_p(const char *name, const char *info, if (p->isam_p_buf) { memcpy(new_isam_p_buf, p->isam_p_buf, - p->isam_p_indx * sizeof(*p->isam_p_buf)); + p->isam_p_indx * sizeof(*p->isam_p_buf)); xfree(p->isam_p_buf); } p->isam_p_buf = new_isam_p_buf; @@ -119,7 +121,7 @@ static int add_isam_p(const char *name, const char *info, if (p->term_no) { memcpy(new_term_no, p->isam_p_buf, - p->isam_p_indx * sizeof(*p->term_no)); + p->isam_p_indx * sizeof(*p->term_no)); xfree(p->term_no); } p->term_no = new_term_no; @@ -134,14 +136,14 @@ static int add_isam_p(const char *name, const char *info, char term_tmp[IT_MAX_WORD]; int ord = 0; const char *index_name; - int len = key_SU_decode (&ord, (const unsigned char *) name); - - zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len); + int len = key_SU_decode(&ord, (const unsigned char *) name); + + zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len); yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp); zebraExplain_lookup_ord(p->zh->reg->zei, ord, 0 /* index_type */, &db, &index_name); yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name); - + resultSetAddTerm(p->zh, p->termset, name[len], db, index_name, term_tmp); } @@ -154,8 +156,8 @@ static int grep_handle(char *name, const char *info, void *p) return add_isam_p(name, info, (struct grep_info *) p); } -static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src, - const char *ct1, const char *ct2, int first) +static int term_pre(zebra_map_t zm, const char **src, + const char *ct1, int first) { const char *s1, *s0 = *src; const char **map; @@ -165,10 +167,8 @@ static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src, { if (ct1 && strchr(ct1, *s0)) break; - if (ct2 && strchr(ct2, *s0)) - break; s1 = s0; - map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first); + map = zebra_maps_input(zm, &s1, strlen(s1), first); if (**map != *CHR_SPACE) break; s0 = s1; @@ -187,7 +187,7 @@ static void esc_str(char *out_buf, size_t out_size, assert(in_buf); assert(out_size > 20); *out_buf = '\0'; - for (k = 0; k 0 && buf[--i] != '\x01') /* skip length */ + ; + while (i > 0 && buf[--i] != '\x01') /* skip accents */ + ; + return i; /* only basechars left */ +} + +static int term_102_icu(zebra_map_t zm, + const char **src, WRBUF term_dict, int space_split, + WRBUF display_term) +{ + int no_terms = 0; + const char *s0 = *src, *s1; + while (*s0 == ' ') + s0++; + s1 = s0; + for (;;) + { + if (*s1 == ' ' && space_split) + break; + else if (*s1 && !strchr(REGEX_CHARS "-", *s1)) + s1++; + else + { + /* EOF or regex reserved char */ + if (s0 != s1) + { + const char *res_buf = 0; + size_t res_len = 0; + const char *display_buf; + size_t display_len; + + zebra_map_tokenize_start(zm, s0, s1 - s0); + + if (zebra_map_tokenize_next(zm, &res_buf, &res_len, + &display_buf, &display_len)) + { + size_t i; + res_len = icu_basechars(res_buf, res_len); + for (i = 0; i < res_len; i++) + { + if (strchr(REGEX_CHARS "\\", res_buf[i])) + wrbuf_putc(term_dict, '\\'); + if (res_buf[i] < 32) + wrbuf_putc(term_dict, '\x01'); + + wrbuf_putc(term_dict, res_buf[i]); + } + wrbuf_write(display_term, display_buf, display_len); + + no_terms++; + } + } + if (*s1 == '\0') + break; + + wrbuf_putc(term_dict, *s1); + wrbuf_putc(display_term, *s1); + + s1++; + s0 = s1; + } + } + if (no_terms) + wrbuf_puts(term_dict, "\x01\x01.*"); + *src = s1; + return no_terms; +} + +static int term_100_icu(zebra_map_t zm, + const char **src, WRBUF term_dict, + WRBUF display_term, + int mode, + size_t token_number) +{ + size_t i; + const char *res_buf = 0; + size_t res_len = 0; + const char *display_buf; + size_t display_len; + + zebra_map_tokenize_start(zm, *src, strlen(*src)); + for (i = 0; i <= token_number; i++) + { + if (!zebra_map_tokenize_next(zm, &res_buf, &res_len, + &display_buf, &display_len)) + return 0; + } + wrbuf_write(display_term, display_buf, display_len); + if (mode) + { + res_len = icu_basechars(res_buf, res_len); + } + if (mode & 2) + wrbuf_puts(term_dict, ".*"); + for (i = 0; i < res_len; i++) + { + if (strchr(REGEX_CHARS "\\", res_buf[i])) + wrbuf_putc(term_dict, '\\'); + if (res_buf[i] < 32) + wrbuf_putc(term_dict, '\x01'); + + wrbuf_putc(term_dict, res_buf[i]); + } + if (mode & 1) + wrbuf_puts(term_dict, ".*"); + else if (mode) + wrbuf_puts(term_dict, "\x01\x01.*"); + return 1; +} /* term_100: handle term, where trunc = none(no operators at all) */ -static int term_100(ZebraMaps zebra_maps, int reg_type, - const char **src, char *dst, int space_split, - char *dst_term) +static int term_100(zebra_map_t zm, + const char **src, WRBUF term_dict, int space_split, + WRBUF display_term) { const char *s0; const char **map; int i = 0; - int j = 0; const char *space_start = 0; const char *space_end = 0; - if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split)) + if (!term_pre(zm, src, 0, !space_split)) return 0; s0 = *src; while (*s0) { const char *s1 = s0; int q_map_match = 0; - map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), - &q_map_match); + map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match); if (space_split) { if (**map == *CHR_SPACE) @@ -246,110 +390,71 @@ static int term_100(ZebraMaps zebra_maps, int reg_type, while (space_start < space_end) { if (strchr(REGEX_CHARS, *space_start)) - dst[i++] = '\\'; - dst_term[j++] = *space_start; - dst[i++] = *space_start++; + wrbuf_putc(term_dict, '\\'); + wrbuf_putc(display_term, *space_start); + wrbuf_putc(term_dict, *space_start); + space_start++; + } /* and reset */ space_start = space_end = 0; } } - /* add non-space char */ - memcpy(dst_term+j, s1, s0 - s1); - j += (s0 - s1); - if (!q_map_match) - { - while (s1 < s0) - { - if (strchr(REGEX_CHARS, *s1)) - dst[i++] = '\\'; - dst[i++] = *s1++; - } - } - else - { - char tmpbuf[80]; - esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); - - strcpy(dst + i, map[0]); - i += strlen(map[0]); - } + i++; + + add_non_space(s1, s0, term_dict, display_term, map, q_map_match); } - dst[i] = '\0'; - dst_term[j] = '\0'; *src = s0; return i; } /* term_101: handle term, where trunc = Process # */ -static int term_101(ZebraMaps zebra_maps, int reg_type, - const char **src, char *dst, int space_split, - char *dst_term) +static int term_101(zebra_map_t zm, + const char **src, WRBUF term_dict, int space_split, + WRBUF display_term) { const char *s0; const char **map; int i = 0; - int j = 0; - if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split)) + if (!term_pre(zm, src, "#", !space_split)) return 0; s0 = *src; while (*s0) { if (*s0 == '#') { - dst[i++] = '.'; - dst[i++] = '*'; - dst_term[j++] = *s0++; + i++; + wrbuf_puts(term_dict, ".*"); + wrbuf_putc(display_term, *s0); + s0++; } else { const char *s1 = s0; int q_map_match = 0; - map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), - &q_map_match); + map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match); if (space_split && **map == *CHR_SPACE) break; - /* add non-space char */ - memcpy(dst_term+j, s1, s0 - s1); - j += (s0 - s1); - if (!q_map_match) - { - while (s1 < s0) - { - if (strchr(REGEX_CHARS, *s1)) - dst[i++] = '\\'; - dst[i++] = *s1++; - } - } - else - { - char tmpbuf[80]; - esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); - - strcpy(dst + i, map[0]); - i += strlen(map[0]); - } + i++; + add_non_space(s1, s0, term_dict, display_term, map, q_map_match); } } - dst[i] = '\0'; - dst_term[j++] = '\0'; *src = s0; return i; } /* term_103: handle term, where trunc = re-2 (regular expressions) */ -static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src, - char *dst, int *errors, int space_split, - char *dst_term) +static int term_103(zebra_map_t zm, const char **src, + WRBUF term_dict, int *errors, int space_split, + WRBUF display_term) { int i = 0; - int j = 0; const char *s0; const char **map; - if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split)) + if (!term_pre(zm, src, "^\\()[].*+?|", !space_split)) return 0; s0 = *src; if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] && @@ -364,209 +469,154 @@ static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src, { if (strchr("^\\()[].*+?|-", *s0)) { - dst_term[j++] = *s0; - dst[i++] = *s0++; + wrbuf_putc(display_term, *s0); + wrbuf_putc(term_dict, *s0); + s0++; + i++; } else { const char *s1 = s0; int q_map_match = 0; - map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), - &q_map_match); + map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match); if (space_split && **map == *CHR_SPACE) break; - /* add non-space char */ - memcpy(dst_term+j, s1, s0 - s1); - j += (s0 - s1); - if (!q_map_match) - { - while (s1 < s0) - { - if (strchr(REGEX_CHARS, *s1)) - dst[i++] = '\\'; - dst[i++] = *s1++; - } - } - else - { - char tmpbuf[80]; - esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); - - strcpy(dst + i, map[0]); - i += strlen(map[0]); - } + i++; + add_non_space(s1, s0, term_dict, display_term, map, q_map_match); } } - dst[i] = '\0'; - dst_term[j] = '\0'; *src = s0; - + return i; } /* term_103: handle term, where trunc = re-1 (regular expressions) */ -static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src, - char *dst, int space_split, char *dst_term) +static int term_102(zebra_map_t zm, const char **src, + WRBUF term_dict, int space_split, WRBUF display_term) { - return term_103(zebra_maps, reg_type, src, dst, NULL, space_split, - dst_term); + return term_103(zm, src, term_dict, NULL, space_split, display_term); } -/* term_104: handle term, where trunc = Process # and ! */ -static int term_104(ZebraMaps zebra_maps, int reg_type, - const char **src, char *dst, int space_split, - char *dst_term) +/* term_104: handle term, process ?n * # */ +static int term_104(zebra_map_t zm, const char **src, + WRBUF term_dict, int space_split, WRBUF display_term) { const char *s0; const char **map; int i = 0; - int j = 0; - if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split)) + if (!term_pre(zm, src, "?*#", !space_split)) return 0; s0 = *src; while (*s0) { if (*s0 == '?') { - dst_term[j++] = *s0++; + i++; + wrbuf_putc(display_term, *s0); + s0++; if (*s0 >= '0' && *s0 <= '9') { int limit = 0; while (*s0 >= '0' && *s0 <= '9') { limit = limit * 10 + (*s0 - '0'); - dst_term[j++] = *s0++; + wrbuf_putc(display_term, *s0); + s0++; } if (limit > 20) limit = 20; while (--limit >= 0) { - dst[i++] = '.'; - dst[i++] = '?'; + wrbuf_puts(term_dict, ".?"); } } else { - dst[i++] = '.'; - dst[i++] = '*'; + wrbuf_puts(term_dict, ".*"); } } else if (*s0 == '*') { - dst[i++] = '.'; - dst[i++] = '*'; - dst_term[j++] = *s0++; + i++; + wrbuf_puts(term_dict, ".*"); + wrbuf_putc(display_term, *s0); + s0++; } else if (*s0 == '#') { - dst[i++] = '.'; - dst_term[j++] = *s0++; + i++; + wrbuf_puts(term_dict, "."); + wrbuf_putc(display_term, *s0); + s0++; } else { const char *s1 = s0; int q_map_match = 0; - map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), - &q_map_match); + map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match); if (space_split && **map == *CHR_SPACE) break; - /* add non-space char */ - memcpy(dst_term+j, s1, s0 - s1); - j += (s0 - s1); - if (!q_map_match) - { - while (s1 < s0) - { - if (strchr(REGEX_CHARS, *s1)) - dst[i++] = '\\'; - dst[i++] = *s1++; - } - } - else - { - char tmpbuf[80]; - esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); - - strcpy(dst + i, map[0]); - i += strlen(map[0]); - } + i++; + add_non_space(s1, s0, term_dict, display_term, map, q_map_match); } } - dst[i] = '\0'; - dst_term[j++] = '\0'; *src = s0; return i; } -/* term_105/106: handle term, where trunc = Process * and ! and right trunc */ -static int term_105(ZebraMaps zebra_maps, int reg_type, - const char **src, char *dst, int space_split, - char *dst_term, int right_truncate) +/* term_105/106: handle term, process * ! and possibly right_truncate */ +static int term_105(zebra_map_t zm, const char **src, + WRBUF term_dict, int space_split, + WRBUF display_term, int right_truncate) { const char *s0; const char **map; int i = 0; - int j = 0; - if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split)) + if (!term_pre(zm, src, "\\*!", !space_split)) return 0; s0 = *src; while (*s0) { if (*s0 == '*') { - dst[i++] = '.'; - dst[i++] = '*'; - dst_term[j++] = *s0++; + i++; + wrbuf_puts(term_dict, ".*"); + wrbuf_putc(display_term, *s0); + s0++; } else if (*s0 == '!') { - dst[i++] = '.'; - dst_term[j++] = *s0++; + i++; + wrbuf_putc(term_dict, '.'); + wrbuf_putc(display_term, *s0); + s0++; + } + else if (*s0 == '\\') + { + i++; + wrbuf_puts(term_dict, "\\\\"); + wrbuf_putc(display_term, *s0); + s0++; } else { const char *s1 = s0; int q_map_match = 0; - map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), - &q_map_match); + map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match); if (space_split && **map == *CHR_SPACE) break; - /* add non-space char */ - memcpy(dst_term+j, s1, s0 - s1); - j += (s0 - s1); - if (!q_map_match) - { - while (s1 < s0) - { - if (strchr(REGEX_CHARS, *s1)) - dst[i++] = '\\'; - dst[i++] = *s1++; - } - } - else - { - char tmpbuf[80]; - esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); - - strcpy(dst + i, map[0]); - i += strlen(map[0]); - } + i++; + add_non_space(s1, s0, term_dict, display_term, map, q_map_match); } } if (right_truncate) - { - dst[i++] = '.'; - dst[i++] = '*'; - } - dst[i] = '\0'; - - dst_term[j++] = '\0'; + wrbuf_puts(term_dict, ".*"); *src = s0; return i; } @@ -576,8 +626,10 @@ static int term_105(ZebraMaps zebra_maps, int reg_type, * val: border value (inclusive) * islt: 1 if <=; 0 if >=. */ -static void gen_regular_rel(char *dst, int val, int islt) +static void gen_regular_rel(WRBUF term_dict, int val, int islt) { + char dst_buf[20*5*20]; /* assuming enough for expansion */ + char *dst = dst_buf; int dst_p; int w, d, i; int pos = 0; @@ -590,13 +642,12 @@ static void gen_regular_rel(char *dst, int val, int islt) strcpy(dst, "(-[0-9]+|("); else strcpy(dst, "(("); - } + } else { if (!islt) { strcpy(dst, "([0-9]+|-("); - dst_p = strlen(dst); islt = 1; } else @@ -618,7 +669,7 @@ static void gen_regular_rel(char *dst, int val, int islt) if (d == '0') continue; d--; - } + } else { if (d == '9') @@ -626,7 +677,7 @@ static void gen_regular_rel(char *dst, int val, int islt) d++; } } - + strcpy(dst + dst_p, numstr); dst_p = strlen(dst) - pos - 1; @@ -646,7 +697,7 @@ static void gen_regular_rel(char *dst, int val, int islt) else { if (d != '9') - { + { dst[dst_p++] = '['; dst[dst_p++] = d; dst[dst_p++] = '-'; @@ -656,7 +707,7 @@ static void gen_regular_rel(char *dst, int val, int islt) else dst[dst_p++] = d; } - for (i = 0; ireg->zebra_maps, reg_type, - term_sub, term_component, - space_split, term_dst)) + if (!term_100(zm, term_sub, term_component, space_split, display_term)) + { + wrbuf_destroy(term_component); return 0; + } yaz_log(log_level_rpn, "Relation <"); - - *term_tmp++ = '('; - for (i = 0; term_component[i]; ) + + wrbuf_putc(term_dict, '('); + for (i = 0; i < wrbuf_len(term_component); ) { int j = 0; if (i) - *term_tmp++ = '|'; + wrbuf_putc(term_dict, '|'); while (j < i) - string_rel_add_char(&term_tmp, term_component, &j); + string_rel_add_char(term_dict, term_component, &j); - *term_tmp++ = '['; + wrbuf_putc(term_dict, '['); - *term_tmp++ = '^'; + wrbuf_putc(term_dict, '^'); - *term_tmp++ = 1; - *term_tmp++ = FIRST_IN_FIELD_CHAR; + wrbuf_putc(term_dict, 1); + wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR); - string_rel_add_char(&term_tmp, term_component, &i); - *term_tmp++ = '-'; + string_rel_add_char(term_dict, term_component, &i); + wrbuf_putc(term_dict, '-'); - *term_tmp++ = ']'; - *term_tmp++ = '.'; - *term_tmp++ = '*'; - - if ((term_tmp - term_dict) > IT_MAX_WORD) - break; + wrbuf_putc(term_dict, ']'); + wrbuf_putc(term_dict, '.'); + wrbuf_putc(term_dict, '*'); } - *term_tmp++ = ')'; - *term_tmp = '\0'; - yaz_log(YLOG_LOG, "term_dict=%s", term_dict); + wrbuf_putc(term_dict, ')'); break; case 2: - if (!term_100(zh->reg->zebra_maps, reg_type, - term_sub, term_component, - space_split, term_dst)) + if (!term_100(zm, term_sub, term_component, space_split, display_term)) + { + wrbuf_destroy(term_component); return 0; + } yaz_log(log_level_rpn, "Relation <="); - *term_tmp++ = '('; - for (i = 0; term_component[i]; ) + wrbuf_putc(term_dict, '('); + for (i = 0; i < wrbuf_len(term_component); ) { int j = 0; while (j < i) - string_rel_add_char(&term_tmp, term_component, &j); - *term_tmp++ = '['; - - *term_tmp++ = '^'; + string_rel_add_char(term_dict, term_component, &j); + wrbuf_putc(term_dict, '['); - *term_tmp++ = 1; - *term_tmp++ = FIRST_IN_FIELD_CHAR; + wrbuf_putc(term_dict, '^'); - string_rel_add_char(&term_tmp, term_component, &i); - *term_tmp++ = '-'; + wrbuf_putc(term_dict, 1); + wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR); - *term_tmp++ = ']'; - *term_tmp++ = '.'; - *term_tmp++ = '*'; + string_rel_add_char(term_dict, term_component, &i); + wrbuf_putc(term_dict, '-'); - *term_tmp++ = '|'; + wrbuf_putc(term_dict, ']'); + wrbuf_putc(term_dict, '.'); + wrbuf_putc(term_dict, '*'); - if ((term_tmp - term_dict) > IT_MAX_WORD) - break; + wrbuf_putc(term_dict, '|'); } - for (i = 0; term_component[i]; ) - string_rel_add_char(&term_tmp, term_component, &i); - *term_tmp++ = ')'; - *term_tmp = '\0'; + for (i = 0; i < wrbuf_len(term_component); ) + string_rel_add_char(term_dict, term_component, &i); + wrbuf_putc(term_dict, ')'); break; case 5: - if (!term_100 (zh->reg->zebra_maps, reg_type, - term_sub, term_component, space_split, term_dst)) + if (!term_100(zm, term_sub, term_component, space_split, display_term)) + { + wrbuf_destroy(term_component); return 0; + } yaz_log(log_level_rpn, "Relation >"); - *term_tmp++ = '('; - for (i = 0; term_component[i];) + wrbuf_putc(term_dict, '('); + for (i = 0; i < wrbuf_len(term_component); ) { int j = 0; while (j < i) - string_rel_add_char(&term_tmp, term_component, &j); - *term_tmp++ = '['; - - *term_tmp++ = '^'; - *term_tmp++ = '-'; - string_rel_add_char(&term_tmp, term_component, &i); + string_rel_add_char(term_dict, term_component, &j); + wrbuf_putc(term_dict, '['); - *term_tmp++ = ']'; - *term_tmp++ = '.'; - *term_tmp++ = '*'; + wrbuf_putc(term_dict, '^'); + wrbuf_putc(term_dict, '-'); + string_rel_add_char(term_dict, term_component, &i); - *term_tmp++ = '|'; + wrbuf_putc(term_dict, ']'); + wrbuf_putc(term_dict, '.'); + wrbuf_putc(term_dict, '*'); - if ((term_tmp - term_dict) > IT_MAX_WORD) - break; + wrbuf_putc(term_dict, '|'); } - for (i = 0; term_component[i];) - string_rel_add_char(&term_tmp, term_component, &i); - *term_tmp++ = '.'; - *term_tmp++ = '+'; - *term_tmp++ = ')'; - *term_tmp = '\0'; + for (i = 0; i < wrbuf_len(term_component); ) + string_rel_add_char(term_dict, term_component, &i); + wrbuf_putc(term_dict, '.'); + wrbuf_putc(term_dict, '+'); + wrbuf_putc(term_dict, ')'); break; case 4: - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, - term_component, space_split, term_dst)) + if (!term_100(zm, term_sub, term_component, space_split, display_term)) + { + wrbuf_destroy(term_component); return 0; + } yaz_log(log_level_rpn, "Relation >="); - *term_tmp++ = '('; - for (i = 0; term_component[i];) + wrbuf_putc(term_dict, '('); + for (i = 0; i < wrbuf_len(term_component); ) { int j = 0; if (i) - *term_tmp++ = '|'; + wrbuf_putc(term_dict, '|'); while (j < i) - string_rel_add_char(&term_tmp, term_component, &j); - *term_tmp++ = '['; + string_rel_add_char(term_dict, term_component, &j); + wrbuf_putc(term_dict, '['); - if (term_component[i+1]) + if (i < wrbuf_len(term_component)-1) { - *term_tmp++ = '^'; - *term_tmp++ = '-'; - string_rel_add_char(&term_tmp, term_component, &i); + wrbuf_putc(term_dict, '^'); + wrbuf_putc(term_dict, '-'); + string_rel_add_char(term_dict, term_component, &i); } else { - string_rel_add_char(&term_tmp, term_component, &i); - *term_tmp++ = '-'; + string_rel_add_char(term_dict, term_component, &i); + wrbuf_putc(term_dict, '-'); } - *term_tmp++ = ']'; - *term_tmp++ = '.'; - *term_tmp++ = '*'; - - if ((term_tmp - term_dict) > IT_MAX_WORD) - break; + wrbuf_putc(term_dict, ']'); + wrbuf_putc(term_dict, '.'); + wrbuf_putc(term_dict, '*'); } - *term_tmp++ = ')'; - *term_tmp = '\0'; + wrbuf_putc(term_dict, ')'); break; case 3: case 102: @@ -875,12 +921,14 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, if (!**term_sub) return 1; yaz_log(log_level_rpn, "Relation ="); - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, - term_component, space_split, term_dst)) + if (!term_100(zm, term_sub, term_component, space_split, display_term)) + { + wrbuf_destroy(term_component); return 0; - strcat(term_tmp, "("); - strcat(term_tmp, term_component); - strcat(term_tmp, ")"); + } + wrbuf_puts(term_dict, "("); + wrbuf_puts(term_dict, wrbuf_cstr(term_component)); + wrbuf_puts(term_dict, ")"); break; case 103: yaz_log(log_level_rpn, "Relation always matches"); @@ -890,33 +938,37 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, break; default: *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE; + wrbuf_destroy(term_component); return 0; } + wrbuf_destroy(term_component); return 1; } static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const char **term_sub, - oid_value attributeSet, NMEM stream, + const char **term_sub, + WRBUF term_dict, + const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, - int reg_type, int complete_flag, - int num_bases, char **basenames, - char *term_dst, + const char *index_type, int complete_flag, + WRBUF display_term, const char *xpath_use, - struct ord_list **ol); - -static ZEBRA_RES term_limits_APT(ZebraHandle zh, - Z_AttributesPlusTerm *zapt, - zint *hits_limit_value, - const char **term_ref_id_str, - NMEM nmem) + struct ord_list **ol, + zebra_map_t zm, size_t token_number); + +ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + zint *hits_limit_value, + const char **term_ref_id_str, + NMEM nmem) { AttrType term_ref_id_attr; AttrType hits_limit_attr; int term_ref_id_int; - + zint hits_limit_from_attr; + attr_init_APT(&hits_limit_attr, zapt, 11); - *hits_limit_value = attr_find(&hits_limit_attr, NULL); + hits_limit_from_attr = attr_find(&hits_limit_attr, NULL); attr_init_APT(&term_ref_id_attr, zapt, 10); term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str); @@ -926,270 +978,318 @@ static ZEBRA_RES term_limits_APT(ZebraHandle zh, sprintf(res, "%d", term_ref_id_int); *term_ref_id_str = res; } + if (hits_limit_from_attr != -1) + *hits_limit_value = hits_limit_from_attr; - /* no limit given ? */ - if (*hits_limit_value == -1) - { - if (*term_ref_id_str) - { - /* use global if term_ref is present */ - *hits_limit_value = zh->approx_limit; - } - else - { - /* no counting if term_ref is not present */ - *hits_limit_value = 0; - } - } - else if (*hits_limit_value == 0) - { - /* 0 is the same as global limit */ - *hits_limit_value = zh->approx_limit; - } yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT, *term_ref_id_str ? *term_ref_id_str : "none", *hits_limit_value); return ZEBRA_OK; } -static ZEBRA_RES term_trunc(ZebraHandle zh, - Z_AttributesPlusTerm *zapt, - const char **term_sub, - oid_value attributeSet, NMEM stream, - struct grep_info *grep_info, - int reg_type, int complete_flag, - int num_bases, char **basenames, - char *term_dst, - const char *rank_type, - const char *xpath_use, - NMEM rset_nmem, - RSET *rset, - struct rset_key_control *kc) +/** \brief search for term (which may be truncated) + */ +static ZEBRA_RES search_term(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const char **term_sub, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, + struct grep_info *grep_info, + const char *index_type, int complete_flag, + const char *rank_type, + const char *xpath_use, + NMEM rset_nmem, + RSET *rset, + struct rset_key_control *kc, + zebra_map_t zm, + size_t token_number) { ZEBRA_RES res; struct ord_list *ol; - zint hits_limit_value; + zint hits_limit_value = hits_limit; const char *term_ref_id_str = 0; + WRBUF term_dict = wrbuf_alloc(); + WRBUF display_term = wrbuf_alloc(); *rset = 0; - - term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream); + zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, + stream); grep_info->isam_p_indx = 0; - res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info, - reg_type, complete_flag, num_bases, basenames, - term_dst, xpath_use, &ol); - if (res != ZEBRA_OK) - return res; - if (!*term_sub) /* no more terms ? */ - return res; - yaz_log(log_level_rpn, "term: %s", term_dst); - *rset = rset_trunc(zh, grep_info->isam_p_buf, - grep_info->isam_p_indx, term_dst, - strlen(term_dst), rank_type, 1 /* preserve pos */, - zapt->term->which, rset_nmem, - kc, kc->scope, ol, reg_type, hits_limit_value, - term_ref_id_str); - if (!*rset) - return ZEBRA_FAIL; - return ZEBRA_OK; + res = string_term(zh, zapt, term_sub, term_dict, + attributeSet, stream, grep_info, + index_type, complete_flag, + display_term, xpath_use, &ol, zm, token_number); + wrbuf_destroy(term_dict); + if (res == ZEBRA_OK && *term_sub) + { + yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term)); + *rset = rset_trunc(zh, grep_info->isam_p_buf, + grep_info->isam_p_indx, wrbuf_buf(display_term), + wrbuf_len(display_term), rank_type, + 1 /* preserve pos */, + zapt->term->which, rset_nmem, + kc, kc->scope, ol, index_type, hits_limit_value, + term_ref_id_str); + if (!*rset) + res = ZEBRA_FAIL; + } + wrbuf_destroy(display_term); + return res; } static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const char **term_sub, - oid_value attributeSet, NMEM stream, + const char **term_sub, + WRBUF term_dict, + const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, - int reg_type, int complete_flag, - int num_bases, char **basenames, - char *term_dst, + const char *index_type, int complete_flag, + WRBUF display_term, const char *xpath_use, - struct ord_list **ol) + struct ord_list **ol, + zebra_map_t zm, size_t token_number) { - char term_dict[2*IT_MAX_WORD+4000]; - int j, r, base_no; + int r; AttrType truncation; int truncation_value; const char *termp; struct rpn_char_map_info rcmi; - int space_split = complete_flag ? 0 : 1; - int bases_ok = 0; /* no of databases with OK attribute */ + int space_split = complete_flag ? 0 : 1; + int ord = -1; + int regex_range = 0; + int max_pos, prefix_len = 0; + int relation_error; + char ord_buf[32]; + int ord_len, i; *ol = ord_list_create(stream); - rpn_char_map_prepare (zh->reg, reg_type, &rcmi); + rpn_char_map_prepare(zh->reg, zm, &rcmi); attr_init_APT(&truncation, zapt, 5); truncation_value = attr_find(&truncation, NULL); yaz_log(log_level_rpn, "truncation value %d", truncation_value); - for (base_no = 0; base_no < num_bases; base_no++) + termp = *term_sub; /* start of term for each database */ + + if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use, + attributeSet, &ord) != ZEBRA_OK) { - int ord = -1; - int regex_range = 0; - int max_pos, prefix_len = 0; - int relation_error; - char ord_buf[32]; - int ord_len, i; + *term_sub = 0; + return ZEBRA_FAIL; + } - termp = *term_sub; /* start of term for each database */ + wrbuf_rewind(term_dict); /* new dictionary regexp term */ - if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no])) + *ol = ord_list_append(stream, *ol, ord); + ord_len = key_SU_encode(ord, ord_buf); + + wrbuf_putc(term_dict, '('); + + for (i = 0; i < ord_len; i++) + { + wrbuf_putc(term_dict, 1); /* our internal regexp escape char */ + wrbuf_putc(term_dict, ord_buf[i]); + } + wrbuf_putc(term_dict, ')'); + + prefix_len = wrbuf_len(term_dict); + + if (zebra_maps_is_icu(zm)) + { + int relation_value; + AttrType relation; + + attr_init_APT(&relation, zapt, 2); + relation_value = attr_find(&relation, NULL); + if (relation_value == 103) /* always matches */ + termp += strlen(termp); /* move to end of term */ + else if (relation_value == 3 || relation_value == 102 || relation_value == -1) { - zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE, - basenames[base_no]); - return ZEBRA_FAIL; + /* ICU case */ + switch (truncation_value) + { + case -1: /* not specified */ + case 100: /* do not truncate */ + if (!term_100_icu(zm, &termp, term_dict, display_term, 0, token_number)) + { + *term_sub = 0; + return ZEBRA_OK; + } + break; + case 102: + if (!term_102_icu(zm, &termp, term_dict, space_split, display_term)) + { + *term_sub = 0; + return ZEBRA_OK; + } + break; + case 1: /* right truncation */ + if (!term_100_icu(zm, &termp, term_dict, display_term, 1, token_number)) + { + *term_sub = 0; + return ZEBRA_OK; + } + break; + case 2: + if (!term_100_icu(zm, &termp, term_dict, display_term, 2, token_number)) + { + *term_sub = 0; + return ZEBRA_OK; + } + break; + case 3: + if (!term_100_icu(zm, &termp, term_dict, display_term, 3, token_number)) + { + *term_sub = 0; + return ZEBRA_OK; + } + break; + default: + zebra_setError_zint(zh, + YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE, + truncation_value); + return ZEBRA_FAIL; + } } - - if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use, - attributeSet, &ord) != ZEBRA_OK) - continue; - - bases_ok++; - - *ol = ord_list_append(stream, *ol, ord); - ord_len = key_SU_encode (ord, ord_buf); - - term_dict[prefix_len++] = '('; - for (i = 0; ireg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst)) - { - *term_sub = 0; - return ZEBRA_OK; - } - strcat(term_dict, ".*)"); + wrbuf_putc(term_dict, '('); + if (!term_100(zm, &termp, term_dict, space_split, display_term)) + { + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_puts(term_dict, ".*)"); break; - case 2: /* keft truncation */ - term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*'; - if (!term_100(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst)) - { - *term_sub = 0; - return ZEBRA_OK; - } - strcat(term_dict, ")"); + case 2: /* left truncation */ + wrbuf_puts(term_dict, "(.*"); + if (!term_100(zm, &termp, term_dict, space_split, display_term)) + { + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_putc(term_dict, ')'); break; case 3: /* left&right truncation */ - term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*'; - if (!term_100(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst)) - { - *term_sub = 0; - return ZEBRA_OK; - } - strcat(term_dict, ".*)"); + wrbuf_puts(term_dict, "(.*"); + if (!term_100(zm, &termp, term_dict, space_split, display_term)) + { + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_puts(term_dict, ".*)"); break; case 101: /* process # in term */ - term_dict[j++] = '('; - if (!term_101(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst)) - { - *term_sub = 0; + wrbuf_putc(term_dict, '('); + if (!term_101(zm, &termp, term_dict, space_split, display_term)) + { + *term_sub = 0; return ZEBRA_OK; - } - strcat(term_dict, ")"); + } + wrbuf_puts(term_dict, ")"); break; case 102: /* Regexp-1 */ - term_dict[j++] = '('; - if (!term_102(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst)) - { - *term_sub = 0; + wrbuf_putc(term_dict, '('); + if (!term_102(zm, &termp, term_dict, space_split, display_term)) + { + *term_sub = 0; return ZEBRA_OK; - } - strcat(term_dict, ")"); + } + wrbuf_putc(term_dict, ')'); break; case 103: /* Regexp-2 */ regex_range = 1; - term_dict[j++] = '('; - if (!term_103(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, ®ex_range, - space_split, term_dst)) - { - *term_sub = 0; + wrbuf_putc(term_dict, '('); + if (!term_103(zm, &termp, term_dict, ®ex_range, + space_split, display_term)) + { + *term_sub = 0; return ZEBRA_OK; - } - strcat(term_dict, ")"); - break; - case 104: /* process # and ! in term */ - term_dict[j++] = '('; - if (!term_104(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst)) - { - *term_sub = 0; + } + wrbuf_putc(term_dict, ')'); + break; + case 104: /* process ?n * # term */ + wrbuf_putc(term_dict, '('); + if (!term_104(zm, &termp, term_dict, space_split, display_term)) + { + *term_sub = 0; return ZEBRA_OK; - } - strcat(term_dict, ")"); + } + wrbuf_putc(term_dict, ')'); break; - case 105: /* process * and ! in term */ - term_dict[j++] = '('; - if (!term_105(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst, 1)) - { - *term_sub = 0; + case 105: /* process * ! in term and right truncate */ + wrbuf_putc(term_dict, '('); + if (!term_105(zm, &termp, term_dict, space_split, display_term, 1)) + { + *term_sub = 0; return ZEBRA_OK; - } - strcat(term_dict, ")"); + } + wrbuf_putc(term_dict, ')'); break; - case 106: /* process * and ! in term */ - term_dict[j++] = '('; - if (!term_105(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst, 0)) - { - *term_sub = 0; + case 106: /* process * ! in term */ + wrbuf_putc(term_dict, '('); + if (!term_105(zm, &termp, term_dict, space_split, display_term, 0)) + { + *term_sub = 0; return ZEBRA_OK; - } - strcat(term_dict, ")"); + } + wrbuf_putc(term_dict, ')'); break; - default: - zebra_setError_zint(zh, - YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE, - truncation_value); - return ZEBRA_FAIL; + default: + zebra_setError_zint(zh, + YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE, + truncation_value); + return ZEBRA_FAIL; } - if (1) - { - char buf[80]; - const char *input = term_dict + prefix_len; - esc_str(buf, sizeof(buf), input, strlen(input)); - } - yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len); - r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range, - grep_info, &max_pos, - ord_len /* number of "exact" chars */, - grep_handle); - if (r == 1) - zebra_set_partial_result(zh); - else if (r) - yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r); - } - if (!bases_ok) - return ZEBRA_FAIL; + } + if (1) + { + char buf[1000]; + const char *input = wrbuf_cstr(term_dict) + prefix_len; + esc_str(buf, sizeof(buf), input, strlen(input)); + } + { + WRBUF pr_wr = wrbuf_alloc(); + + wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict)); + yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr)); + wrbuf_destroy(pr_wr); + } + r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range, + grep_info, &max_pos, + ord_len /* number of "exact" chars */, + grep_handle); + if (r == 1) + zebra_set_partial_result(zh); + else if (r) + yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r); *term_sub = termp; yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx); return ZEBRA_OK; @@ -1208,7 +1308,7 @@ static void grep_info_delete(struct grep_info *grep_info) static ZEBRA_RES grep_info_prepare(ZebraHandle zh, Z_AttributesPlusTerm *zapt, struct grep_info *grep_info, - int reg_type) + const char *index_type) { #ifdef TERM_COUNT grep_info->term_no = 0; @@ -1217,7 +1317,7 @@ static ZEBRA_RES grep_info_prepare(ZebraHandle zh, grep_info->isam_p_size = 0; grep_info->isam_p_buf = NULL; grep_info->zh = zh; - grep_info->reg_type = reg_type; + grep_info->index_type = index_type; grep_info->termset = 0; if (zapt) { @@ -1248,12 +1348,12 @@ static ZEBRA_RES grep_info_prepare(ZebraHandle zh, const char *termset_name = 0; if (termset_value_numeric != -2) { - + sprintf(resname, "%d", termset_value_numeric); termset_name = resname; } else - termset_name = termset_value_string; + termset_name = termset_value_string; yaz_log(log_level_rpn, "creating termset set %s", termset_name); grep_info->termset = resultSetAdd(zh, termset_name, 1); if (!grep_info->termset) @@ -1266,75 +1366,56 @@ static ZEBRA_RES grep_info_prepare(ZebraHandle zh, } return ZEBRA_OK; } - -/** - \brief Create result set(s) for list of terms - \param zh Zebra Handle - \param zapt Attributes Plust Term (RPN leaf) - \param termz term as used in query but converted to UTF-8 - \param attributeSet default attribute set - \param stream memory for result - \param reg_type register type ('w', 'p',..) - \param complete_flag whether it's phrases or not - \param rank_type term flags for ranking - \param xpath_use use attribute for X-Path (-1 for no X-path) - \param num_bases number of databases - \param basenames array of databases - \param rset_nmem memory for result sets - \param result_sets output result set for each term in list (output) - \param num_result_sets number of output result sets - \param kc rset key control to be used for created result sets -*/ -static ZEBRA_RES term_list_trunc(ZebraHandle zh, - Z_AttributesPlusTerm *zapt, - const char *termz, - oid_value attributeSet, - NMEM stream, - int reg_type, int complete_flag, - const char *rank_type, - const char *xpath_use, - int num_bases, char **basenames, - NMEM rset_nmem, - RSET **result_sets, int *num_result_sets, - struct rset_key_control *kc) + +static ZEBRA_RES search_terms_chrmap(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const char *termz, + const Odr_oid *attributeSet, + zint hits_limit, + NMEM stream, + const char *index_type, int complete_flag, + const char *rank_type, + const char *xpath_use, + NMEM rset_nmem, + RSET **result_sets, int *num_result_sets, + struct rset_key_control *kc, + zebra_map_t zm) { - char term_dst[IT_MAX_WORD+1]; struct grep_info grep_info; const char *termp = termz; int alloc_sets = 0; *num_result_sets = 0; - *term_dst = 0; - if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL) + if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL) return ZEBRA_FAIL; - while(1) - { + while (1) + { ZEBRA_RES res; if (alloc_sets == *num_result_sets) { int add = 10; - RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * + RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * sizeof(*rnew)); if (alloc_sets) memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew)); alloc_sets = alloc_sets + add; *result_sets = rnew; } - res = term_trunc(zh, zapt, &termp, attributeSet, - stream, &grep_info, - reg_type, complete_flag, - num_bases, basenames, - term_dst, rank_type, - xpath_use, rset_nmem, - &(*result_sets)[*num_result_sets], - kc); + res = search_term(zh, zapt, &termp, attributeSet, hits_limit, + stream, &grep_info, + index_type, complete_flag, + rank_type, + xpath_use, rset_nmem, + &(*result_sets)[*num_result_sets], + kc, zm, + *num_result_sets); if (res != ZEBRA_OK) { int i; for (i = 0; i < *num_result_sets; i++) rset_delete((*result_sets)[i]); - grep_info_delete (&grep_info); + grep_info_delete(&grep_info); return res; } if ((*result_sets)[*num_result_sets] == 0) @@ -1348,20 +1429,63 @@ static ZEBRA_RES term_list_trunc(ZebraHandle zh, return ZEBRA_OK; } -static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh, - Z_AttributesPlusTerm *zapt, - oid_value attributeSet, - int reg_type, - int num_bases, char **basenames, - NMEM rset_nmem, - RSET *rset, - struct rset_key_control *kc) +/** + \brief Create result set(s) for list of terms + \param zh Zebra Handle + \param zapt Attributes Plust Term (RPN leaf) + \param termz term as used in query but converted to UTF-8 + \param attributeSet default attribute set + \param stream memory for result + \param index_type register type ("w", "p",..) + \param complete_flag whether it's phrases or not + \param rank_type term flags for ranking + \param xpath_use use attribute for X-Path (-1 for no X-path) + \param rset_nmem memory for result sets + \param result_sets output result set for each term in list (output) + \param num_result_sets number of output result sets + \param kc rset key control to be used for created result sets +*/ +static ZEBRA_RES search_terms_list(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const char *termz, + const Odr_oid *attributeSet, + zint hits_limit, + NMEM stream, + const char *index_type, int complete_flag, + const char *rank_type, + const char *xpath_use, + NMEM rset_nmem, + RSET **result_sets, int *num_result_sets, + struct rset_key_control *kc) +{ + zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type); + return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit, + stream, index_type, complete_flag, + rank_type, xpath_use, + rset_nmem, result_sets, num_result_sets, + kc, zm); +} + + +/** \brief limit a search by position - returns result set + */ +static ZEBRA_RES search_position(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const Odr_oid *attributeSet, + const char *index_type, + NMEM rset_nmem, + RSET *rset, + struct rset_key_control *kc) { - RSET *f_set; - int base_no; int position_value; - int num_sets = 0; AttrType position; + int ord = -1; + char ord_buf[32]; + char term_dict[100]; + int ord_len; + char *val; + ISAM_P isam_p; + zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type); attr_init_APT(&position, zapt, 3); position_value = attr_find(&position, NULL); @@ -1379,75 +1503,46 @@ static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh, return ZEBRA_FAIL; } - if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, reg_type)) + + if (!zebra_maps_is_first_in_field(zm)) { zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE, position_value); return ZEBRA_FAIL; } - if (!zh->reg->isamb && !zh->reg->isamc) + if (zebra_apt_get_ord(zh, zapt, index_type, 0, + attributeSet, &ord) != ZEBRA_OK) { - zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE, - position_value); return ZEBRA_FAIL; } - f_set = xmalloc(sizeof(RSET) * num_bases); - for (base_no = 0; base_no < num_bases; base_no++) + ord_len = key_SU_encode(ord, ord_buf); + memcpy(term_dict, ord_buf, ord_len); + strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR); + val = dict_lookup(zh->reg->dict, term_dict); + if (val) { - int ord = -1; - char ord_buf[32]; - char term_dict[100]; - int ord_len; - char *val; - ISAM_P isam_p; - - if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no])) - { - zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE, - basenames[base_no]); - return ZEBRA_FAIL; - } - - if (zebra_apt_get_ord(zh, zapt, reg_type, 0, - attributeSet, &ord) != ZEBRA_OK) - continue; - - ord_len = key_SU_encode (ord, ord_buf); - memcpy(term_dict, ord_buf, ord_len); - strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR); - val = dict_lookup(zh->reg->dict, term_dict); - if (!val) - continue; assert(*val == sizeof(ISAM_P)); memcpy(&isam_p, val+1, sizeof(isam_p)); - - if (zh->reg->isamb) - f_set[num_sets++] = rsisamb_create(rset_nmem, kc, kc->scope, - zh->reg->isamb, isam_p, 0); - else if (zh->reg->isamc) - f_set[num_sets++] = rsisamc_create(rset_nmem, kc, kc->scope, - zh->reg->isamc, isam_p, 0); - } - if (num_sets) - { - *rset = rset_create_or(rset_nmem, kc, kc->scope, - 0 /* termid */, num_sets, f_set); + *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope, + isam_p, 0); } - xfree(f_set); return ZEBRA_OK; } - + +/** \brief returns result set for phrase search + */ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, - oid_value attributeSet, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, - int reg_type, int complete_flag, + const char *index_type, + int complete_flag, const char *rank_type, const char *xpath_use, - int num_bases, char **basenames, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc) @@ -1455,12 +1550,11 @@ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, RSET *result_sets = 0; int num_result_sets = 0; ZEBRA_RES res = - term_list_trunc(zh, zapt, termz_org, attributeSet, - stream, reg_type, complete_flag, - rank_type, xpath_use, - num_bases, basenames, - rset_nmem, - &result_sets, &num_result_sets, kc); + search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit, + stream, index_type, complete_flag, + rank_type, xpath_use, + rset_nmem, + &result_sets, &num_result_sets, kc); if (res != ZEBRA_OK) return res; @@ -1468,13 +1562,17 @@ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, if (num_result_sets > 0) { RSET first_set = 0; - res = rpn_search_APT_position(zh, zapt, attributeSet, - reg_type, - num_bases, basenames, - rset_nmem, &first_set, - kc); + res = search_position(zh, zapt, attributeSet, + index_type, + rset_nmem, &first_set, + kc); if (res != ZEBRA_OK) + { + int i; + for (i = 0; i < num_result_sets; i++) + rset_delete(result_sets[i]); return res; + } if (first_set) { RSET *nsets = nmem_malloc(stream, @@ -1486,7 +1584,7 @@ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, } } if (num_result_sets == 0) - *rset = rset_create_null(rset_nmem, kc, 0); + *rset = rset_create_null(rset_nmem, kc, 0); else if (num_result_sets == 1) *rset = result_sets[0]; else @@ -1499,15 +1597,18 @@ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, return ZEBRA_OK; } +/** \brief returns result set for or-list search + */ static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, - oid_value attributeSet, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, - int reg_type, int complete_flag, + const char *index_type, + int complete_flag, const char *rank_type, const char *xpath_use, - int num_bases, char **basenames, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc) @@ -1516,26 +1617,24 @@ static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh, int num_result_sets = 0; int i; ZEBRA_RES res = - term_list_trunc(zh, zapt, termz_org, attributeSet, - stream, reg_type, complete_flag, - rank_type, xpath_use, - num_bases, basenames, - rset_nmem, - &result_sets, &num_result_sets, kc); + search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit, + stream, index_type, complete_flag, + rank_type, xpath_use, + rset_nmem, + &result_sets, &num_result_sets, kc); if (res != ZEBRA_OK) return res; - for (i = 0; iscope, 2, tmp_set, @@ -1555,7 +1654,7 @@ static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh, } } if (num_result_sets == 0) - *rset = rset_create_null(rset_nmem, kc, 0); + *rset = rset_create_null(rset_nmem, kc, 0); else if (num_result_sets == 1) *rset = result_sets[0]; else @@ -1566,15 +1665,18 @@ static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh, return ZEBRA_OK; } +/** \brief returns result set for and-list search + */ static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, - oid_value attributeSet, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, - int reg_type, int complete_flag, - const char *rank_type, + const char *index_type, + int complete_flag, + const char *rank_type, const char *xpath_use, - int num_bases, char **basenames, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc) @@ -1583,26 +1685,24 @@ static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh, int num_result_sets = 0; int i; ZEBRA_RES res = - term_list_trunc(zh, zapt, termz_org, attributeSet, - stream, reg_type, complete_flag, - rank_type, xpath_use, - num_bases, basenames, - rset_nmem, - &result_sets, &num_result_sets, - kc); + search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit, + stream, index_type, complete_flag, + rank_type, xpath_use, + rset_nmem, + &result_sets, &num_result_sets, + kc); if (res != ZEBRA_OK) return res; - for (i = 0; iscope, 2, tmp_set, @@ -1624,7 +1724,7 @@ static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh, if (num_result_sets == 0) - *rset = rset_create_null(rset_nmem, kc, 0); + *rset = rset_create_null(rset_nmem, kc, 0); else if (num_result_sets == 1) *rset = result_sets[0]; else @@ -1637,19 +1737,19 @@ static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh, static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, - char *term_dict, - oid_value attributeSet, + WRBUF term_dict, + const Odr_oid *attributeSet, struct grep_info *grep_info, int *max_pos, - int reg_type, - char *term_dst, + zebra_map_t zm, + WRBUF display_term, int *error_code) { AttrType relation; int relation_value; int term_value; int r; - char *term_tmp = term_dict + strlen(term_dict); + WRBUF term_num = wrbuf_alloc(); *error_code = 0; attr_init_APT(&relation, zapt, 2); @@ -1661,44 +1761,55 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, { case 1: yaz_log(log_level_rpn, "Relation <"); - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1, - term_dst)) + if (!term_100(zm, term_sub, term_num, 1, display_term)) + { + wrbuf_destroy(term_num); return 0; - term_value = atoi (term_tmp); - gen_regular_rel(term_tmp, term_value-1, 1); + } + term_value = atoi(wrbuf_cstr(term_num)); + gen_regular_rel(term_dict, term_value-1, 1); break; case 2: yaz_log(log_level_rpn, "Relation <="); - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1, - term_dst)) + if (!term_100(zm, term_sub, term_num, 1, display_term)) + { + wrbuf_destroy(term_num); return 0; - term_value = atoi (term_tmp); - gen_regular_rel(term_tmp, term_value, 1); + } + term_value = atoi(wrbuf_cstr(term_num)); + gen_regular_rel(term_dict, term_value, 1); break; case 4: yaz_log(log_level_rpn, "Relation >="); - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1, - term_dst)) + if (!term_100(zm, term_sub, term_num, 1, display_term)) + { + wrbuf_destroy(term_num); return 0; - term_value = atoi (term_tmp); - gen_regular_rel(term_tmp, term_value, 0); + } + term_value = atoi(wrbuf_cstr(term_num)); + gen_regular_rel(term_dict, term_value, 0); break; case 5: yaz_log(log_level_rpn, "Relation >"); - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1, - term_dst)) + if (!term_100(zm, term_sub, term_num, 1, display_term)) + { + wrbuf_destroy(term_num); return 0; - term_value = atoi (term_tmp); - gen_regular_rel(term_tmp, term_value+1, 0); + } + term_value = atoi(wrbuf_cstr(term_num)); + gen_regular_rel(term_dict, term_value+1, 0); break; case -1: + case 102: case 3: yaz_log(log_level_rpn, "Relation ="); - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1, - term_dst)) + if (!term_100(zm, term_sub, term_num, 1, display_term)) + { + wrbuf_destroy(term_num); return 0; - term_value = atoi (term_tmp); - sprintf(term_tmp, "(0*%d)", term_value); + } + term_value = atoi(wrbuf_cstr(term_num)); + wrbuf_printf(term_dict, "(0*%d)", term_value); break; case 103: /* term_tmp untouched.. */ @@ -1707,131 +1818,121 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, break; default: *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE; - return 0; + wrbuf_destroy(term_num); + return 0; } - yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp); - r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos, - 0, grep_handle); + r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), + 0, grep_info, max_pos, 0, grep_handle); if (r == 1) zebra_set_partial_result(zh); else if (r) yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r); yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx); + wrbuf_destroy(term_num); return 1; } static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const char **term_sub, - oid_value attributeSet, NMEM stream, + const char **term_sub, + WRBUF term_dict, + const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, - int reg_type, int complete_flag, - int num_bases, char **basenames, - char *term_dst, + const char *index_type, int complete_flag, + WRBUF display_term, const char *xpath_use, struct ord_list **ol) { - char term_dict[2*IT_MAX_WORD+2]; - int base_no; const char *termp; struct rpn_char_map_info rcmi; - - int bases_ok = 0; /* no of databases with OK attribute */ + int max_pos; + int relation_error = 0; + int ord, ord_len, i; + char ord_buf[32]; + zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type); *ol = ord_list_create(stream); - rpn_char_map_prepare (zh->reg, reg_type, &rcmi); + rpn_char_map_prepare(zh->reg, zm, &rcmi); - for (base_no = 0; base_no < num_bases; base_no++) - { - int max_pos, prefix_len = 0; - int relation_error = 0; - int ord, ord_len, i; - char ord_buf[32]; + termp = *term_sub; - termp = *term_sub; + if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use, + attributeSet, &ord) != ZEBRA_OK) + { + return ZEBRA_FAIL; + } - if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no])) - { - zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE, - basenames[base_no]); - return ZEBRA_FAIL; - } + wrbuf_rewind(term_dict); - if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use, - attributeSet, &ord) != ZEBRA_OK) - continue; - bases_ok++; + *ol = ord_list_append(stream, *ol, ord); - *ol = ord_list_append(stream, *ol, ord); + ord_len = key_SU_encode(ord, ord_buf); - ord_len = key_SU_encode (ord, ord_buf); + wrbuf_putc(term_dict, '('); + for (i = 0; i < ord_len; i++) + { + wrbuf_putc(term_dict, 1); + wrbuf_putc(term_dict, ord_buf[i]); + } + wrbuf_putc(term_dict, ')'); - term_dict[prefix_len++] = '('; - for (i = 0; i < ord_len; i++) + if (!numeric_relation(zh, zapt, &termp, term_dict, + attributeSet, grep_info, &max_pos, zm, + display_term, &relation_error)) + { + if (relation_error) { - term_dict[prefix_len++] = 1; - term_dict[prefix_len++] = ord_buf[i]; + zebra_setError(zh, relation_error, 0); + return ZEBRA_FAIL; } - term_dict[prefix_len++] = ')'; - term_dict[prefix_len] = '\0'; - - if (!numeric_relation(zh, zapt, &termp, term_dict, - attributeSet, grep_info, &max_pos, reg_type, - term_dst, &relation_error)) - { - if (relation_error) - { - zebra_setError(zh, relation_error, 0); - return ZEBRA_FAIL; - } - *term_sub = 0; - return ZEBRA_OK; - } + *term_sub = 0; + return ZEBRA_OK; } - if (!bases_ok) - return ZEBRA_FAIL; *term_sub = termp; yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx); return ZEBRA_OK; } - + static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz, - oid_value attributeSet, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, - int reg_type, int complete_flag, - const char *rank_type, + const char *index_type, + int complete_flag, + const char *rank_type, const char *xpath_use, - int num_bases, char **basenames, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc) { - char term_dst[IT_MAX_WORD+1]; const char *termp = termz; RSET *result_sets = 0; int num_result_sets = 0; ZEBRA_RES res; struct grep_info grep_info; int alloc_sets = 0; - zint hits_limit_value; + zint hits_limit_value = hits_limit; const char *term_ref_id_str = 0; - term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream); + zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, + stream); yaz_log(log_level_rpn, "APT_numeric t='%s'", termz); - if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL) + if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL) return ZEBRA_FAIL; while (1) - { + { struct ord_list *ol; + WRBUF term_dict = wrbuf_alloc(); + WRBUF display_term = wrbuf_alloc(); if (alloc_sets == num_result_sets) { int add = 10; - RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * + RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * sizeof(*rnew)); if (alloc_sets) memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew)); @@ -1840,21 +1941,27 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, } yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp); grep_info.isam_p_indx = 0; - res = numeric_term(zh, zapt, &termp, attributeSet, stream, &grep_info, - reg_type, complete_flag, num_bases, basenames, - term_dst, xpath_use, &ol); + res = numeric_term(zh, zapt, &termp, term_dict, + attributeSet, stream, &grep_info, + index_type, complete_flag, + display_term, xpath_use, &ol); + wrbuf_destroy(term_dict); if (res == ZEBRA_FAIL || termp == 0) + { + wrbuf_destroy(display_term); break; - yaz_log(YLOG_DEBUG, "term: %s", term_dst); + } + yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term)); result_sets[num_result_sets] = rset_trunc(zh, grep_info.isam_p_buf, - grep_info.isam_p_indx, term_dst, - strlen(term_dst), rank_type, + grep_info.isam_p_indx, wrbuf_buf(display_term), + wrbuf_len(display_term), rank_type, 0 /* preserve position */, - zapt->term->which, rset_nmem, - kc, kc->scope, ol, reg_type, + zapt->term->which, rset_nmem, + kc, kc->scope, ol, index_type, hits_limit_value, term_ref_id_str); + wrbuf_destroy(display_term); if (!result_sets[num_result_sets]) break; num_result_sets++; @@ -1880,7 +1987,7 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz, - oid_value attributeSet, + const Odr_oid *attributeSet, NMEM stream, const char *rank_type, NMEM rset_nmem, RSET *rset, @@ -1888,7 +1995,7 @@ static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh, { Record rec; zint sysno = atozint(termz); - + if (sysno <= 0) sysno = 0; rec = rec_get(zh->reg->records, sysno); @@ -1908,7 +2015,7 @@ static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh, *rset = rset_create_temp(rset_nmem, kc, kc->scope, res_get(zh->res, "setTmpDir"), 0); rsfd = rset_open(*rset, RSETF_WRITE); - + key.mem[0] = sysno; key.mem[1] = 1; key.len = 2; @@ -1919,7 +2026,7 @@ static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh, } static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - oid_value attributeSet, NMEM stream, + const Odr_oid *attributeSet, NMEM stream, Z_SortKeySpecList *sort_sequence, const char *rank_type, NMEM rset_nmem, @@ -1931,10 +2038,8 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, AttrType sort_relation_type; Z_SortKeySpec *sks; Z_SortKey *sk; - int oid[OID_SIZE]; - oident oe; char termz[20]; - + attr_init_APT(&sort_relation_type, zapt, 7); sort_relation_value = attr_find(&sort_relation_type, &attributeSet); @@ -1943,25 +2048,19 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, sort_sequence->num_specs = 10; sort_sequence->specs = (Z_SortKeySpec **) nmem_malloc(stream, sort_sequence->num_specs * - sizeof(*sort_sequence->specs)); - for (i = 0; inum_specs; i++) + sizeof(*sort_sequence->specs)); + for (i = 0; i < sort_sequence->num_specs; i++) sort_sequence->specs[i] = 0; } if (zapt->term->which != Z_Term_general) i = 0; else - i = atoi_n ((char *) zapt->term->u.general->buf, - zapt->term->u.general->len); + i = atoi_n((char *) zapt->term->u.general->buf, + zapt->term->u.general->len); if (i >= sort_sequence->num_specs) i = 0; sprintf(termz, "%d", i); - oe.proto = PROTO_Z3950; - oe.oclass = CLASS_ATTSET; - oe.value = attributeSet; - if (!oid_ent_to_oid (&oe, oid)) - return ZEBRA_FAIL; - sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks)); sks->sortElement = (Z_SortElement *) nmem_malloc(stream, sizeof(*sks->sortElement)); @@ -1972,19 +2071,19 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, sk->u.sortAttributes = (Z_SortAttributes *) nmem_malloc(stream, sizeof(*sk->u.sortAttributes)); - sk->u.sortAttributes->id = oid; + sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet); sk->u.sortAttributes->list = zapt->attributes; - sks->sortRelation = (int *) + sks->sortRelation = (Odr_int *) nmem_malloc(stream, sizeof(*sks->sortRelation)); if (sort_relation_value == 1) *sks->sortRelation = Z_SortKeySpec_ascending; else if (sort_relation_value == 2) *sks->sortRelation = Z_SortKeySpec_descending; - else + else *sks->sortRelation = Z_SortKeySpec_ascending; - sks->caseSensitivity = (int *) + sks->caseSensitivity = (Odr_int *) nmem_malloc(stream, sizeof(*sks->caseSensitivity)); *sks->caseSensitivity = 0; @@ -1997,14 +2096,14 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - oid_value attributeSet, + const Odr_oid *attributeSet, struct xpath_location_step *xpath, int max, NMEM mem) { - oid_value curAttributeSet = attributeSet; + const Odr_oid *curAttributeSet = attributeSet; AttrType use; const char *use_string = 0; - + attr_init_APT(&use, zapt, 1); attr_find_ex(&use, &curAttributeSet, &use_string); @@ -2013,71 +2112,67 @@ static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt, return zebra_parse_xpath_str(use_string, xpath, max, mem); } - - + + static RSET xpath_trunc(ZebraHandle zh, NMEM stream, - int reg_type, const char *term, + const char *index_type, const char *term, const char *xpath_use, NMEM rset_nmem, struct rset_key_control *kc) { - RSET rset; struct grep_info grep_info; - char term_dict[2048]; - char ord_buf[32]; - int prefix_len = 0; - int ord = zebraExplain_lookup_attr_str(zh->reg->zei, + int ord = zebraExplain_lookup_attr_str(zh->reg->zei, zinfo_index_category_index, - reg_type, - xpath_use); - int ord_len, i, r, max_pos; - int term_type = Z_Term_characterString; - const char *flags = "void"; - - if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL) + index_type, xpath_use); + if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL) return rset_create_null(rset_nmem, kc, 0); - + if (ord < 0) return rset_create_null(rset_nmem, kc, 0); - if (prefix_len) - term_dict[prefix_len++] = '|'; else - term_dict[prefix_len++] = '('; - - ord_len = key_SU_encode (ord, ord_buf); - for (i = 0; ireg->dict, term_dict, 0, - &grep_info, &max_pos, 0, grep_handle); - yaz_log(YLOG_DEBUG, "%s %d positions", term, - grep_info.isam_p_indx); - rset = rset_trunc(zh, grep_info.isam_p_buf, - grep_info.isam_p_indx, term, strlen(term), - flags, 1, term_type,rset_nmem, - kc, kc->scope, 0, reg_type, 0 /* hits_limit */, - 0 /* term_ref_id_str */); - grep_info_delete(&grep_info); - return rset; + { + int i, max_pos; + char ord_buf[32]; + RSET rset; + WRBUF term_dict = wrbuf_alloc(); + int ord_len = key_SU_encode(ord, ord_buf); + int term_type = Z_Term_characterString; + const char *flags = "void"; + + wrbuf_putc(term_dict, '('); + for (i = 0; i < ord_len; i++) + { + wrbuf_putc(term_dict, 1); + wrbuf_putc(term_dict, ord_buf[i]); + } + wrbuf_putc(term_dict, ')'); + wrbuf_puts(term_dict, term); + + grep_info.isam_p_indx = 0; + dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0, + &grep_info, &max_pos, 0, grep_handle); + yaz_log(YLOG_DEBUG, "%s %d positions", term, + grep_info.isam_p_indx); + rset = rset_trunc(zh, grep_info.isam_p_buf, + grep_info.isam_p_indx, term, strlen(term), + flags, 1, term_type, rset_nmem, + kc, kc->scope, 0, index_type, 0 /* hits_limit */, + 0 /* term_ref_id_str */); + grep_info_delete(&grep_info); + wrbuf_destroy(term_dict); + return rset; + } } static ZEBRA_RES rpn_search_xpath(ZebraHandle zh, - int num_bases, char **basenames, NMEM stream, const char *rank_type, RSET rset, int xpath_len, struct xpath_location_step *xpath, NMEM rset_nmem, RSET *rset_out, struct rset_key_control *kc) { - int base_no; int i; int always_matches = rset ? 0 : 1; @@ -2088,45 +2183,37 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, } yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len); - for (i = 0; i a/.* - //a/b -> b/a/.* - /a -> a/ - /a/b -> b/a/ - - / -> none - - a[@attr = value]/b[@other = othervalue] - - /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/) - /a/b val range(b/a/,freetext(w,1016,val),b/a/) - /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/) - /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y) - /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y) - /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x) - + //a -> a/.* + //a/b -> b/a/.* + /a -> a/ + /a/b -> b/a/ + + / -> none + + a[@attr = value]/b[@other = othervalue] + + /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/) + /a/b val range(b/a/,freetext(w,1016,val),b/a/) + /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/) + /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y) + /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y) + /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x) + */ - dict_grep_cmap (zh->reg->dict, 0, 0); + dict_grep_cmap(zh->reg->dict, 0, 0); - for (base_no = 0; base_no < num_bases; base_no++) { int level = xpath_len; int first_path = 1; - - if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no])) - { - zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE, - basenames[base_no]); - *rset_out = rset; - return ZEBRA_FAIL; - } + while (--level >= 0) { WRBUF xpath_rev = wrbuf_alloc(); @@ -2166,7 +2253,7 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, { const char *cp = xpath[level].predicate->u.relation.value; wrbuf_putc(wbuf, '='); - + while (*cp) { if (strchr(REGEX_CHARS, *cp)) @@ -2176,11 +2263,12 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, } } rset_attr = xpath_trunc( - zh, stream, '0', wrbuf_cstr(wbuf), ZEBRA_XPATH_ATTR_NAME, + zh, stream, "0", wrbuf_cstr(wbuf), + ZEBRA_XPATH_ATTR_NAME, rset_nmem, kc); wrbuf_destroy(wbuf); - } - else + } + else { if (!first_path) { @@ -2188,23 +2276,23 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, continue; } } - yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, + yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, wrbuf_cstr(xpath_rev)); if (wrbuf_len(xpath_rev)) { - rset_start_tag = xpath_trunc(zh, stream, '0', + rset_start_tag = xpath_trunc(zh, stream, "0", wrbuf_cstr(xpath_rev), - ZEBRA_XPATH_ELM_BEGIN, + ZEBRA_XPATH_ELM_BEGIN, rset_nmem, kc); if (always_matches) rset = rset_start_tag; else { - rset_end_tag = xpath_trunc(zh, stream, '0', + rset_end_tag = xpath_trunc(zh, stream, "0", wrbuf_cstr(xpath_rev), - ZEBRA_XPATH_ELM_END, + ZEBRA_XPATH_ELM_END, rset_nmem, kc); - + rset = rset_create_between(rset_nmem, kc, kc->scope, rset_start_tag, rset, rset_end_tag, rset_attr); @@ -2220,16 +2308,73 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, #define MAX_XPATH_STEPS 10 +static ZEBRA_RES rpn_search_database(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, + Z_SortKeySpecList *sort_sequence, + NMEM rset_nmem, + RSET *rset, + struct rset_key_control *kc); + static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - oid_value attributeSet, NMEM stream, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, Z_SortKeySpecList *sort_sequence, - int num_bases, char **basenames, + int num_bases, const char **basenames, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc) { + RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets)); ZEBRA_RES res = ZEBRA_OK; - unsigned reg_id; + int i; + for (i = 0; i < num_bases; i++) + { + + if (zebraExplain_curDatabase(zh->reg->zei, basenames[i])) + { + zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE, + basenames[i]); + res = ZEBRA_FAIL; + break; + } + res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream, + sort_sequence, + rset_nmem, rsets+i, kc); + if (res != ZEBRA_OK) + break; + } + if (res != ZEBRA_OK) + { /* must clean up the already created sets */ + while (--i >= 0) + rset_delete(rsets[i]); + *rset = 0; + } + else + { + if (num_bases == 1) + *rset = rsets[0]; + else if (num_bases == 0) + *rset = rset_create_null(rset_nmem, kc, 0); + else + *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */, + num_bases, rsets); + } + return res; +} + +static ZEBRA_RES rpn_search_database(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, + Z_SortKeySpecList *sort_sequence, + NMEM rset_nmem, + RSET *rset, + struct rset_key_control *kc) +{ + ZEBRA_RES res = ZEBRA_OK; + const char *index_type; char *search_type = NULL; char rank_type[128]; int complete_flag; @@ -2244,10 +2389,10 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, log_level_rpn = yaz_log_module_level("rpn"); log_level_set = 1; } - zebra_maps_attr(zh->reg->zebra_maps, zapt, ®_id, &search_type, + zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type, rank_type, &complete_flag, &sort_flag); - - yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id); + + yaz_log(YLOG_DEBUG, "index_type=%s", index_type); yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag); yaz_log(YLOG_DEBUG, "search_type=%s", search_type); yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type); @@ -2259,14 +2404,14 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence, rank_type, rset_nmem, rset, kc); /* consider if an X-Path query is used */ - xpath_len = rpn_check_xpath(zh, zapt, attributeSet, + xpath_len = rpn_check_xpath(zh, zapt, attributeSet, xpath, MAX_XPATH_STEPS, stream); if (xpath_len >= 0) { - if (xpath[xpath_len-1].part[0] == '@') + if (xpath[xpath_len-1].part[0] == '@') xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */ else - xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */ + xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */ if (1) { @@ -2279,8 +2424,7 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, if (relation_value == 103) /* alwaysmatches */ { *rset = 0; /* signal no "term" set */ - return rpn_search_xpath(zh, num_bases, basenames, - stream, rank_type, *rset, + return rpn_search_xpath(zh, stream, rank_type, *rset, xpath_len, xpath, rset_nmem, rset, kc); } } @@ -2288,7 +2432,7 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, /* search using one of the various search type strategies termz is our UTF-8 search term - attributeSet is top-level default attribute set + attributeSet is top-level default attribute set stream is ODR for search reg_id is the register type complete_flag is 1 for complete subfield, 0 for incomplete @@ -2296,26 +2440,29 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, */ if (!strcmp(search_type, "phrase")) { - res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream, - reg_id, complete_flag, rank_type, + res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit, + stream, + index_type, complete_flag, rank_type, xpath_use, - num_bases, basenames, rset_nmem, + rset_nmem, rset, kc); } else if (!strcmp(search_type, "and-list")) { - res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream, - reg_id, complete_flag, rank_type, + res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit, + stream, + index_type, complete_flag, rank_type, xpath_use, - num_bases, basenames, rset_nmem, + rset_nmem, rset, kc); } else if (!strcmp(search_type, "or-list")) { - res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream, - reg_id, complete_flag, rank_type, + res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit, + stream, + index_type, complete_flag, rank_type, xpath_use, - num_bases, basenames, rset_nmem, + rset_nmem, rset, kc); } else if (!strcmp(search_type, "local")) @@ -2325,10 +2472,11 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, } else if (!strcmp(search_type, "numeric")) { - res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream, - reg_id, complete_flag, rank_type, + res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, hits_limit, + stream, + index_type, complete_flag, rank_type, xpath_use, - num_bases, basenames, rset_nmem, + rset_nmem, rset, kc); } else @@ -2340,16 +2488,16 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, return res; if (!*rset) return ZEBRA_FAIL; - return rpn_search_xpath(zh, num_bases, basenames, - stream, rank_type, *rset, + return rpn_search_xpath(zh, stream, rank_type, *rset, xpath_len, xpath, rset_nmem, rset, kc); } static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, - oid_value attributeSet, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, - int num_bases, char **basenames, + int num_bases, const char **basenames, RSET **result_sets, int *num_result_sets, Z_Operator *parent_op, struct rset_key_control *kc); @@ -2374,9 +2522,9 @@ ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs, Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm; AttrType global_hits_limit_attr; int l; - + attr_init_APT(&global_hits_limit_attr, zapt, 12); - + l = attr_find(&global_hits_limit_attr, NULL); if (l != -1) *approx_limit = l; @@ -2386,10 +2534,11 @@ ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs, } ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs, - oid_value attributeSet, + const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, - int num_bases, char **basenames, + int num_bases, const char **basenames, RSET *result_set) { RSET *result_sets = 0; @@ -2397,9 +2546,9 @@ ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs, ZEBRA_RES res; struct rset_key_control *kc = zebra_key_control_create(zh); - res = rpn_search_structure(zh, zs, attributeSet, + res = rpn_search_structure(zh, zs, attributeSet, hits_limit, stream, rset_nmem, - sort_sequence, + sort_sequence, num_bases, basenames, &result_sets, &num_result_sets, 0 /* no parent op */, @@ -2407,7 +2556,7 @@ ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs, if (res != ZEBRA_OK) { int i; - for (i = 0; iu.complex->s1, - attributeSet, stream, rset_nmem, + attributeSet, hits_limit, stream, rset_nmem, sort_sequence, num_bases, basenames, &result_sets_l, &num_result_sets_l, @@ -2450,12 +2599,12 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, if (res != ZEBRA_OK) { int i; - for (i = 0; iu.complex->s2, - attributeSet, stream, rset_nmem, + attributeSet, hits_limit, stream, rset_nmem, sort_sequence, num_bases, basenames, &result_sets_r, &num_result_sets_r, @@ -2463,20 +2612,20 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, if (res != ZEBRA_OK) { int i; - for (i = 0; iwhich != zop->which @@ -2507,7 +2656,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, case Z_Operator_prox: if (zop->u.prox->which != Z_ProximityOperator_known) { - zebra_setError(zh, + zebra_setError(zh, YAZ_BIB1_UNSUPP_PROX_UNIT_CODE, 0); return ZEBRA_FAIL; @@ -2523,9 +2672,9 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, { rset = rset_create_prox(rset_nmem, kc, kc->scope, - *num_result_sets, *result_sets, + *num_result_sets, *result_sets, *zop->u.prox->ordered, - (!zop->u.prox->exclusion ? + (!zop->u.prox->exclusion ? 0 : *zop->u.prox->exclusion), *zop->u.prox->relationType, *zop->u.prox->distance ); @@ -2536,7 +2685,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, return ZEBRA_FAIL; } *num_result_sets = 1; - *result_sets = nmem_malloc(stream, *num_result_sets * + *result_sets = nmem_malloc(stream, *num_result_sets * sizeof(**result_sets)); (*result_sets)[0] = rset; } @@ -2550,7 +2699,8 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, { yaz_log(YLOG_DEBUG, "rpn_search_APT"); res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm, - attributeSet, stream, sort_sequence, + attributeSet, hits_limit, + stream, sort_sequence, num_bases, basenames, rset_nmem, &rset, kc); if (res != ZEBRA_OK) @@ -2562,7 +2712,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, rset = resultSetRef(zh, zs->u.simple->u.resultSetId); if (!rset) { - zebra_setError(zh, + zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST, zs->u.simple->u.resultSetId); return ZEBRA_FAIL; @@ -2575,7 +2725,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, return ZEBRA_FAIL; } *num_result_sets = 1; - *result_sets = nmem_malloc(stream, *num_result_sets * + *result_sets = nmem_malloc(stream, *num_result_sets * sizeof(**result_sets)); (*result_sets)[0] = rset; } @@ -2592,6 +2742,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab