X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Frpnsearch.c;h=094830fdb35fbbdab8abb5fdd448476dfa9ae0c9;hb=2eacaa1e549428b231de5844f397466f6a44c59f;hp=f5815122e125ca0d20bf124bb5b47239e385bea4;hpb=e1352999e1be86b0ce5c2698bb62f9fc7d598a4f;p=idzebra-moved-to-github.git diff --git a/index/rpnsearch.c b/index/rpnsearch.c index f581512..094830f 100644 --- a/index/rpnsearch.c +++ b/index/rpnsearch.c @@ -1,4 +1,4 @@ -/* $Id: rpnsearch.c,v 1.7 2007-01-16 15:31:23 adam Exp $ +/* $Id: rpnsearch.c,v 1.16 2007-10-29 16:57:53 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -134,9 +134,9 @@ static int add_isam_p(const char *name, const char *info, char term_tmp[IT_MAX_WORD]; int ord = 0; const char *index_name; - int len = key_SU_decode (&ord, (const unsigned char *) name); + int len = key_SU_decode(&ord, (const unsigned char *) name); - zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len); + zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len); yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp); zebraExplain_lookup_ord(p->zh->reg->zei, ord, 0 /* index_type */, &db, &index_name); @@ -206,9 +206,36 @@ static void esc_str(char *out_buf, size_t out_size, #define REGEX_CHARS " []()|.*+?!" 
+static void add_non_space(const char *start, const char *end, + WRBUF term_dict, + char *dst_term, int *dst_ptr, + const char **map, int q_map_match) +{ + size_t sz = end - start; + memcpy(dst_term + *dst_ptr, start, sz); + (*dst_ptr) += sz; + if (!q_map_match) + { + while (start < end) + { + if (strchr(REGEX_CHARS, *start)) + wrbuf_putc(term_dict, '\\'); + wrbuf_putc(term_dict, *start); + start++; + } + } + else + { + char tmpbuf[80]; + esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); + + wrbuf_puts(term_dict, map[0]); + } +} + /* term_100: handle term, where trunc = none(no operators at all) */ -static int term_100(ZebraMaps zebra_maps, int reg_type, - const char **src, char *dst, int space_split, +static int term_100(ZebraMaps zebra_maps, const char *index_type, + const char **src, WRBUF term_dict, int space_split, char *dst_term) { const char *s0; @@ -219,14 +246,14 @@ static int term_100(ZebraMaps zebra_maps, int reg_type, const char *space_start = 0; const char *space_end = 0; - if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split)) + if (!term_pre(zebra_maps, *index_type, src, NULL, NULL, !space_split)) return 0; s0 = *src; while (*s0) { const char *s1 = s0; int q_map_match = 0; - map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), + map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0), &q_map_match); if (space_split) { @@ -246,44 +273,29 @@ static int term_100(ZebraMaps zebra_maps, int reg_type, while (space_start < space_end) { if (strchr(REGEX_CHARS, *space_start)) - dst[i++] = '\\'; + wrbuf_putc(term_dict, '\\'); dst_term[j++] = *space_start; - dst[i++] = *space_start++; + wrbuf_putc(term_dict, *space_start); + space_start++; + } /* and reset */ space_start = space_end = 0; } } - /* add non-space char */ - memcpy(dst_term+j, s1, s0 - s1); - j += (s0 - s1); - if (!q_map_match) - { - while (s1 < s0) - { - if (strchr(REGEX_CHARS, *s1)) - dst[i++] = '\\'; - dst[i++] = *s1++; - } - } - else - { - char tmpbuf[80]; - 
esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); - - strcpy(dst + i, map[0]); - i += strlen(map[0]); - } + i++; + + add_non_space(s1, s0, term_dict, dst_term, &j, + map, q_map_match); } - dst[i] = '\0'; dst_term[j] = '\0'; *src = s0; return i; } /* term_101: handle term, where trunc = Process # */ -static int term_101(ZebraMaps zebra_maps, int reg_type, - const char **src, char *dst, int space_split, +static int term_101(ZebraMaps zebra_maps, const char *index_type, + const char **src, WRBUF term_dict, int space_split, char *dst_term) { const char *s0; @@ -291,57 +303,40 @@ static int term_101(ZebraMaps zebra_maps, int reg_type, int i = 0; int j = 0; - if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split)) + if (!term_pre(zebra_maps, *index_type, src, "#", "#", !space_split)) return 0; s0 = *src; while (*s0) { if (*s0 == '#') { - dst[i++] = '.'; - dst[i++] = '*'; + i++; + wrbuf_puts(term_dict, ".*"); dst_term[j++] = *s0++; } else { const char *s1 = s0; int q_map_match = 0; - map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), + map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0), &q_map_match); if (space_split && **map == *CHR_SPACE) break; - /* add non-space char */ - memcpy(dst_term+j, s1, s0 - s1); - j += (s0 - s1); - if (!q_map_match) - { - while (s1 < s0) - { - if (strchr(REGEX_CHARS, *s1)) - dst[i++] = '\\'; - dst[i++] = *s1++; - } - } - else - { - char tmpbuf[80]; - esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); - - strcpy(dst + i, map[0]); - i += strlen(map[0]); - } + i++; + add_non_space(s1, s0, term_dict, dst_term, &j, + map, q_map_match); } } - dst[i] = '\0'; dst_term[j++] = '\0'; *src = s0; return i; } /* term_103: handle term, where trunc = re-2 (regular expressions) */ -static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src, - char *dst, int *errors, int space_split, +static int term_103(ZebraMaps zebra_maps, const char *index_type, + const char **src, + WRBUF term_dict, int 
*errors, int space_split, char *dst_term) { int i = 0; @@ -349,7 +344,7 @@ static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src, const char *s0; const char **map; - if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split)) + if (!term_pre(zebra_maps, *index_type, src, "^\\()[].*+?|", "(", !space_split)) return 0; s0 = *src; if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] && @@ -365,40 +360,24 @@ static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src, if (strchr("^\\()[].*+?|-", *s0)) { dst_term[j++] = *s0; - dst[i++] = *s0++; + wrbuf_putc(term_dict, *s0); + s0++; + i++; } else { const char *s1 = s0; int q_map_match = 0; - map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), + map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0), &q_map_match); if (space_split && **map == *CHR_SPACE) break; - /* add non-space char */ - memcpy(dst_term+j, s1, s0 - s1); - j += (s0 - s1); - if (!q_map_match) - { - while (s1 < s0) - { - if (strchr(REGEX_CHARS, *s1)) - dst[i++] = '\\'; - dst[i++] = *s1++; - } - } - else - { - char tmpbuf[80]; - esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); - - strcpy(dst + i, map[0]); - i += strlen(map[0]); - } + i++; + add_non_space(s1, s0, term_dict, dst_term, &j, + map, q_map_match); } } - dst[i] = '\0'; dst_term[j] = '\0'; *src = s0; @@ -406,17 +385,18 @@ static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src, } /* term_103: handle term, where trunc = re-1 (regular expressions) */ -static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src, - char *dst, int space_split, char *dst_term) +static int term_102(ZebraMaps zebra_maps, const char *index_type, + const char **src, + WRBUF term_dict, int space_split, char *dst_term) { - return term_103(zebra_maps, reg_type, src, dst, NULL, space_split, + return term_103(zebra_maps, index_type, src, term_dict, NULL, space_split, dst_term); } -/* term_104: handle term, where trunc = 
Process # and ! */ -static int term_104(ZebraMaps zebra_maps, int reg_type, - const char **src, char *dst, int space_split, +/* term_104: handle term, process # and ! */ +static int term_104(ZebraMaps zebra_maps, const char *index_type, + const char **src, WRBUF term_dict, int space_split, char *dst_term) { const char *s0; @@ -424,13 +404,14 @@ static int term_104(ZebraMaps zebra_maps, int reg_type, int i = 0; int j = 0; - if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split)) + if (!term_pre(zebra_maps, *index_type, src, "?*#", "?*#", !space_split)) return 0; s0 = *src; while (*s0) { if (*s0 == '?') { + i++; dst_term[j++] = *s0++; if (*s0 >= '0' && *s0 <= '9') { @@ -444,67 +425,48 @@ static int term_104(ZebraMaps zebra_maps, int reg_type, limit = 20; while (--limit >= 0) { - dst[i++] = '.'; - dst[i++] = '?'; + wrbuf_puts(term_dict, ".?"); } } else { - dst[i++] = '.'; - dst[i++] = '*'; + wrbuf_puts(term_dict, ".*"); } } else if (*s0 == '*') { - dst[i++] = '.'; - dst[i++] = '*'; + i++; + wrbuf_puts(term_dict, ".*"); dst_term[j++] = *s0++; } else if (*s0 == '#') { - dst[i++] = '.'; + i++; + wrbuf_puts(term_dict, "."); dst_term[j++] = *s0++; } else { const char *s1 = s0; int q_map_match = 0; - map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), + map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0), &q_map_match); if (space_split && **map == *CHR_SPACE) break; - /* add non-space char */ - memcpy(dst_term+j, s1, s0 - s1); - j += (s0 - s1); - if (!q_map_match) - { - while (s1 < s0) - { - if (strchr(REGEX_CHARS, *s1)) - dst[i++] = '\\'; - dst[i++] = *s1++; - } - } - else - { - char tmpbuf[80]; - esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); - - strcpy(dst + i, map[0]); - i += strlen(map[0]); - } + i++; + add_non_space(s1, s0, term_dict, dst_term, &j, + map, q_map_match); } } - dst[i] = '\0'; dst_term[j++] = '\0'; *src = s0; return i; } /* term_105/106: handle term, where trunc = Process * and ! 
and right trunc */ -static int term_105(ZebraMaps zebra_maps, int reg_type, - const char **src, char *dst, int space_split, +static int term_105(ZebraMaps zebra_maps, const char *index_type, + const char **src, WRBUF term_dict, int space_split, char *dst_term, int right_truncate) { const char *s0; @@ -512,60 +474,39 @@ static int term_105(ZebraMaps zebra_maps, int reg_type, int i = 0; int j = 0; - if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split)) + if (!term_pre(zebra_maps, *index_type, src, "*!", "*!", !space_split)) return 0; s0 = *src; while (*s0) { if (*s0 == '*') { - dst[i++] = '.'; - dst[i++] = '*'; + i++; + wrbuf_puts(term_dict, ".*"); dst_term[j++] = *s0++; } else if (*s0 == '!') { - dst[i++] = '.'; + i++; + wrbuf_putc(term_dict, '.'); dst_term[j++] = *s0++; } else { const char *s1 = s0; int q_map_match = 0; - map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), + map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0), &q_map_match); if (space_split && **map == *CHR_SPACE) break; - /* add non-space char */ - memcpy(dst_term+j, s1, s0 - s1); - j += (s0 - s1); - if (!q_map_match) - { - while (s1 < s0) - { - if (strchr(REGEX_CHARS, *s1)) - dst[i++] = '\\'; - dst[i++] = *s1++; - } - } - else - { - char tmpbuf[80]; - esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); - - strcpy(dst + i, map[0]); - i += strlen(map[0]); - } + i++; + add_non_space(s1, s0, term_dict, dst_term, &j, + map, q_map_match); } } if (right_truncate) - { - dst[i++] = '.'; - dst[i++] = '*'; - } - dst[i] = '\0'; - + wrbuf_puts(term_dict, ".*"); dst_term[j++] = '\0'; *src = s0; return i; @@ -576,8 +517,10 @@ static int term_105(ZebraMaps zebra_maps, int reg_type, * val: border value (inclusive) * islt: 1 if <=; 0 if >=. 
*/ -static void gen_regular_rel(char *dst, int val, int islt) +static void gen_regular_rel(WRBUF term_dict, int val, int islt) { + char dst_buf[20*5*20]; /* assuming enough for expansion */ + char *dst = dst_buf; int dst_p; int w, d, i; int pos = 0; @@ -596,7 +539,6 @@ static void gen_regular_rel(char *dst, int val, int islt) if (!islt) { strcpy(dst, "([0-9]+|-("); - dst_p = strlen(dst); islt = 1; } else @@ -682,13 +624,19 @@ static void gen_regular_rel(char *dst, int val, int islt) strcat(dst, "[0-9]*"); } strcat(dst, "))"); + wrbuf_puts(term_dict, dst); } -void string_rel_add_char(char **term_p, const char *src, int *indx) +void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx) { + const char *src = wrbuf_cstr(wsrc); if (src[*indx] == '\\') - *(*term_p)++ = src[(*indx)++]; - *(*term_p)++ = src[(*indx)++]; + { + wrbuf_putc(term_p, src[*indx]); + (*indx)++; + } + wrbuf_putc(term_p, src[*indx]); + (*indx)++; } /* @@ -702,16 +650,15 @@ void string_rel_add_char(char **term_p, const char *src, int *indx) * ([^a-].*|a[^b-].*|ab[^c-].*|abc) */ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const char **term_sub, char *term_dict, - oid_value attributeSet, - int reg_type, int space_split, char *term_dst, + const char **term_sub, WRBUF term_dict, + const Odr_oid *attributeSet, + const char *index_type, int space_split, char *term_dst, int *error_code) { AttrType relation; int relation_value; int i; - char *term_tmp = term_dict + strlen(term_dict); - char term_component[2*IT_MAX_WORD+20]; + WRBUF term_component = wrbuf_alloc(); attr_init_APT(&relation, zapt, 2); relation_value = attr_find(&relation, NULL); @@ -721,153 +668,148 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, switch (relation_value) { case 1: - if (!term_100(zh->reg->zebra_maps, reg_type, + if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_component, space_split, term_dst)) + { + wrbuf_destroy(term_component); return 0; + } 
yaz_log(log_level_rpn, "Relation <"); - *term_tmp++ = '('; - for (i = 0; term_component[i]; ) + wrbuf_putc(term_dict, '('); + for (i = 0; i < wrbuf_len(term_component); ) { int j = 0; - + if (i) - *term_tmp++ = '|'; + wrbuf_putc(term_dict, '|'); while (j < i) - string_rel_add_char(&term_tmp, term_component, &j); - - *term_tmp++ = '['; - - *term_tmp++ = '^'; - - *term_tmp++ = 1; - *term_tmp++ = FIRST_IN_FIELD_CHAR; + string_rel_add_char(term_dict, term_component, &j); - string_rel_add_char(&term_tmp, term_component, &i); - *term_tmp++ = '-'; + wrbuf_putc(term_dict, '['); - *term_tmp++ = ']'; - *term_tmp++ = '.'; - *term_tmp++ = '*'; - - if ((term_tmp - term_dict) > IT_MAX_WORD) - break; + wrbuf_putc(term_dict, '^'); + + wrbuf_putc(term_dict, 1); + wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR); + + string_rel_add_char(term_dict, term_component, &i); + wrbuf_putc(term_dict, '-'); + + wrbuf_putc(term_dict, ']'); + wrbuf_putc(term_dict, '.'); + wrbuf_putc(term_dict, '*'); } - *term_tmp++ = ')'; - *term_tmp = '\0'; - yaz_log(YLOG_LOG, "term_dict=%s", term_dict); + wrbuf_putc(term_dict, ')'); break; case 2: - if (!term_100(zh->reg->zebra_maps, reg_type, + if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_component, space_split, term_dst)) + { + wrbuf_destroy(term_component); return 0; + } yaz_log(log_level_rpn, "Relation <="); - *term_tmp++ = '('; - for (i = 0; term_component[i]; ) + wrbuf_putc(term_dict, '('); + for (i = 0; i < wrbuf_len(term_component); ) { int j = 0; while (j < i) - string_rel_add_char(&term_tmp, term_component, &j); - *term_tmp++ = '['; - - *term_tmp++ = '^'; + string_rel_add_char(term_dict, term_component, &j); + wrbuf_putc(term_dict, '['); - *term_tmp++ = 1; - *term_tmp++ = FIRST_IN_FIELD_CHAR; + wrbuf_putc(term_dict, '^'); - string_rel_add_char(&term_tmp, term_component, &i); - *term_tmp++ = '-'; + wrbuf_putc(term_dict, 1); + wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR); - *term_tmp++ = ']'; - *term_tmp++ = '.'; - *term_tmp++ = '*'; + 
string_rel_add_char(term_dict, term_component, &i); + wrbuf_putc(term_dict, '-'); - *term_tmp++ = '|'; + wrbuf_putc(term_dict, ']'); + wrbuf_putc(term_dict, '.'); + wrbuf_putc(term_dict, '*'); - if ((term_tmp - term_dict) > IT_MAX_WORD) - break; + wrbuf_putc(term_dict, '|'); } - for (i = 0; term_component[i]; ) - string_rel_add_char(&term_tmp, term_component, &i); - *term_tmp++ = ')'; - *term_tmp = '\0'; + for (i = 0; i < wrbuf_len(term_component); ) + string_rel_add_char(term_dict, term_component, &i); + wrbuf_putc(term_dict, ')'); break; case 5: - if (!term_100 (zh->reg->zebra_maps, reg_type, - term_sub, term_component, space_split, term_dst)) + if (!term_100(zh->reg->zebra_maps, index_type, + term_sub, term_component, space_split, term_dst)) + { + wrbuf_destroy(term_component); return 0; + } yaz_log(log_level_rpn, "Relation >"); - *term_tmp++ = '('; - for (i = 0; term_component[i];) + wrbuf_putc(term_dict, '('); + for (i = 0; i < wrbuf_len(term_component); ) { int j = 0; while (j < i) - string_rel_add_char(&term_tmp, term_component, &j); - *term_tmp++ = '['; + string_rel_add_char(term_dict, term_component, &j); + wrbuf_putc(term_dict, '['); - *term_tmp++ = '^'; - *term_tmp++ = '-'; - string_rel_add_char(&term_tmp, term_component, &i); - - *term_tmp++ = ']'; - *term_tmp++ = '.'; - *term_tmp++ = '*'; + wrbuf_putc(term_dict, '^'); + wrbuf_putc(term_dict, '-'); + string_rel_add_char(term_dict, term_component, &i); - *term_tmp++ = '|'; + wrbuf_putc(term_dict, ']'); + wrbuf_putc(term_dict, '.'); + wrbuf_putc(term_dict, '*'); - if ((term_tmp - term_dict) > IT_MAX_WORD) - break; + wrbuf_putc(term_dict, '|'); } - for (i = 0; term_component[i];) - string_rel_add_char(&term_tmp, term_component, &i); - *term_tmp++ = '.'; - *term_tmp++ = '+'; - *term_tmp++ = ')'; - *term_tmp = '\0'; + for (i = 0; i < wrbuf_len(term_component); ) + string_rel_add_char(term_dict, term_component, &i); + wrbuf_putc(term_dict, '.'); + wrbuf_putc(term_dict, '+'); + wrbuf_putc(term_dict, ')'); 
break; case 4: - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, + if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_component, space_split, term_dst)) + { + wrbuf_destroy(term_component); return 0; + } yaz_log(log_level_rpn, "Relation >="); - *term_tmp++ = '('; - for (i = 0; term_component[i];) + wrbuf_putc(term_dict, '('); + for (i = 0; i < wrbuf_len(term_component); ) { int j = 0; if (i) - *term_tmp++ = '|'; + wrbuf_putc(term_dict, '|'); while (j < i) - string_rel_add_char(&term_tmp, term_component, &j); - *term_tmp++ = '['; + string_rel_add_char(term_dict, term_component, &j); + wrbuf_putc(term_dict, '['); - if (term_component[i+1]) + if (i < wrbuf_len(term_component)-1) { - *term_tmp++ = '^'; - *term_tmp++ = '-'; - string_rel_add_char(&term_tmp, term_component, &i); + wrbuf_putc(term_dict, '^'); + wrbuf_putc(term_dict, '-'); + string_rel_add_char(term_dict, term_component, &i); } else { - string_rel_add_char(&term_tmp, term_component, &i); - *term_tmp++ = '-'; + string_rel_add_char(term_dict, term_component, &i); + wrbuf_putc(term_dict, '-'); } - *term_tmp++ = ']'; - *term_tmp++ = '.'; - *term_tmp++ = '*'; - - if ((term_tmp - term_dict) > IT_MAX_WORD) - break; + wrbuf_putc(term_dict, ']'); + wrbuf_putc(term_dict, '.'); + wrbuf_putc(term_dict, '*'); } - *term_tmp++ = ')'; - *term_tmp = '\0'; + wrbuf_putc(term_dict, ')'); break; case 3: case 102: @@ -875,12 +817,15 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, if (!**term_sub) return 1; yaz_log(log_level_rpn, "Relation ="); - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, + if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_component, space_split, term_dst)) + { + wrbuf_destroy(term_component); return 0; - strcat(term_tmp, "("); - strcat(term_tmp, term_component); - strcat(term_tmp, ")"); + } + wrbuf_puts(term_dict, "("); + wrbuf_puts(term_dict, wrbuf_cstr(term_component)); + wrbuf_puts(term_dict, ")"); break; case 103: yaz_log(log_level_rpn, 
"Relation always matches"); @@ -890,16 +835,19 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, break; default: *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE; + wrbuf_destroy(term_component); return 0; } + wrbuf_destroy(term_component); return 1; } static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, - oid_value attributeSet, NMEM stream, + WRBUF term_dict, + const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, - int reg_type, int complete_flag, + const char *index_type, int complete_flag, int num_bases, char **basenames, char *term_dst, const char *xpath_use, @@ -955,9 +903,9 @@ static ZEBRA_RES term_limits_APT(ZebraHandle zh, static ZEBRA_RES term_trunc(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, - oid_value attributeSet, NMEM stream, + const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, - int reg_type, int complete_flag, + const char *index_type, int complete_flag, int num_bases, char **basenames, char *term_dst, const char *rank_type, @@ -970,13 +918,16 @@ static ZEBRA_RES term_trunc(ZebraHandle zh, struct ord_list *ol; zint hits_limit_value; const char *term_ref_id_str = 0; - *rset = 0; + WRBUF term_dict = wrbuf_alloc(); + *rset = 0; term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream); grep_info->isam_p_indx = 0; - res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info, - reg_type, complete_flag, num_bases, basenames, + res = string_term(zh, zapt, term_sub, term_dict, + attributeSet, stream, grep_info, + index_type, complete_flag, num_bases, basenames, term_dst, xpath_use, &ol); + wrbuf_destroy(term_dict); if (res != ZEBRA_OK) return res; if (!*term_sub) /* no more terms ? 
*/ @@ -986,7 +937,7 @@ static ZEBRA_RES term_trunc(ZebraHandle zh, grep_info->isam_p_indx, term_dst, strlen(term_dst), rank_type, 1 /* preserve pos */, zapt->term->which, rset_nmem, - kc, kc->scope, ol, reg_type, hits_limit_value, + kc, kc->scope, ol, index_type, hits_limit_value, term_ref_id_str); if (!*rset) return ZEBRA_FAIL; @@ -995,16 +946,16 @@ static ZEBRA_RES term_trunc(ZebraHandle zh, static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, - oid_value attributeSet, NMEM stream, + WRBUF term_dict, + const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, - int reg_type, int complete_flag, + const char *index_type, int complete_flag, int num_bases, char **basenames, char *term_dst, const char *xpath_use, struct ord_list **ol) { - char term_dict[2*IT_MAX_WORD+4000]; - int j, r, base_no; + int r, base_no; AttrType truncation; int truncation_value; const char *termp; @@ -1015,7 +966,7 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, *ol = ord_list_create(stream); - rpn_char_map_prepare (zh->reg, reg_type, &rcmi); + rpn_char_map_prepare(zh->reg, *index_type, &rcmi); attr_init_APT(&truncation, zapt, 5); truncation_value = attr_find(&truncation, NULL); yaz_log(log_level_rpn, "truncation value %d", truncation_value); @@ -1031,38 +982,44 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, termp = *term_sub; /* start of term for each database */ - if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no])) + + if (zebraExplain_curDatabase(zh->reg->zei, basenames[base_no])) { zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE, basenames[base_no]); return ZEBRA_FAIL; } - if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use, + if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use, attributeSet, &ord) != ZEBRA_OK) continue; + + wrbuf_rewind(term_dict); /* new dictionary regexp term */ + bases_ok++; *ol = ord_list_append(stream, *ol, ord); - ord_len = key_SU_encode 
(ord, ord_buf); + ord_len = key_SU_encode(ord, ord_buf); - term_dict[prefix_len++] = '('; + wrbuf_putc(term_dict, '('); + for (i = 0; ireg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst)) + wrbuf_putc(term_dict, '('); + if (!term_100(zh->reg->zebra_maps, index_type, + &termp, term_dict, space_split, term_dst)) { *term_sub = 0; return ZEBRA_OK; } - strcat(term_dict, ".*)"); + wrbuf_puts(term_dict, ".*)"); break; case 2: /* keft truncation */ - term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*'; - if (!term_100(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst)) + wrbuf_puts(term_dict, "(.*"); + if (!term_100(zh->reg->zebra_maps, index_type, + &termp, term_dict, space_split, term_dst)) { *term_sub = 0; return ZEBRA_OK; } - strcat(term_dict, ")"); + wrbuf_putc(term_dict, ')'); break; case 3: /* left&right truncation */ - term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*'; - if (!term_100(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst)) + wrbuf_puts(term_dict, "(.*"); + if (!term_100(zh->reg->zebra_maps, index_type, + &termp, term_dict, space_split, term_dst)) { *term_sub = 0; return ZEBRA_OK; } - strcat(term_dict, ".*)"); + wrbuf_puts(term_dict, ".*)"); break; case 101: /* process # in term */ - term_dict[j++] = '('; - if (!term_101(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst)) + wrbuf_putc(term_dict, '('); + if (!term_101(zh->reg->zebra_maps, index_type, + &termp, term_dict, space_split, term_dst)) { *term_sub = 0; return ZEBRA_OK; } - strcat(term_dict, ")"); + wrbuf_puts(term_dict, ")"); break; case 102: /* Regexp-1 */ - term_dict[j++] = '('; - if (!term_102(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst)) + wrbuf_putc(term_dict, '('); + if (!term_102(zh->reg->zebra_maps, index_type, + &termp, term_dict, space_split, term_dst)) { *term_sub = 0; return ZEBRA_OK; } - strcat(term_dict, ")"); + 
wrbuf_putc(term_dict, ')'); break; case 103: /* Regexp-2 */ regex_range = 1; - term_dict[j++] = '('; - if (!term_103(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, &regex_range, + wrbuf_putc(term_dict, '('); + if (!term_103(zh->reg->zebra_maps, index_type, + &termp, term_dict, &regex_range, space_split, term_dst)) { *term_sub = 0; return ZEBRA_OK; } - strcat(term_dict, ")"); + wrbuf_putc(term_dict, ')'); break; case 104: /* process # and ! in term */ - term_dict[j++] = '('; - if (!term_104(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst)) + wrbuf_putc(term_dict, '('); + if (!term_104(zh->reg->zebra_maps, index_type, + &termp, term_dict, space_split, term_dst)) { *term_sub = 0; return ZEBRA_OK; } - strcat(term_dict, ")"); + wrbuf_putc(term_dict, ')'); break; case 105: /* process * and ! in term */ - term_dict[j++] = '('; - if (!term_105(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst, 1)) + wrbuf_putc(term_dict, '('); + if (!term_105(zh->reg->zebra_maps, index_type, + &termp, term_dict, space_split, term_dst, 1)) { *term_sub = 0; return ZEBRA_OK; } - strcat(term_dict, ")"); + wrbuf_putc(term_dict, ')'); break; case 106: /* process * and ! 
in term */ - term_dict[j++] = '('; - if (!term_105(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst, 0)) + wrbuf_putc(term_dict, '('); + if (!term_105(zh->reg->zebra_maps, index_type, + &termp, term_dict, space_split, term_dst, 0)) { *term_sub = 0; return ZEBRA_OK; } - strcat(term_dict, ")"); + wrbuf_putc(term_dict, ')'); break; default: zebra_setError_zint(zh, @@ -1174,12 +1131,13 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, } if (1) { - char buf[80]; - const char *input = term_dict + prefix_len; + char buf[1000]; + const char *input = wrbuf_cstr(term_dict) + prefix_len; esc_str(buf, sizeof(buf), input, strlen(input)); } - yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len); - r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range, + yaz_log(log_level_rpn, "dict_lookup_grep: %s", + wrbuf_cstr(term_dict) + prefix_len); + r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range, grep_info, &max_pos, ord_len /* number of "exact" chars */, grep_handle); @@ -1210,10 +1168,6 @@ static ZEBRA_RES grep_info_prepare(ZebraHandle zh, struct grep_info *grep_info, int reg_type) { - AttrType termset; - int termset_value_numeric; - const char *termset_value_string; - #ifdef TERM_COUNT grep_info->term_no = 0; #endif @@ -1223,35 +1177,50 @@ static ZEBRA_RES grep_info_prepare(ZebraHandle zh, grep_info->zh = zh; grep_info->reg_type = reg_type; grep_info->termset = 0; - if (!zapt) - return ZEBRA_OK; - attr_init_APT(&termset, zapt, 8); - termset_value_numeric = - attr_find_ex(&termset, NULL, &termset_value_string); - if (termset_value_numeric != -1) + if (zapt) { + AttrType truncmax; + int truncmax_value; + + attr_init_APT(&truncmax, zapt, 13); + truncmax_value = attr_find(&truncmax, NULL); + if (truncmax_value != -1) + grep_info->trunc_max = truncmax_value; + } + if (zapt) + { + AttrType termset; + int termset_value_numeric; + const char *termset_value_string; + + attr_init_APT(&termset, 
zapt, 8); + termset_value_numeric = + attr_find_ex(&termset, NULL, &termset_value_string); + if (termset_value_numeric != -1) + { #if TERMSET_DISABLE - zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset"); - return ZEBRA_FAIL; + zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset"); + return ZEBRA_FAIL; #else - char resname[32]; - const char *termset_name = 0; - if (termset_value_numeric != -2) - { - - sprintf(resname, "%d", termset_value_numeric); - termset_name = resname; - } - else + char resname[32]; + const char *termset_name = 0; + if (termset_value_numeric != -2) + { + + sprintf(resname, "%d", termset_value_numeric); + termset_name = resname; + } + else termset_name = termset_value_string; - yaz_log(log_level_rpn, "creating termset set %s", termset_name); - grep_info->termset = resultSetAdd(zh, termset_name, 1); - if (!grep_info->termset) - { - zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name); - return ZEBRA_FAIL; - } + yaz_log(log_level_rpn, "creating termset set %s", termset_name); + grep_info->termset = resultSetAdd(zh, termset_name, 1); + if (!grep_info->termset) + { + zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name); + return ZEBRA_FAIL; + } #endif + } } return ZEBRA_OK; } @@ -1277,9 +1246,9 @@ static ZEBRA_RES grep_info_prepare(ZebraHandle zh, static ZEBRA_RES term_list_trunc(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz, - oid_value attributeSet, + const Odr_oid *attributeSet, NMEM stream, - int reg_type, int complete_flag, + const char *index_type, int complete_flag, const char *rank_type, const char *xpath_use, int num_bases, char **basenames, @@ -1294,7 +1263,7 @@ static ZEBRA_RES term_list_trunc(ZebraHandle zh, *num_result_sets = 0; *term_dst = 0; - if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL) + if (grep_info_prepare(zh, zapt, &grep_info, *index_type) == ZEBRA_FAIL) return ZEBRA_FAIL; while(1) { @@ -1312,7 +1281,7 @@ static ZEBRA_RES term_list_trunc(ZebraHandle zh, } res 
= term_trunc(zh, zapt, &termp, attributeSet, stream, &grep_info, - reg_type, complete_flag, + index_type, complete_flag, num_bases, basenames, term_dst, rank_type, xpath_use, rset_nmem, @@ -1323,7 +1292,7 @@ static ZEBRA_RES term_list_trunc(ZebraHandle zh, int i; for (i = 0; i < *num_result_sets; i++) rset_delete((*result_sets)[i]); - grep_info_delete (&grep_info); + grep_info_delete(&grep_info); return res; } if ((*result_sets)[*num_result_sets] == 0) @@ -1339,8 +1308,8 @@ static ZEBRA_RES term_list_trunc(ZebraHandle zh, static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - oid_value attributeSet, - int reg_type, + const Odr_oid *attributeSet, + const char *index_type, int num_bases, char **basenames, NMEM rset_nmem, RSET *rset, @@ -1368,7 +1337,7 @@ static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh, return ZEBRA_FAIL; } - if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, reg_type)) + if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, *index_type)) { zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE, position_value); @@ -1391,18 +1360,18 @@ static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh, char *val; ISAM_P isam_p; - if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no])) + if (zebraExplain_curDatabase(zh->reg->zei, basenames[base_no])) { zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE, basenames[base_no]); return ZEBRA_FAIL; } - if (zebra_apt_get_ord(zh, zapt, reg_type, 0, + if (zebra_apt_get_ord(zh, zapt, index_type, 0, attributeSet, &ord) != ZEBRA_OK) continue; - ord_len = key_SU_encode (ord, ord_buf); + ord_len = key_SU_encode(ord, ord_buf); memcpy(term_dict, ord_buf, ord_len); strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR); val = dict_lookup(zh->reg->dict, term_dict); @@ -1431,9 +1400,9 @@ static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh, static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, - oid_value attributeSet, + const 
Odr_oid *attributeSet, NMEM stream, - int reg_type, int complete_flag, + const char *index_type, int complete_flag, const char *rank_type, const char *xpath_use, int num_bases, char **basenames, @@ -1445,7 +1414,7 @@ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, int num_result_sets = 0; ZEBRA_RES res = term_list_trunc(zh, zapt, termz_org, attributeSet, - stream, reg_type, complete_flag, + stream, index_type, complete_flag, rank_type, xpath_use, num_bases, basenames, rset_nmem, @@ -1458,7 +1427,7 @@ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, { RSET first_set = 0; res = rpn_search_APT_position(zh, zapt, attributeSet, - reg_type, + index_type, num_bases, basenames, rset_nmem, &first_set, kc); @@ -1491,9 +1460,10 @@ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, - oid_value attributeSet, + const Odr_oid *attributeSet, NMEM stream, - int reg_type, int complete_flag, + const char *index_type, + int complete_flag, const char *rank_type, const char *xpath_use, int num_bases, char **basenames, @@ -1506,7 +1476,7 @@ static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh, int i; ZEBRA_RES res = term_list_trunc(zh, zapt, termz_org, attributeSet, - stream, reg_type, complete_flag, + stream, index_type, complete_flag, rank_type, xpath_use, num_bases, basenames, rset_nmem, @@ -1518,7 +1488,7 @@ static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh, { RSET first_set = 0; res = rpn_search_APT_position(zh, zapt, attributeSet, - reg_type, + index_type, num_bases, basenames, rset_nmem, &first_set, kc); @@ -1558,9 +1528,10 @@ static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh, static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, - oid_value attributeSet, + const Odr_oid *attributeSet, NMEM stream, - int reg_type, int complete_flag, + const char *index_type, + int complete_flag, const char 
*rank_type, const char *xpath_use, int num_bases, char **basenames, @@ -1573,7 +1544,7 @@ static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh, int i; ZEBRA_RES res = term_list_trunc(zh, zapt, termz_org, attributeSet, - stream, reg_type, complete_flag, + stream, index_type, complete_flag, rank_type, xpath_use, num_bases, basenames, rset_nmem, @@ -1585,7 +1556,7 @@ static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh, { RSET first_set = 0; res = rpn_search_APT_position(zh, zapt, attributeSet, - reg_type, + index_type, num_bases, basenames, rset_nmem, &first_set, kc); @@ -1626,11 +1597,11 @@ static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh, static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, - char *term_dict, - oid_value attributeSet, + WRBUF term_dict, + const Odr_oid *attributeSet, struct grep_info *grep_info, int *max_pos, - int reg_type, + const char *index_type, char *term_dst, int *error_code) { @@ -1638,7 +1609,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, int relation_value; int term_value; int r; - char *term_tmp = term_dict + strlen(term_dict); + WRBUF term_num = wrbuf_alloc(); *error_code = 0; attr_init_APT(&relation, zapt, 2); @@ -1650,44 +1621,59 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, { case 1: yaz_log(log_level_rpn, "Relation <"); - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1, + if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1, term_dst)) + { + wrbuf_destroy(term_num); return 0; - term_value = atoi (term_tmp); - gen_regular_rel(term_tmp, term_value-1, 1); + } + term_value = atoi(wrbuf_cstr(term_num)); + gen_regular_rel(term_dict, term_value-1, 1); break; case 2: yaz_log(log_level_rpn, "Relation <="); - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1, + if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1, term_dst)) + { + wrbuf_destroy(term_num); return 
0; - term_value = atoi (term_tmp); - gen_regular_rel(term_tmp, term_value, 1); + } + term_value = atoi(wrbuf_cstr(term_num)); + gen_regular_rel(term_dict, term_value, 1); break; case 4: yaz_log(log_level_rpn, "Relation >="); - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1, + if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1, term_dst)) + { + wrbuf_destroy(term_num); return 0; - term_value = atoi (term_tmp); - gen_regular_rel(term_tmp, term_value, 0); + } + term_value = atoi(wrbuf_cstr(term_num)); + gen_regular_rel(term_dict, term_value, 0); break; case 5: yaz_log(log_level_rpn, "Relation >"); - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1, + if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1, term_dst)) + { + wrbuf_destroy(term_num); return 0; - term_value = atoi (term_tmp); - gen_regular_rel(term_tmp, term_value+1, 0); + } + term_value = atoi(wrbuf_cstr(term_num)); + gen_regular_rel(term_dict, term_value+1, 0); break; case -1: case 3: yaz_log(log_level_rpn, "Relation ="); - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1, + if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1, term_dst)) - return 0; - term_value = atoi (term_tmp); - sprintf(term_tmp, "(0*%d)", term_value); + { + wrbuf_destroy(term_num); + return 0; + } + term_value = atoi(wrbuf_cstr(term_num)); + wrbuf_printf(term_dict, "(0*%d)", term_value); break; case 103: /* term_tmp untouched.. 
*/ @@ -1696,31 +1682,32 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, break; default: *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE; - return 0; + wrbuf_destroy(term_num); + return 0; } - yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp); - r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos, - 0, grep_handle); + r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), + 0, grep_info, max_pos, 0, grep_handle); if (r == 1) zebra_set_partial_result(zh); else if (r) yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r); yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx); + wrbuf_destroy(term_num); return 1; } static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, - oid_value attributeSet, NMEM stream, + WRBUF term_dict, + const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, - int reg_type, int complete_flag, + const char *index_type, int complete_flag, int num_bases, char **basenames, char *term_dst, const char *xpath_use, struct ord_list **ol) { - char term_dict[2*IT_MAX_WORD+2]; int base_no; const char *termp; struct rpn_char_map_info rcmi; @@ -1729,44 +1716,45 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, *ol = ord_list_create(stream); - rpn_char_map_prepare (zh->reg, reg_type, &rcmi); + rpn_char_map_prepare(zh->reg, *index_type, &rcmi); for (base_no = 0; base_no < num_bases; base_no++) { - int max_pos, prefix_len = 0; + int max_pos; int relation_error = 0; int ord, ord_len, i; char ord_buf[32]; termp = *term_sub; - if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no])) + if (zebraExplain_curDatabase(zh->reg->zei, basenames[base_no])) { zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE, basenames[base_no]); return ZEBRA_FAIL; } - if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use, + if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use, attributeSet, &ord) != ZEBRA_OK) continue; bases_ok++; 
+ wrbuf_rewind(term_dict); + *ol = ord_list_append(stream, *ol, ord); - ord_len = key_SU_encode (ord, ord_buf); + ord_len = key_SU_encode(ord, ord_buf); - term_dict[prefix_len++] = '('; + wrbuf_putc(term_dict, '('); for (i = 0; i < ord_len; i++) { - term_dict[prefix_len++] = 1; - term_dict[prefix_len++] = ord_buf[i]; + wrbuf_putc(term_dict, 1); + wrbuf_putc(term_dict, ord_buf[i]); } - term_dict[prefix_len++] = ')'; - term_dict[prefix_len] = '\0'; + wrbuf_putc(term_dict, ')'); if (!numeric_relation(zh, zapt, &termp, term_dict, - attributeSet, grep_info, &max_pos, reg_type, + attributeSet, grep_info, &max_pos, index_type, term_dst, &relation_error)) { if (relation_error) @@ -1789,9 +1777,10 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz, - oid_value attributeSet, + const Odr_oid *attributeSet, NMEM stream, - int reg_type, int complete_flag, + const char *index_type, + int complete_flag, const char *rank_type, const char *xpath_use, int num_bases, char **basenames, @@ -1812,11 +1801,12 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream); yaz_log(log_level_rpn, "APT_numeric t='%s'", termz); - if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL) + if (grep_info_prepare(zh, zapt, &grep_info, *index_type) == ZEBRA_FAIL) return ZEBRA_FAIL; while (1) { struct ord_list *ol; + WRBUF term_dict = wrbuf_alloc(); if (alloc_sets == num_result_sets) { int add = 10; @@ -1829,9 +1819,11 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, } yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp); grep_info.isam_p_indx = 0; - res = numeric_term(zh, zapt, &termp, attributeSet, stream, &grep_info, - reg_type, complete_flag, num_bases, basenames, + res = numeric_term(zh, zapt, &termp, term_dict, + attributeSet, stream, &grep_info, + index_type, complete_flag, 
num_bases, basenames, term_dst, xpath_use, &ol); + wrbuf_destroy(term_dict); if (res == ZEBRA_FAIL || termp == 0) break; yaz_log(YLOG_DEBUG, "term: %s", term_dst); @@ -1841,7 +1833,7 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, strlen(term_dst), rank_type, 0 /* preserve position */, zapt->term->which, rset_nmem, - kc, kc->scope, ol, reg_type, + kc, kc->scope, ol, index_type, hits_limit_value, term_ref_id_str); if (!result_sets[num_result_sets]) @@ -1869,32 +1861,46 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz, - oid_value attributeSet, + const Odr_oid *attributeSet, NMEM stream, const char *rank_type, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc) { - RSFD rsfd; - struct it_key key; - int sys; - *rset = rset_create_temp(rset_nmem, kc, kc->scope, - res_get (zh->res, "setTmpDir"),0 ); - rsfd = rset_open(*rset, RSETF_WRITE); + Record rec; + zint sysno = atozint(termz); - sys = atoi(termz); - if (sys <= 0) - sys = 1; - key.mem[0] = sys; - key.mem[1] = 1; - key.len = 2; - rset_write (rsfd, &key); - rset_close (rsfd); + if (sysno <= 0) + sysno = 0; + rec = rec_get(zh->reg->records, sysno); + if (!rec) + sysno = 0; + + rec_free(&rec); + + if (sysno <= 0) + { + *rset = rset_create_null(rset_nmem, kc, 0); + } + else + { + RSFD rsfd; + struct it_key key; + *rset = rset_create_temp(rset_nmem, kc, kc->scope, + res_get(zh->res, "setTmpDir"), 0); + rsfd = rset_open(*rset, RSETF_WRITE); + + key.mem[0] = sysno; + key.mem[1] = 1; + key.len = 2; + rset_write(rsfd, &key); + rset_close(rsfd); + } return ZEBRA_OK; } static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - oid_value attributeSet, NMEM stream, + const Odr_oid *attributeSet, NMEM stream, Z_SortKeySpecList *sort_sequence, const char *rank_type, NMEM rset_nmem, @@ -1906,8 +1912,6 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, AttrType 
sort_relation_type; Z_SortKeySpec *sks; Z_SortKey *sk; - int oid[OID_SIZE]; - oident oe; char termz[20]; attr_init_APT(&sort_relation_type, zapt, 7); @@ -1925,18 +1929,12 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, if (zapt->term->which != Z_Term_general) i = 0; else - i = atoi_n ((char *) zapt->term->u.general->buf, + i = atoi_n((char *) zapt->term->u.general->buf, zapt->term->u.general->len); if (i >= sort_sequence->num_specs) i = 0; sprintf(termz, "%d", i); - oe.proto = PROTO_Z3950; - oe.oclass = CLASS_ATTSET; - oe.value = attributeSet; - if (!oid_ent_to_oid (&oe, oid)) - return ZEBRA_FAIL; - sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks)); sks->sortElement = (Z_SortElement *) nmem_malloc(stream, sizeof(*sks->sortElement)); @@ -1947,7 +1945,7 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, sk->u.sortAttributes = (Z_SortAttributes *) nmem_malloc(stream, sizeof(*sk->u.sortAttributes)); - sk->u.sortAttributes->id = oid; + sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet); sk->u.sortAttributes->list = zapt->attributes; sks->sortRelation = (int *) @@ -1972,11 +1970,11 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - oid_value attributeSet, + const Odr_oid *attributeSet, struct xpath_location_step *xpath, int max, NMEM mem) { - oid_value curAttributeSet = attributeSet; + const Odr_oid *curAttributeSet = attributeSet; AttrType use; const char *use_string = 0; @@ -1992,55 +1990,53 @@ static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt, static RSET xpath_trunc(ZebraHandle zh, NMEM stream, - int reg_type, const char *term, + const char *index_type, const char *term, const char *xpath_use, NMEM rset_nmem, struct rset_key_control *kc) { - RSET rset; struct grep_info grep_info; - char term_dict[2048]; - char ord_buf[32]; - int prefix_len = 0; int ord = 
zebraExplain_lookup_attr_str(zh->reg->zei, zinfo_index_category_index, - reg_type, - xpath_use); - int ord_len, i, r, max_pos; - int term_type = Z_Term_characterString; - const char *flags = "void"; - + index_type, xpath_use); if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL) return rset_create_null(rset_nmem, kc, 0); if (ord < 0) return rset_create_null(rset_nmem, kc, 0); - if (prefix_len) - term_dict[prefix_len++] = '|'; else - term_dict[prefix_len++] = '('; - - ord_len = key_SU_encode (ord, ord_buf); - for (i = 0; ireg->dict, wrbuf_cstr(term_dict), 0, + &grep_info, &max_pos, 0, grep_handle); + yaz_log(YLOG_DEBUG, "%s %d positions", term, + grep_info.isam_p_indx); + rset = rset_trunc(zh, grep_info.isam_p_buf, + grep_info.isam_p_indx, term, strlen(term), + flags, 1, term_type, rset_nmem, + kc, kc->scope, 0, index_type, 0 /* hits_limit */, + 0 /* term_ref_id_str */); + grep_info_delete(&grep_info); + wrbuf_destroy(term_dict); + return rset; } - term_dict[prefix_len++] = ')'; - strcpy(term_dict+prefix_len, term); - - grep_info.isam_p_indx = 0; - r = dict_lookup_grep(zh->reg->dict, term_dict, 0, - &grep_info, &max_pos, 0, grep_handle); - yaz_log(YLOG_DEBUG, "%s %d positions", term, - grep_info.isam_p_indx); - rset = rset_trunc(zh, grep_info.isam_p_buf, - grep_info.isam_p_indx, term, strlen(term), - flags, 1, term_type,rset_nmem, - kc, kc->scope, 0, reg_type, 0 /* hits_limit */, - 0 /* term_ref_id_str */); - grep_info_delete(&grep_info); - return rset; } static @@ -2088,14 +2084,14 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, */ - dict_grep_cmap (zh->reg->dict, 0, 0); + dict_grep_cmap(zh->reg->dict, 0, 0); for (base_no = 0; base_no < num_bases; base_no++) { int level = xpath_len; int first_path = 1; - if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no])) + if (zebraExplain_curDatabase(zh->reg->zei, basenames[base_no])) { zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE, basenames[base_no]); @@ -2150,34 +2146,34 @@ ZEBRA_RES 
rpn_search_xpath(ZebraHandle zh, cp++; } } - wrbuf_puts(wbuf, ""); rset_attr = xpath_trunc( - zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME, + zh, stream, "0", wrbuf_cstr(wbuf), + ZEBRA_XPATH_ATTR_NAME, rset_nmem, kc); - wrbuf_free(wbuf, 1); + wrbuf_destroy(wbuf); } else { if (!first_path) { - wrbuf_free(xpath_rev, 1); + wrbuf_destroy(xpath_rev); continue; } } - yaz_log(log_level_rpn, "xpath_rev (%d) = %.*s", level, - wrbuf_len(xpath_rev), wrbuf_buf(xpath_rev)); + yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, + wrbuf_cstr(xpath_rev)); if (wrbuf_len(xpath_rev)) { - rset_start_tag = xpath_trunc(zh, stream, '0', - wrbuf_buf(xpath_rev), + rset_start_tag = xpath_trunc(zh, stream, "0", + wrbuf_cstr(xpath_rev), ZEBRA_XPATH_ELM_BEGIN, rset_nmem, kc); if (always_matches) rset = rset_start_tag; else { - rset_end_tag = xpath_trunc(zh, stream, '0', - wrbuf_buf(xpath_rev), + rset_end_tag = xpath_trunc(zh, stream, "0", + wrbuf_cstr(xpath_rev), ZEBRA_XPATH_ELM_END, rset_nmem, kc); @@ -2186,7 +2182,7 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, rset_end_tag, rset_attr); } } - wrbuf_free(xpath_rev, 1); + wrbuf_destroy(xpath_rev); first_path = 0; } } @@ -2196,16 +2192,73 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, #define MAX_XPATH_STEPS 10 +static ZEBRA_RES rpn_search_database(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const Odr_oid *attributeSet, NMEM stream, + Z_SortKeySpecList *sort_sequence, + int num_bases, char **basenames, + NMEM rset_nmem, + RSET *rset, + struct rset_key_control *kc); + static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - oid_value attributeSet, NMEM stream, + const Odr_oid *attributeSet, NMEM stream, Z_SortKeySpecList *sort_sequence, int num_bases, char **basenames, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc) { + RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets)); + ZEBRA_RES res = ZEBRA_OK; + int i; + for (i = 0; i < num_bases; i++) + { + + if (zebraExplain_curDatabase(zh->reg->zei, 
basenames[i])) + { + zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE, + basenames[i]); + res = ZEBRA_FAIL; + break; + } + res = rpn_search_database(zh, zapt, attributeSet, stream, + sort_sequence, 1, basenames+i, + rset_nmem, rsets+i, kc); + if (res != ZEBRA_OK) + break; + } + if (res != ZEBRA_OK) + { /* must clean up the already created sets */ + int j; + for (i = 0; j < i; j++) + rset_delete(rsets[j]); + *rset = 0; + } + else + { + if (num_bases == 1) + *rset = rsets[0]; + else if (num_bases == 0) + *rset = rset_create_null(rset_nmem, kc, 0); + else + *rset = rset_create_and(rset_nmem, kc, kc->scope, + num_bases, rsets); + } + return res; +} + +static ZEBRA_RES rpn_search_database(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const Odr_oid *attributeSet, NMEM stream, + Z_SortKeySpecList *sort_sequence, + int num_bases, char **basenames, + NMEM rset_nmem, + RSET *rset, + struct rset_key_control *kc) +{ ZEBRA_RES res = ZEBRA_OK; - unsigned reg_id; + const char *index_type; char *search_type = NULL; char rank_type[128]; int complete_flag; @@ -2220,10 +2273,10 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, log_level_rpn = yaz_log_module_level("rpn"); log_level_set = 1; } - zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type, + zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type, rank_type, &complete_flag, &sort_flag); - yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id); + yaz_log(YLOG_DEBUG, "index_type=%s", index_type); yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag); yaz_log(YLOG_DEBUG, "search_type=%s", search_type); yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type); @@ -2273,7 +2326,7 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, if (!strcmp(search_type, "phrase")) { res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream, - reg_id, complete_flag, rank_type, + index_type, complete_flag, rank_type, xpath_use, num_bases, basenames, rset_nmem, rset, kc); @@ -2281,7
+2334,7 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, else if (!strcmp(search_type, "and-list")) { res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream, - reg_id, complete_flag, rank_type, + index_type, complete_flag, rank_type, xpath_use, num_bases, basenames, rset_nmem, rset, kc); @@ -2289,7 +2342,7 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, else if (!strcmp(search_type, "or-list")) { res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream, - reg_id, complete_flag, rank_type, + index_type, complete_flag, rank_type, xpath_use, num_bases, basenames, rset_nmem, rset, kc); @@ -2302,7 +2355,7 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, else if (!strcmp(search_type, "numeric")) { res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream, - reg_id, complete_flag, rank_type, + index_type, complete_flag, rank_type, xpath_use, num_bases, basenames, rset_nmem, rset, kc); @@ -2322,7 +2375,7 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, } static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, - oid_value attributeSet, + const Odr_oid *attributeSet, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, int num_bases, char **basenames, @@ -2362,7 +2415,7 @@ ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs, } ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs, - oid_value attributeSet, + const Odr_oid *attributeSet, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, int num_bases, char **basenames, @@ -2399,7 +2452,7 @@ ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs, } ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, - oid_value attributeSet, + const Odr_oid *attributeSet, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, int num_bases, char **basenames,