X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Frpnsearch.c;h=269810b5d4df740e06c9ff8f483f0a0fa1d4f4f9;hb=43e4297c07b9c8b29bfc1ea647fc27456198f6ce;hp=aaae58a5a1c6c5d0369d0ca3bc977d4f2b9f59c9;hpb=3e4a78274a6cb7a99f3e90967ea30c830ffbf8c3;p=idzebra-moved-to-github.git diff --git a/index/rpnsearch.c b/index/rpnsearch.c index aaae58a..269810b 100644 --- a/index/rpnsearch.c +++ b/index/rpnsearch.c @@ -1,4 +1,4 @@ -/* $Id: rpnsearch.c,v 1.10 2007-03-19 21:50:39 adam Exp $ +/* $Id: rpnsearch.c,v 1.14 2007-05-14 14:05:21 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -206,9 +206,36 @@ static void esc_str(char *out_buf, size_t out_size, #define REGEX_CHARS " []()|.*+?!" +static void add_non_space(const char *start, const char *end, + WRBUF term_dict, + char *dst_term, int *dst_ptr, + const char **map, int q_map_match) +{ + size_t sz = end - start; + memcpy(dst_term + *dst_ptr, start, sz); + (*dst_ptr) += sz; + if (!q_map_match) + { + while (start < end) + { + if (strchr(REGEX_CHARS, *start)) + wrbuf_putc(term_dict, '\\'); + wrbuf_putc(term_dict, *start); + start++; + } + } + else + { + char tmpbuf[80]; + esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); + + wrbuf_puts(term_dict, map[0]); + } +} + /* term_100: handle term, where trunc = none(no operators at all) */ static int term_100(ZebraMaps zebra_maps, int reg_type, - const char **src, char *dst, int space_split, + const char **src, WRBUF term_dict, int space_split, char *dst_term) { const char *s0; @@ -246,36 +273,21 @@ static int term_100(ZebraMaps zebra_maps, int reg_type, while (space_start < space_end) { if (strchr(REGEX_CHARS, *space_start)) - dst[i++] = '\\'; + wrbuf_putc(term_dict, '\\'); dst_term[j++] = *space_start; - dst[i++] = *space_start++; + wrbuf_putc(term_dict, *space_start); + space_start++; + } /* and reset */ space_start = space_end = 0; } } - /* add non-space char */ - memcpy(dst_term+j, s1, s0 - s1); - j += (s0 - s1); - if (!q_map_match) - { - while (s1 < s0) - { - if (strchr(REGEX_CHARS, *s1)) - dst[i++] = '\\'; - dst[i++] = *s1++; - } - } - else - { - char tmpbuf[80]; - esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); - - strcpy(dst + i, map[0]); - i += strlen(map[0]); - } + i++; + + add_non_space(s1, s0, term_dict, dst_term, &j, + map, q_map_match); } - dst[i] = '\0'; dst_term[j] = '\0'; *src = s0; return i; @@ -283,7 +295,7 @@ static int term_100(ZebraMaps zebra_maps, int reg_type, /* term_101: handle term, where trunc = Process # */ static int term_101(ZebraMaps zebra_maps, int reg_type, - const char **src, char *dst, int space_split, + const char **src, WRBUF term_dict, int space_split, char *dst_term) { const char *s0; @@ -298,8 +310,8 @@ static int term_101(ZebraMaps zebra_maps, int reg_type, { if (*s0 == '#') { - dst[i++] = '.'; - dst[i++] = '*'; + i++; + wrbuf_puts(term_dict, ".*"); dst_term[j++] = *s0++; } else @@ -311,29 +323,11 @@ static int term_101(ZebraMaps zebra_maps, int reg_type, if (space_split && **map == *CHR_SPACE) break; - /* add non-space char */ - memcpy(dst_term+j, s1, s0 - s1); - j += (s0 - s1); - if (!q_map_match) - { - while (s1 < s0) - { - if (strchr(REGEX_CHARS, *s1)) - dst[i++] = '\\'; - dst[i++] = *s1++; - } - } - else - { - char tmpbuf[80]; - esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); - - strcpy(dst + i, map[0]); - i += strlen(map[0]); - } + i++; + add_non_space(s1, s0, term_dict, dst_term, &j, + map, q_map_match); } } - dst[i] = '\0'; dst_term[j++] = '\0'; *src = s0; return i; @@ -341,7 +335,7 @@ static int term_101(ZebraMaps zebra_maps, int reg_type, /* term_103: handle term, where trunc = re-2 (regular expressions) */ static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src, - char *dst, int *errors, int space_split, + WRBUF term_dict, int *errors, int space_split, char *dst_term) { int i = 0; @@ -365,7 +359,9 @@ static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src, if (strchr("^\\()[].*+?|-", *s0)) { dst_term[j++] = *s0; - dst[i++] = *s0++; + wrbuf_putc(term_dict, *s0); + s0++; + i++; } else { @@ -376,29 +372,11 @@ static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src, if (space_split && **map == *CHR_SPACE) break; - /* add non-space char */ - memcpy(dst_term+j, s1, s0 - s1); - j += (s0 - s1); - if (!q_map_match) - { - while (s1 < s0) - { - if (strchr(REGEX_CHARS, *s1)) - dst[i++] = '\\'; - dst[i++] = *s1++; - } - } - else - { - char tmpbuf[80]; - esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); - - strcpy(dst + i, map[0]); - i += strlen(map[0]); - } + i++; + add_non_space(s1, s0, term_dict, dst_term, &j, + map, q_map_match); } } - dst[i] = '\0'; dst_term[j] = '\0'; *src = s0; @@ -407,16 +385,16 @@ static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src, /* term_103: handle term, where trunc = re-1 (regular expressions) */ static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src, - char *dst, int space_split, char *dst_term) + WRBUF term_dict, int space_split, char *dst_term) { - return term_103(zebra_maps, reg_type, src, dst, NULL, space_split, + return term_103(zebra_maps, reg_type, src, term_dict, NULL, space_split, dst_term); } -/* term_104: handle term, where trunc = Process # and ! */ +/* term_104: handle term, process # and ! */ static int term_104(ZebraMaps zebra_maps, int reg_type, - const char **src, char *dst, int space_split, + const char **src, WRBUF term_dict, int space_split, char *dst_term) { const char *s0; @@ -431,6 +409,7 @@ static int term_104(ZebraMaps zebra_maps, int reg_type, { if (*s0 == '?') { + i++; dst_term[j++] = *s0++; if (*s0 >= '0' && *s0 <= '9') { @@ -444,25 +423,24 @@ static int term_104(ZebraMaps zebra_maps, int reg_type, limit = 20; while (--limit >= 0) { - dst[i++] = '.'; - dst[i++] = '?'; + wrbuf_puts(term_dict, ".?"); } } else { - dst[i++] = '.'; - dst[i++] = '*'; + wrbuf_puts(term_dict, ".*"); } } else if (*s0 == '*') { - dst[i++] = '.'; - dst[i++] = '*'; + i++; + wrbuf_puts(term_dict, ".*"); dst_term[j++] = *s0++; } else if (*s0 == '#') { - dst[i++] = '.'; + i++; + wrbuf_puts(term_dict, "."); dst_term[j++] = *s0++; } else @@ -474,29 +452,11 @@ static int term_104(ZebraMaps zebra_maps, int reg_type, if (space_split && **map == *CHR_SPACE) break; - /* add non-space char */ - memcpy(dst_term+j, s1, s0 - s1); - j += (s0 - s1); - if (!q_map_match) - { - while (s1 < s0) - { - if (strchr(REGEX_CHARS, *s1)) - dst[i++] = '\\'; - dst[i++] = *s1++; - } - } - else - { - char tmpbuf[80]; - esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); - - strcpy(dst + i, map[0]); - i += strlen(map[0]); - } + i++; + add_non_space(s1, s0, term_dict, dst_term, &j, + map, q_map_match); } } - dst[i] = '\0'; dst_term[j++] = '\0'; *src = s0; return i; @@ -504,7 +464,7 @@ static int term_104(ZebraMaps zebra_maps, int reg_type, /* term_105/106: handle term, where trunc = Process * and ! and right trunc */ static int term_105(ZebraMaps zebra_maps, int reg_type, - const char **src, char *dst, int space_split, + const char **src, WRBUF term_dict, int space_split, char *dst_term, int right_truncate) { const char *s0; @@ -519,13 +479,14 @@ static int term_105(ZebraMaps zebra_maps, int reg_type, { if (*s0 == '*') { - dst[i++] = '.'; - dst[i++] = '*'; + i++; + wrbuf_puts(term_dict, ".*"); dst_term[j++] = *s0++; } else if (*s0 == '!') { - dst[i++] = '.'; + i++; + wrbuf_putc(term_dict, '.'); dst_term[j++] = *s0++; } else @@ -537,35 +498,13 @@ static int term_105(ZebraMaps zebra_maps, int reg_type, if (space_split && **map == *CHR_SPACE) break; - /* add non-space char */ - memcpy(dst_term+j, s1, s0 - s1); - j += (s0 - s1); - if (!q_map_match) - { - while (s1 < s0) - { - if (strchr(REGEX_CHARS, *s1)) - dst[i++] = '\\'; - dst[i++] = *s1++; - } - } - else - { - char tmpbuf[80]; - esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); - - strcpy(dst + i, map[0]); - i += strlen(map[0]); - } + i++; + add_non_space(s1, s0, term_dict, dst_term, &j, + map, q_map_match); } } if (right_truncate) - { - dst[i++] = '.'; - dst[i++] = '*'; - } - dst[i] = '\0'; - + wrbuf_puts(term_dict, ".*"); dst_term[j++] = '\0'; *src = s0; return i; @@ -576,8 +515,10 @@ static int term_105(ZebraMaps zebra_maps, int reg_type, * val: border value (inclusive) * islt: 1 if <=; 0 if >=. */ -static void gen_regular_rel(char *dst, int val, int islt) +static void gen_regular_rel(WRBUF term_dict, int val, int islt) { + char dst_buf[20*5*20]; /* assuming enough for expansion */ + char *dst = dst_buf; int dst_p; int w, d, i; int pos = 0; @@ -596,7 +537,6 @@ static void gen_regular_rel(char *dst, int val, int islt) if (!islt) { strcpy(dst, "([0-9]+|-("); - dst_p = strlen(dst); islt = 1; } else @@ -682,13 +622,19 @@ static void gen_regular_rel(char *dst, int val, int islt) strcat(dst, "[0-9]*"); } strcat(dst, "))"); + wrbuf_puts(term_dict, dst); } -void string_rel_add_char(char **term_p, const char *src, int *indx) +void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx) { + const char *src = wrbuf_cstr(wsrc); if (src[*indx] == '\\') - *(*term_p)++ = src[(*indx)++]; - *(*term_p)++ = src[(*indx)++]; + { + wrbuf_putc(term_p, src[*indx]); + (*indx)++; + } + wrbuf_putc(term_p, src[*indx]); + (*indx)++; } /* @@ -702,16 +648,15 @@ void string_rel_add_char(char **term_p, const char *src, int *indx) * ([^a-].*|a[^b-].*|ab[^c-].*|abc) */ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const char **term_sub, char *term_dict, - oid_value attributeSet, + const char **term_sub, WRBUF term_dict, + const Odr_oid *attributeSet, int reg_type, int space_split, char *term_dst, int *error_code) { AttrType relation; int relation_value; int i; - char *term_tmp = term_dict + strlen(term_dict); - char term_component[2*IT_MAX_WORD+20]; + WRBUF term_component = wrbuf_alloc(); attr_init_APT(&relation, zapt, 2); relation_value = attr_find(&relation, NULL); @@ -724,150 +669,145 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_component, space_split, term_dst)) + { + wrbuf_destroy(term_component); return 0; + } yaz_log(log_level_rpn, "Relation <"); - *term_tmp++ = '('; - for (i = 0; term_component[i]; ) + wrbuf_putc(term_dict, '('); + for (i = 0; i < wrbuf_len(term_component); ) { int j = 0; - + if (i) - *term_tmp++ = '|'; + wrbuf_putc(term_dict, '|'); while (j < i) - string_rel_add_char(&term_tmp, term_component, &j); - - *term_tmp++ = '['; - - *term_tmp++ = '^'; + string_rel_add_char(term_dict, term_component, &j); - *term_tmp++ = 1; - *term_tmp++ = FIRST_IN_FIELD_CHAR; + wrbuf_putc(term_dict, '['); - string_rel_add_char(&term_tmp, term_component, &i); - *term_tmp++ = '-'; - - *term_tmp++ = ']'; - *term_tmp++ = '.'; - *term_tmp++ = '*'; - - if ((term_tmp - term_dict) > IT_MAX_WORD) - break; + wrbuf_putc(term_dict, '^'); + + wrbuf_putc(term_dict, 1); + wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR); + + string_rel_add_char(term_dict, term_component, &i); + wrbuf_putc(term_dict, '-'); + + wrbuf_putc(term_dict, ']'); + wrbuf_putc(term_dict, '.'); + wrbuf_putc(term_dict, '*'); } - *term_tmp++ = ')'; - *term_tmp = '\0'; - yaz_log(YLOG_LOG, "term_dict=%s", term_dict); + wrbuf_putc(term_dict, ')'); break; case 2: if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_component, space_split, term_dst)) + { + wrbuf_destroy(term_component); return 0; + } yaz_log(log_level_rpn, "Relation <="); - *term_tmp++ = '('; - for (i = 0; term_component[i]; ) + wrbuf_putc(term_dict, '('); + for (i = 0; i < wrbuf_len(term_component); ) { int j = 0; while (j < i) - string_rel_add_char(&term_tmp, term_component, &j); - *term_tmp++ = '['; - - *term_tmp++ = '^'; + string_rel_add_char(term_dict, term_component, &j); + wrbuf_putc(term_dict, '['); - *term_tmp++ = 1; - *term_tmp++ = FIRST_IN_FIELD_CHAR; + wrbuf_putc(term_dict, '^'); - string_rel_add_char(&term_tmp, term_component, &i); - *term_tmp++ = '-'; + wrbuf_putc(term_dict, 1); + wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR); - *term_tmp++ = ']'; - *term_tmp++ = '.'; - *term_tmp++ = '*'; + string_rel_add_char(term_dict, term_component, &i); + wrbuf_putc(term_dict, '-'); - *term_tmp++ = '|'; + wrbuf_putc(term_dict, ']'); + wrbuf_putc(term_dict, '.'); + wrbuf_putc(term_dict, '*'); - if ((term_tmp - term_dict) > IT_MAX_WORD) - break; + wrbuf_putc(term_dict, '|'); } - for (i = 0; term_component[i]; ) - string_rel_add_char(&term_tmp, term_component, &i); - *term_tmp++ = ')'; - *term_tmp = '\0'; + for (i = 0; i < wrbuf_len(term_component); ) + string_rel_add_char(term_dict, term_component, &i); + wrbuf_putc(term_dict, ')'); break; case 5: if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub, term_component, space_split, term_dst)) + { + wrbuf_destroy(term_component); return 0; + } yaz_log(log_level_rpn, "Relation >"); - *term_tmp++ = '('; - for (i = 0; term_component[i];) + wrbuf_putc(term_dict, '('); + for (i = 0; i < wrbuf_len(term_component); ) { int j = 0; while (j < i) - string_rel_add_char(&term_tmp, term_component, &j); - *term_tmp++ = '['; + string_rel_add_char(term_dict, term_component, &j); + wrbuf_putc(term_dict, '['); - *term_tmp++ = '^'; - *term_tmp++ = '-'; - string_rel_add_char(&term_tmp, term_component, &i); - - *term_tmp++ = ']'; - *term_tmp++ = '.'; - *term_tmp++ = '*'; + wrbuf_putc(term_dict, '^'); + wrbuf_putc(term_dict, '-'); + string_rel_add_char(term_dict, term_component, &i); - *term_tmp++ = '|'; + wrbuf_putc(term_dict, ']'); + wrbuf_putc(term_dict, '.'); + wrbuf_putc(term_dict, '*'); - if ((term_tmp - term_dict) > IT_MAX_WORD) - break; + wrbuf_putc(term_dict, '|'); } - for (i = 0; term_component[i];) - string_rel_add_char(&term_tmp, term_component, &i); - *term_tmp++ = '.'; - *term_tmp++ = '+'; - *term_tmp++ = ')'; - *term_tmp = '\0'; + for (i = 0; i < wrbuf_len(term_component); ) + string_rel_add_char(term_dict, term_component, &i); + wrbuf_putc(term_dict, '.'); + wrbuf_putc(term_dict, '+'); + wrbuf_putc(term_dict, ')'); break; case 4: if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_component, space_split, term_dst)) + { + wrbuf_destroy(term_component); return 0; + } yaz_log(log_level_rpn, "Relation >="); - *term_tmp++ = '('; - for (i = 0; term_component[i];) + wrbuf_putc(term_dict, '('); + for (i = 0; i < wrbuf_len(term_component); ) { int j = 0; if (i) - *term_tmp++ = '|'; + wrbuf_putc(term_dict, '|'); while (j < i) - string_rel_add_char(&term_tmp, term_component, &j); - *term_tmp++ = '['; + string_rel_add_char(term_dict, term_component, &j); + wrbuf_putc(term_dict, '['); - if (term_component[i+1]) + if (i < wrbuf_len(term_component)-1) { - *term_tmp++ = '^'; - *term_tmp++ = '-'; - string_rel_add_char(&term_tmp, term_component, &i); + wrbuf_putc(term_dict, '^'); + wrbuf_putc(term_dict, '-'); + string_rel_add_char(term_dict, term_component, &i); } else { - string_rel_add_char(&term_tmp, term_component, &i); - *term_tmp++ = '-'; + string_rel_add_char(term_dict, term_component, &i); + wrbuf_putc(term_dict, '-'); } - *term_tmp++ = ']'; - *term_tmp++ = '.'; - *term_tmp++ = '*'; - - if ((term_tmp - term_dict) > IT_MAX_WORD) - break; + wrbuf_putc(term_dict, ']'); + wrbuf_putc(term_dict, '.'); + wrbuf_putc(term_dict, '*'); } - *term_tmp++ = ')'; - *term_tmp = '\0'; + wrbuf_putc(term_dict, ')'); break; case 3: case 102: @@ -877,10 +817,13 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, yaz_log(log_level_rpn, "Relation ="); if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_component, space_split, term_dst)) + { + wrbuf_destroy(term_component); return 0; - strcat(term_tmp, "("); - strcat(term_tmp, term_component); - strcat(term_tmp, ")"); + } + wrbuf_puts(term_dict, "("); + wrbuf_puts(term_dict, wrbuf_cstr(term_component)); + wrbuf_puts(term_dict, ")"); break; case 103: yaz_log(log_level_rpn, "Relation always matches"); @@ -890,14 +833,17 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, break; default: *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE; + wrbuf_destroy(term_component); return 0; } + wrbuf_destroy(term_component); return 1; } static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, - oid_value attributeSet, NMEM stream, + WRBUF term_dict, + const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, int reg_type, int complete_flag, int num_bases, char **basenames, @@ -955,7 +901,7 @@ static ZEBRA_RES term_limits_APT(ZebraHandle zh, static ZEBRA_RES term_trunc(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, - oid_value attributeSet, NMEM stream, + const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, int reg_type, int complete_flag, int num_bases, char **basenames, @@ -970,13 +916,16 @@ static ZEBRA_RES term_trunc(ZebraHandle zh, struct ord_list *ol; zint hits_limit_value; const char *term_ref_id_str = 0; - *rset = 0; + WRBUF term_dict = wrbuf_alloc(); + *rset = 0; term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream); grep_info->isam_p_indx = 0; - res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info, + res = string_term(zh, zapt, term_sub, term_dict, + attributeSet, stream, grep_info, reg_type, complete_flag, num_bases, basenames, term_dst, xpath_use, &ol); + wrbuf_destroy(term_dict); if (res != ZEBRA_OK) return res; if (!*term_sub) /* no more terms ? */ @@ -995,7 +944,8 @@ static ZEBRA_RES term_trunc(ZebraHandle zh, static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, - oid_value attributeSet, NMEM stream, + WRBUF term_dict, + const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, int reg_type, int complete_flag, int num_bases, char **basenames, @@ -1003,8 +953,7 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *xpath_use, struct ord_list **ol) { - char term_dict[2*IT_MAX_WORD+4000]; - int j, r, base_no; + int r, base_no; AttrType truncation; int truncation_value; const char *termp; @@ -1031,6 +980,7 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, termp = *term_sub; /* start of term for each database */ + if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no])) { zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE, @@ -1042,20 +992,25 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, attributeSet, &ord) != ZEBRA_OK) continue; + + wrbuf_rewind(term_dict); /* new dictionary regexp term */ + bases_ok++; *ol = ord_list_append(stream, *ol, ord); ord_len = key_SU_encode (ord, ord_buf); - term_dict[prefix_len++] = '('; + wrbuf_putc(term_dict, '('); + for (i = 0; ireg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst)) + &termp, term_dict, space_split, term_dst)) { *term_sub = 0; return ZEBRA_OK; } - strcat(term_dict, ".*)"); + wrbuf_puts(term_dict, ".*)"); break; case 2: /* keft truncation */ - term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*'; + wrbuf_puts(term_dict, "(.*"); if (!term_100(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst)) + &termp, term_dict, space_split, term_dst)) { *term_sub = 0; return ZEBRA_OK; } - strcat(term_dict, ")"); + wrbuf_putc(term_dict, ')'); break; case 3: /* left&right truncation */ - term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*'; + wrbuf_puts(term_dict, "(.*"); if (!term_100(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst)) + &termp, term_dict, space_split, term_dst)) { *term_sub = 0; return ZEBRA_OK; } - strcat(term_dict, ".*)"); + wrbuf_puts(term_dict, ".*)"); break; case 101: /* process # in term */ - term_dict[j++] = '('; + wrbuf_putc(term_dict, '('); if (!term_101(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst)) + &termp, term_dict, space_split, term_dst)) { *term_sub = 0; return ZEBRA_OK; } - strcat(term_dict, ")"); + wrbuf_puts(term_dict, ")"); break; case 102: /* Regexp-1 */ - term_dict[j++] = '('; + wrbuf_putc(term_dict, '('); if (!term_102(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst)) + &termp, term_dict, space_split, term_dst)) { *term_sub = 0; return ZEBRA_OK; } - strcat(term_dict, ")"); + wrbuf_putc(term_dict, ')'); break; case 103: /* Regexp-2 */ regex_range = 1; - term_dict[j++] = '('; + wrbuf_putc(term_dict, '('); if (!term_103(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, ®ex_range, + &termp, term_dict, ®ex_range, space_split, term_dst)) { *term_sub = 0; return ZEBRA_OK; } - strcat(term_dict, ")"); + wrbuf_putc(term_dict, ')'); break; case 104: /* process # and ! in term */ - term_dict[j++] = '('; + wrbuf_putc(term_dict, '('); if (!term_104(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst)) + &termp, term_dict, space_split, term_dst)) { *term_sub = 0; return ZEBRA_OK; } - strcat(term_dict, ")"); + wrbuf_putc(term_dict, ')'); break; case 105: /* process * and ! in term */ - term_dict[j++] = '('; + wrbuf_putc(term_dict, '('); if (!term_105(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst, 1)) + &termp, term_dict, space_split, term_dst, 1)) { *term_sub = 0; return ZEBRA_OK; } - strcat(term_dict, ")"); + wrbuf_putc(term_dict, ')'); break; case 106: /* process * and ! in term */ - term_dict[j++] = '('; + wrbuf_putc(term_dict, '('); if (!term_105(zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst, 0)) + &termp, term_dict, space_split, term_dst, 0)) { *term_sub = 0; return ZEBRA_OK; } - strcat(term_dict, ")"); + wrbuf_putc(term_dict, ')'); break; default: zebra_setError_zint(zh, @@ -1174,12 +1129,13 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, } if (1) { - char buf[80]; - const char *input = term_dict + prefix_len; + char buf[1000]; + const char *input = wrbuf_cstr(term_dict) + prefix_len; esc_str(buf, sizeof(buf), input, strlen(input)); } - yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len); - r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range, + yaz_log(log_level_rpn, "dict_lookup_grep: %s", + wrbuf_cstr(term_dict) + prefix_len); + r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range, grep_info, &max_pos, ord_len /* number of "exact" chars */, grep_handle); @@ -1288,7 +1244,7 @@ static ZEBRA_RES grep_info_prepare(ZebraHandle zh, static ZEBRA_RES term_list_trunc(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz, - oid_value attributeSet, + const Odr_oid *attributeSet, NMEM stream, int reg_type, int complete_flag, const char *rank_type, @@ -1350,7 +1306,7 @@ static ZEBRA_RES term_list_trunc(ZebraHandle zh, static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - oid_value attributeSet, + const Odr_oid *attributeSet, int reg_type, int num_bases, char **basenames, NMEM rset_nmem, @@ -1442,7 +1398,7 @@ static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh, static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, - oid_value attributeSet, + const Odr_oid *attributeSet, NMEM stream, int reg_type, int complete_flag, const char *rank_type, @@ -1502,7 +1458,7 @@ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, - oid_value attributeSet, + const Odr_oid *attributeSet, NMEM stream, int reg_type, int complete_flag, const char *rank_type, @@ -1569,7 +1525,7 @@ static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh, static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, - oid_value attributeSet, + const Odr_oid *attributeSet, NMEM stream, int reg_type, int complete_flag, const char *rank_type, @@ -1637,8 +1593,8 @@ static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh, static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, - char *term_dict, - oid_value attributeSet, + WRBUF term_dict, + const Odr_oid *attributeSet, struct grep_info *grep_info, int *max_pos, int reg_type, @@ -1649,7 +1605,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, int relation_value; int term_value; int r; - char *term_tmp = term_dict + strlen(term_dict); + WRBUF term_num = wrbuf_alloc(); *error_code = 0; attr_init_APT(&relation, zapt, 2); @@ -1661,44 +1617,59 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, { case 1: yaz_log(log_level_rpn, "Relation <"); - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1, + if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1, term_dst)) + { + wrbuf_destroy(term_num); return 0; - term_value = atoi (term_tmp); - gen_regular_rel(term_tmp, term_value-1, 1); + } + term_value = atoi (wrbuf_cstr(term_num)); + gen_regular_rel(term_dict, term_value-1, 1); break; case 2: yaz_log(log_level_rpn, "Relation <="); - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1, + if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1, term_dst)) + { + wrbuf_destroy(term_num); return 0; - term_value = atoi (term_tmp); - gen_regular_rel(term_tmp, term_value, 1); + } + term_value = atoi (wrbuf_cstr(term_num)); + gen_regular_rel(term_dict, term_value, 1); break; case 4: yaz_log(log_level_rpn, "Relation >="); - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1, + if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1, term_dst)) + { + wrbuf_destroy(term_num); return 0; - term_value = atoi (term_tmp); - gen_regular_rel(term_tmp, term_value, 0); + } + term_value = atoi (wrbuf_cstr(term_num)); + gen_regular_rel(term_dict, term_value, 0); break; case 5: yaz_log(log_level_rpn, "Relation >"); - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1, + if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1, term_dst)) + { + wrbuf_destroy(term_num); return 0; - term_value = atoi (term_tmp); - gen_regular_rel(term_tmp, term_value+1, 0); + } + term_value = atoi (wrbuf_cstr(term_num)); + gen_regular_rel(term_dict, term_value+1, 0); break; case -1: case 3: yaz_log(log_level_rpn, "Relation ="); - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1, + if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1, term_dst)) - return 0; - term_value = atoi (term_tmp); - sprintf(term_tmp, "(0*%d)", term_value); + { + wrbuf_destroy(term_num); + return 0; + } + term_value = atoi (wrbuf_cstr(term_num)); + wrbuf_printf(term_dict, "(0*%d)", term_value); break; case 103: /* term_tmp untouched.. */ @@ -1707,23 +1678,25 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, break; default: *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE; - return 0; + wrbuf_destroy(term_num); + return 0; } - yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp); - r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos, - 0, grep_handle); + r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), + 0, grep_info, max_pos, 0, grep_handle); if (r == 1) zebra_set_partial_result(zh); else if (r) yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r); yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx); + wrbuf_destroy(term_num); return 1; } static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, - oid_value attributeSet, NMEM stream, + WRBUF term_dict, + const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, int reg_type, int complete_flag, int num_bases, char **basenames, @@ -1731,7 +1704,6 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *xpath_use, struct ord_list **ol) { - char term_dict[2*IT_MAX_WORD+2]; int base_no; const char *termp; struct rpn_char_map_info rcmi; @@ -1744,7 +1716,7 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, for (base_no = 0; base_no < num_bases; base_no++) { - int max_pos, prefix_len = 0; + int max_pos; int relation_error = 0; int ord, ord_len, i; char ord_buf[32]; @@ -1763,18 +1735,19 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, continue; bases_ok++; + wrbuf_rewind(term_dict); + *ol = ord_list_append(stream, *ol, ord); ord_len = key_SU_encode (ord, ord_buf); - term_dict[prefix_len++] = '('; + wrbuf_putc(term_dict, '('); for (i = 0; i < ord_len; i++) { - term_dict[prefix_len++] = 1; - term_dict[prefix_len++] = ord_buf[i]; + wrbuf_putc(term_dict, 1); + wrbuf_putc(term_dict, ord_buf[i]); } - term_dict[prefix_len++] = ')'; - term_dict[prefix_len] = '\0'; + wrbuf_putc(term_dict, ')'); if (!numeric_relation(zh, zapt, &termp, term_dict, attributeSet, grep_info, &max_pos, reg_type, @@ -1800,7 +1773,7 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz, - oid_value attributeSet, + const Odr_oid *attributeSet, NMEM stream, int reg_type, int complete_flag, const char *rank_type, @@ -1828,6 +1801,7 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, while (1) { struct ord_list *ol; + WRBUF term_dict = wrbuf_alloc(); if (alloc_sets == num_result_sets) { int add = 10; @@ -1840,9 +1814,11 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, } yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp); grep_info.isam_p_indx = 0; - res = numeric_term(zh, zapt, &termp, attributeSet, stream, &grep_info, + res = numeric_term(zh, zapt, &termp, term_dict, + attributeSet, stream, &grep_info, reg_type, complete_flag, num_bases, basenames, term_dst, xpath_use, &ol); + wrbuf_destroy(term_dict); if (res == ZEBRA_FAIL || termp == 0) break; yaz_log(YLOG_DEBUG, "term: %s", term_dst); @@ -1880,7 +1856,7 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz, - oid_value attributeSet, + const Odr_oid *attributeSet, NMEM stream, const char *rank_type, NMEM rset_nmem, RSET *rset, @@ -1919,7 +1895,7 @@ static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh, } static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - oid_value attributeSet, NMEM stream, + const Odr_oid *attributeSet, NMEM stream, Z_SortKeySpecList *sort_sequence, const char *rank_type, NMEM rset_nmem, @@ -1931,8 +1907,6 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, AttrType sort_relation_type; Z_SortKeySpec *sks; Z_SortKey *sk; - int oid[OID_SIZE]; - oident oe; char termz[20]; attr_init_APT(&sort_relation_type, zapt, 7); @@ -1956,12 +1930,6 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, i = 0; sprintf(termz, "%d", i); - oe.proto = PROTO_Z3950; - oe.oclass = CLASS_ATTSET; - oe.value = attributeSet; - if (!oid_ent_to_oid (&oe, oid)) - return ZEBRA_FAIL; - sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks)); sks->sortElement = (Z_SortElement *) nmem_malloc(stream, sizeof(*sks->sortElement)); @@ -1972,7 +1940,7 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, sk->u.sortAttributes = (Z_SortAttributes *) nmem_malloc(stream, sizeof(*sk->u.sortAttributes)); - sk->u.sortAttributes->id = oid; + sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet); sk->u.sortAttributes->list = zapt->attributes; sks->sortRelation = (int *) @@ -1997,11 +1965,11 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - oid_value attributeSet, + const Odr_oid *attributeSet, struct xpath_location_step *xpath, int max, NMEM mem) { - oid_value curAttributeSet = attributeSet; + const Odr_oid *curAttributeSet = attributeSet; AttrType use; const char *use_string = 0; @@ -2022,50 +1990,48 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream, NMEM rset_nmem, struct rset_key_control *kc) { - RSET rset; struct grep_info grep_info; - char term_dict[2048]; - char ord_buf[32]; - int prefix_len = 0; int ord = zebraExplain_lookup_attr_str(zh->reg->zei, zinfo_index_category_index, - reg_type, - xpath_use); - int ord_len, i, r, max_pos; - int term_type = Z_Term_characterString; - const char *flags = "void"; - + reg_type, xpath_use); if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL) return rset_create_null(rset_nmem, kc, 0); if (ord < 0) return rset_create_null(rset_nmem, kc, 0); - if (prefix_len) - term_dict[prefix_len++] = '|'; else - term_dict[prefix_len++] = '('; - - ord_len = key_SU_encode (ord, ord_buf); - for (i = 0; ireg->dict, wrbuf_cstr(term_dict), 0, + &grep_info, &max_pos, 0, grep_handle); + yaz_log(YLOG_DEBUG, "%s %d positions", term, + grep_info.isam_p_indx); + rset = rset_trunc(zh, grep_info.isam_p_buf, + grep_info.isam_p_indx, term, strlen(term), + flags, 1, term_type, rset_nmem, + kc, kc->scope, 0, reg_type, 0 /* hits_limit */, + 0 /* term_ref_id_str */); + grep_info_delete(&grep_info); + wrbuf_destroy(term_dict); + return rset; } - term_dict[prefix_len++] = ')'; - strcpy(term_dict+prefix_len, term); - - grep_info.isam_p_indx = 0; - r = dict_lookup_grep(zh->reg->dict, term_dict, 0, - &grep_info, &max_pos, 0, grep_handle); - yaz_log(YLOG_DEBUG, "%s %d positions", term, - grep_info.isam_p_indx); - rset = rset_trunc(zh, grep_info.isam_p_buf, - grep_info.isam_p_indx, term, strlen(term), - flags, 1, term_type,rset_nmem, - kc, kc->scope, 0, reg_type, 0 /* hits_limit */, - 0 /* term_ref_id_str */); - grep_info_delete(&grep_info); - return rset; } static @@ -2221,7 +2187,7 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, #define MAX_XPATH_STEPS 10 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - oid_value attributeSet, NMEM stream, + const Odr_oid *attributeSet, NMEM stream, Z_SortKeySpecList *sort_sequence, int num_bases, char **basenames, NMEM rset_nmem, @@ -2346,7 +2312,7 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, } static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, - oid_value attributeSet, + const Odr_oid *attributeSet, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, int num_bases, char **basenames, @@ -2386,7 +2352,7 @@ ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs, } ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs, - oid_value attributeSet, + const Odr_oid *attributeSet, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, int num_bases, char **basenames, @@ -2423,7 +2389,7 @@ ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs, } ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, - oid_value attributeSet, + const Odr_oid *attributeSet, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, int num_bases, char **basenames,