X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Fzrpn.c;h=4788742864fb20e8c3ff7cfa578c5776a0866278;hp=472e7bdfe5911501d6f96b0118cca3eadbb16bc1;hb=5dd30bc682bf1338ed6beb1a485ccde86b06cc5a;hpb=3b2776240fdaec214a1046cdc0b9c45f2fc88a14 diff --git a/index/zrpn.c b/index/zrpn.c index 472e7bd..4788742 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -1,4 +1,4 @@ -/* $Id: zrpn.c,v 1.174 2005-04-14 09:03:24 adam Exp $ +/* $Id: zrpn.c,v 1.188 2005-05-03 14:04:31 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -20,7 +20,6 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - #include #include #ifdef WIN32 @@ -30,27 +29,13 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #endif #include +#include #include "index.h" #include #include #include -/* maximum number of terms in an and/or/phrase item */ -#define TERM_LIST_LENGTH_MAX 256 - -static const struct key_control it_ctrl = -{ - sizeof(struct it_key), - 2, /* we have sysnos and seqnos in this key, nothing more */ - key_compare_it, - key_logdump_txt, /* FIXME - clean up these functions */ - key_get_seq, -}; - - -const struct key_control *key_it_ctrl = &it_ctrl; - struct rpn_char_map_info { ZebraMaps zm; @@ -273,7 +258,7 @@ static int grep_handle(char *name, const char *info, void *p) } static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src, - const char *ct1, const char *ct2, int first) + const char *ct1, const char *ct2, int first) { const char *s1, *s0 = *src; const char **map; @@ -326,8 +311,8 @@ static void esc_str(char *out_buf, int out_size, /* term_100: handle term, where trunc = none(no operators at all) */ static int term_100(ZebraMaps zebra_maps, int reg_type, - const char **src, char *dst, int space_split, - char *dst_term) + const char **src, char *dst, int space_split, + char *dst_term) { const char *s0; const char **map; @@ -401,8 +386,8 @@ static int term_100(ZebraMaps zebra_maps, int reg_type, /* term_101: handle term, where trunc = Process # */ static int term_101(ZebraMaps zebra_maps, int reg_type, - const char **src, char *dst, int space_split, - char *dst_term) + const char **src, char *dst, int space_split, + char *dst_term) { const char *s0; const char **map; @@ -459,8 +444,8 @@ static int term_101(ZebraMaps zebra_maps, int reg_type, /* term_103: handle term, where trunc = re-2 (regular expressions) */ static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src, - char *dst, int *errors, int space_split, - char *dst_term) + char *dst, int *errors, int space_split, + char *dst_term) { int i = 0; int j = 0; @@ -524,20 +509,20 @@ static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src, } /* term_103: handle term, where trunc = re-1 (regular expressions) */ -static int term_102 (ZebraMaps zebra_maps, int reg_type, const char **src, - char *dst, int space_split, char *dst_term) +static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src, + char *dst, int space_split, char *dst_term) { return term_103(zebra_maps, reg_type, src, dst, NULL, space_split, - dst_term); + dst_term); } /* term_104: handle term, where trunc = Process # and ! */ static int term_104(ZebraMaps zebra_maps, int reg_type, - const char **src, char *dst, int space_split, - char *dst_term) + const char **src, char *dst, int space_split, + char *dst_term) { - const char *s0, *s1; + const char *s0; const char **map; int i = 0; int j = 0; @@ -585,17 +570,33 @@ static int term_104(ZebraMaps zebra_maps, int reg_type, } else { - s1 = s0; - map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0); + const char *s1 = s0; + int q_map_match = 0; + map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), + &q_map_match); if (space_split && **map == *CHR_SPACE) break; - while (s1 < s0) - { - if (strchr(REGEX_CHARS, *s1)) - dst[i++] = '\\'; - dst_term[j++] = *s1; - dst[i++] = *s1++; - } + + /* add non-space char */ + memcpy(dst_term+j, s1, s0 - s1); + j += (s0 - s1); + if (!q_map_match) + { + while (s1 < s0) + { + if (strchr(REGEX_CHARS, *s1)) + dst[i++] = '\\'; + dst[i++] = *s1++; + } + } + else + { + char tmpbuf[80]; + esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); + + strcpy(dst + i, map[0]); + i += strlen(map[0]); + } } } dst[i] = '\0'; @@ -605,11 +606,11 @@ static int term_104(ZebraMaps zebra_maps, int reg_type, } /* term_105/106: handle term, where trunc = Process * and ! and right trunc */ -static int term_105 (ZebraMaps zebra_maps, int reg_type, - const char **src, char *dst, int space_split, - char *dst_term, int right_truncate) +static int term_105(ZebraMaps zebra_maps, int reg_type, + const char **src, char *dst, int space_split, + char *dst_term, int right_truncate) { - const char *s0, *s1; + const char *s0; const char **map; int i = 0; int j = 0; @@ -632,17 +633,33 @@ static int term_105 (ZebraMaps zebra_maps, int reg_type, } else { - s1 = s0; - map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0); + const char *s1 = s0; + int q_map_match = 0; + map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), + &q_map_match); if (space_split && **map == *CHR_SPACE) break; - while (s1 < s0) - { - if (strchr(REGEX_CHARS, *s1)) - dst[i++] = '\\'; - dst_term[j++] = *s1; - dst[i++] = *s1++; - } + + /* add non-space char */ + memcpy(dst_term+j, s1, s0 - s1); + j += (s0 - s1); + if (!q_map_match) + { + while (s1 < s0) + { + if (strchr(REGEX_CHARS, *s1)) + dst[i++] = '\\'; + dst[i++] = *s1++; + } + } + else + { + char tmpbuf[80]; + esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); + + strcpy(dst + i, map[0]); + i += strlen(map[0]); + } } } if (right_truncate) @@ -788,9 +805,10 @@ void string_rel_add_char(char **term_p, const char *src, int *indx) * ([^a-].*|a[^b-].*|ab[^c-].*|abc) */ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const char **term_sub, char *term_dict, - oid_value attributeSet, - int reg_type, int space_split, char *term_dst) + const char **term_sub, char *term_dict, + oid_value attributeSet, + int reg_type, int space_split, char *term_dst, + int *error_code) { AttrType relation; int relation_value; @@ -801,13 +819,14 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, attr_init(&relation, zapt, 2); relation_value = attr_find(&relation, NULL); + *error_code = 0; yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value); switch (relation_value) { case 1: - if (!term_100 (zh->reg->zebra_maps, reg_type, - term_sub, term_component, - space_split, term_dst)) + if (!term_100(zh->reg->zebra_maps, reg_type, + term_sub, term_component, + space_split, term_dst)) return 0; yaz_log(log_level_rpn, "Relation <"); @@ -819,12 +838,12 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, if (i) *term_tmp++ = '|'; while (j < i) - string_rel_add_char (&term_tmp, term_component, &j); + string_rel_add_char(&term_tmp, term_component, &j); *term_tmp++ = '['; *term_tmp++ = '^'; - string_rel_add_char (&term_tmp, term_component, &i); + string_rel_add_char(&term_tmp, term_component, &i); *term_tmp++ = '-'; *term_tmp++ = ']'; @@ -838,9 +857,9 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, *term_tmp = '\0'; break; case 2: - if (!term_100 (zh->reg->zebra_maps, reg_type, - term_sub, term_component, - space_split, term_dst)) + if (!term_100(zh->reg->zebra_maps, reg_type, + term_sub, term_component, + space_split, term_dst)) return 0; yaz_log(log_level_rpn, "Relation <="); @@ -850,11 +869,11 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, int j = 0; while (j < i) - string_rel_add_char (&term_tmp, term_component, &j); + string_rel_add_char(&term_tmp, term_component, &j); *term_tmp++ = '['; *term_tmp++ = '^'; - string_rel_add_char (&term_tmp, term_component, &i); + string_rel_add_char(&term_tmp, term_component, &i); *term_tmp++ = '-'; *term_tmp++ = ']'; @@ -867,7 +886,7 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, break; } for (i = 0; term_component[i]; ) - string_rel_add_char (&term_tmp, term_component, &i); + string_rel_add_char(&term_tmp, term_component, &i); *term_tmp++ = ')'; *term_tmp = '\0'; break; @@ -883,12 +902,12 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, int j = 0; while (j < i) - string_rel_add_char (&term_tmp, term_component, &j); + string_rel_add_char(&term_tmp, term_component, &j); *term_tmp++ = '['; *term_tmp++ = '^'; *term_tmp++ = '-'; - string_rel_add_char (&term_tmp, term_component, &i); + string_rel_add_char(&term_tmp, term_component, &i); *term_tmp++ = ']'; *term_tmp++ = '.'; @@ -900,15 +919,15 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, break; } for (i = 0; term_component[i];) - string_rel_add_char (&term_tmp, term_component, &i); + string_rel_add_char(&term_tmp, term_component, &i); *term_tmp++ = '.'; *term_tmp++ = '+'; *term_tmp++ = ')'; *term_tmp = '\0'; break; case 4: - if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub, - term_component, space_split, term_dst)) + if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, + term_component, space_split, term_dst)) return 0; yaz_log(log_level_rpn, "Relation >="); @@ -920,18 +939,18 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, if (i) *term_tmp++ = '|'; while (j < i) - string_rel_add_char (&term_tmp, term_component, &j); + string_rel_add_char(&term_tmp, term_component, &j); *term_tmp++ = '['; if (term_component[i+1]) { *term_tmp++ = '^'; *term_tmp++ = '-'; - string_rel_add_char (&term_tmp, term_component, &i); + string_rel_add_char(&term_tmp, term_component, &i); } else { - string_rel_add_char (&term_tmp, term_component, &i); + string_rel_add_char(&term_tmp, term_component, &i); *term_tmp++ = '-'; } *term_tmp++ = ']'; @@ -945,64 +964,79 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, *term_tmp = '\0'; break; case 3: - default: + case 102: + case -1: yaz_log(log_level_rpn, "Relation ="); - if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub, - term_component, space_split, term_dst)) + if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, + term_component, space_split, term_dst)) return 0; strcat(term_tmp, "("); strcat(term_tmp, term_component); strcat(term_tmp, ")"); + break; + default: + *error_code = 117; + return 0; } return 1; } -static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const char **term_sub, - oid_value attributeSet, NMEM stream, - struct grep_info *grep_info, - int reg_type, int complete_flag, - int num_bases, char **basenames, - char *term_dst, int xpath_use); - -static RSET term_trunc(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const char **term_sub, - oid_value attributeSet, NMEM stream, - struct grep_info *grep_info, - int reg_type, int complete_flag, - int num_bases, char **basenames, - char *term_dst, - const char *rank_type, int xpath_use, - NMEM rset_nmem) +static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, + const char **term_sub, + oid_value attributeSet, NMEM stream, + struct grep_info *grep_info, + int reg_type, int complete_flag, + int num_bases, char **basenames, + char *term_dst, int xpath_use); + +static ZEBRA_RES term_trunc(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const char **term_sub, + oid_value attributeSet, NMEM stream, + struct grep_info *grep_info, + int reg_type, int complete_flag, + int num_bases, char **basenames, + char *term_dst, + const char *rank_type, int xpath_use, + NMEM rset_nmem, + RSET *rset, + struct rset_key_control *kc) { - int r; + ZEBRA_RES res; + *rset = 0; grep_info->isam_p_indx = 0; - r = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info, - reg_type, complete_flag, num_bases, basenames, - term_dst, xpath_use); - if (r < 1) - return 0; + res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info, + reg_type, complete_flag, num_bases, basenames, + term_dst, xpath_use); + if (res != ZEBRA_OK) + return res; + if (!*term_sub) /* no more terms ? */ + return res; yaz_log(log_level_rpn, "term: %s", term_dst); - return rset_trunc(zh, grep_info->isam_p_buf, - grep_info->isam_p_indx, term_dst, - strlen(term_dst), rank_type, 1 /* preserve pos */, - zapt->term->which, rset_nmem, - key_it_ctrl,key_it_ctrl->scope); + *rset = rset_trunc(zh, grep_info->isam_p_buf, + grep_info->isam_p_indx, term_dst, + strlen(term_dst), rank_type, 1 /* preserve pos */, + zapt->term->which, rset_nmem, + kc, kc->scope); + if (!*rset) + return ZEBRA_FAIL; + return ZEBRA_OK; } + static char *nmem_strdup_i(NMEM nmem, int v) { char val_str[64]; - sprintf (val_str, "%d", v); + sprintf(val_str, "%d", v); return nmem_strdup(nmem, val_str); } -static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const char **term_sub, - oid_value attributeSet, NMEM stream, - struct grep_info *grep_info, - int reg_type, int complete_flag, - int num_bases, char **basenames, - char *term_dst, int xpath_use) +static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, + const char **term_sub, + oid_value attributeSet, NMEM stream, + struct grep_info *grep_info, + int reg_type, int complete_flag, + int num_bases, char **basenames, + char *term_dst, int xpath_use) { char term_dict[2*IT_MAX_WORD+4000]; int j, r, base_no; @@ -1021,11 +1055,11 @@ static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, char *errString = 0; /* addinfo */ rpn_char_map_prepare (zh->reg, reg_type, &rcmi); - attr_init (&use, zapt, 1); - use_value = attr_find_ex (&use, &curAttributeSet, &use_string); + attr_init(&use, zapt, 1); + use_value = attr_find_ex(&use, &curAttributeSet, &use_string); yaz_log(log_level_rpn, "string_term, use value %d", use_value); - attr_init (&truncation, zapt, 5); - truncation_value = attr_find (&truncation, NULL); + attr_init(&truncation, zapt, 5); + truncation_value = attr_find(&truncation, NULL); yaz_log(log_level_rpn, "truncation value %d", truncation_value); if (use_value == -1) /* no attribute - assumy "any" */ @@ -1040,14 +1074,15 @@ static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, data1_local_attribute id_xpath_attr; data1_local_attribute *local_attr; int max_pos, prefix_len = 0; + int relation_error; termp = *term_sub; if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no])) { - zh->errCode = 109; /* Database unavailable */ + zh->errCode = YAZ_BIB1_DATABASE_UNAVAILABLE; zh->errString = basenames[base_no]; - return -1; + return ZEBRA_FAIL; } if (xpath_use > 0 && use_value == -2) { @@ -1099,7 +1134,7 @@ static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, if (r == -1) { /* set was found, but value wasn't defined */ - errCode = 114; + errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE; if (use_string) errString = nmem_strdup(stream, use_string); else @@ -1115,8 +1150,8 @@ static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, oident.value = curAttributeSet; oid_ent_to_oid (&oident, oid); - errCode = 121; - errString = nmem_strdup (stream, oident.desc); + errCode = YAZ_BIB1_UNSUPP_ATTRIBUTE_SET; + errString = nmem_strdup(stream, oident.desc); } continue; } @@ -1144,21 +1179,9 @@ static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, term_dict[prefix_len++] = ord_buf[i]; } } - if (!prefix_len) - { -#if 1 - bases_ok++; -#else - errCode = 114; - errString = nmem_strdup_i(stream, use_value); - continue; -#endif - } - else - { - bases_ok++; /* this has OK attributes */ + bases_ok++; + if (prefix_len) attr_ok = 1; - } term_dict[prefix_len++] = ')'; term_dict[prefix_len++] = 1; @@ -1172,78 +1195,115 @@ static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, case 100: /* do not truncate */ if (!string_relation (zh, zapt, &termp, term_dict, attributeSet, - reg_type, space_split, term_dst)) - return 0; + reg_type, space_split, term_dst, + &relation_error)) + { + if (relation_error) + { + zh->errCode = relation_error; + return ZEBRA_FAIL; + } + *term_sub = 0; + return ZEBRA_OK; + } break; case 1: /* right truncation */ term_dict[j++] = '('; if (!term_100(zh->reg->zebra_maps, reg_type, &termp, term_dict + j, space_split, term_dst)) - return 0; + { + *term_sub = 0; + return ZEBRA_OK; + } strcat(term_dict, ".*)"); break; case 2: /* keft truncation */ term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*'; if (!term_100(zh->reg->zebra_maps, reg_type, &termp, term_dict + j, space_split, term_dst)) - return 0; + { + *term_sub = 0; + return ZEBRA_OK; + } strcat(term_dict, ")"); break; case 3: /* left&right truncation */ term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*'; if (!term_100(zh->reg->zebra_maps, reg_type, &termp, term_dict + j, space_split, term_dst)) - return 0; + { + *term_sub = 0; + return ZEBRA_OK; + } strcat(term_dict, ".*)"); break; case 101: /* process # in term */ term_dict[j++] = '('; if (!term_101(zh->reg->zebra_maps, reg_type, &termp, term_dict + j, space_split, term_dst)) - return 0; + { + *term_sub = 0; + return ZEBRA_OK; + } strcat(term_dict, ")"); break; case 102: /* Regexp-1 */ term_dict[j++] = '('; if (!term_102(zh->reg->zebra_maps, reg_type, &termp, term_dict + j, space_split, term_dst)) - return 0; + { + *term_sub = 0; + return ZEBRA_OK; + } strcat(term_dict, ")"); break; case 103: /* Regexp-2 */ - r = 1; + regex_range = 1; term_dict[j++] = '('; init_pos = 2; - if (!term_103 (zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, ®ex_range, - space_split, term_dst)) - return 0; + if (!term_103(zh->reg->zebra_maps, reg_type, + &termp, term_dict + j, ®ex_range, + space_split, term_dst)) + { + *term_sub = 0; + return ZEBRA_OK; + } strcat(term_dict, ")"); + break; case 104: /* process # and ! in term */ term_dict[j++] = '('; - if (!term_104 (zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst)) - return 0; + if (!term_104(zh->reg->zebra_maps, reg_type, + &termp, term_dict + j, space_split, term_dst)) + { + *term_sub = 0; + return ZEBRA_OK; + } strcat(term_dict, ")"); break; case 105: /* process * and ! in term */ term_dict[j++] = '('; - if (!term_105 (zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst, 1)) - return 0; + if (!term_105(zh->reg->zebra_maps, reg_type, + &termp, term_dict + j, space_split, term_dst, 1)) + { + *term_sub = 0; + return ZEBRA_OK; + } strcat(term_dict, ")"); break; case 106: /* process * and ! in term */ term_dict[j++] = '('; - if (!term_105 (zh->reg->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst, 0)) - return 0; + if (!term_105(zh->reg->zebra_maps, reg_type, + &termp, term_dict + j, space_split, term_dst, 0)) + { + *term_sub = 0; + return ZEBRA_OK; + } strcat(term_dict, ")"); break; default: - zh->errCode = 120; + zh->errCode = YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE; zh->errString = nmem_strdup_i(stream, truncation_value); - return -1; + return ZEBRA_FAIL; } if (attr_ok) { @@ -1265,17 +1325,17 @@ static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, { zh->errCode = errCode; zh->errString = errString; - return -1; + return ZEBRA_FAIL; } *term_sub = termp; yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx); - return 1; + return ZEBRA_OK; } /* convert APT search term to UTF8 */ -static int zapt_term_to_utf8 (ZebraHandle zh, Z_AttributesPlusTerm *zapt, - char *termz) +static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt, + char *termz) { size_t sizez; Z_Term *term = zapt->term; @@ -1296,7 +1356,8 @@ static int zapt_term_to_utf8 (ZebraHandle zh, Z_AttributesPlusTerm *zapt, if (ret == (size_t)(-1)) { ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0); - zh->errCode = 125; + zh->errCode = + YAZ_BIB1_QUERY_TERM_INCLUDES_CHARS_THAT_DO_NOT_TRANSLATE_INTO_; return -1; } *outbuf = 0; @@ -1318,20 +1379,20 @@ static int zapt_term_to_utf8 (ZebraHandle zh, Z_AttributesPlusTerm *zapt, termz[sizez] = '\0'; break; default: - zh->errCode = 124; - return -1; + zh->errCode = YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM; + return ZEBRA_FAIL; } - return 0; + return ZEBRA_OK; } /* convert APT SCAN term to internal cmap */ -static int trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - char *termz, int reg_type) +static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, + char *termz, int reg_type) { char termz0[IT_MAX_WORD]; - if (zapt_term_to_utf8(zh, zapt, termz0)) - return -1; /* error */ + if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL) + return ZEBRA_FAIL; /* error */ else { const char **map; @@ -1344,7 +1405,7 @@ static int trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, while ((len = (cp_end - cp)) > 0) { - map = zebra_maps_input (zh->reg->zebra_maps, reg_type, &cp, len, 0); + map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0); if (**map == *CHR_SPACE) space_map = *map; else @@ -1359,7 +1420,7 @@ static int trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, } termz[i] = '\0'; } - return 0; + return ZEBRA_OK; } char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, @@ -1370,8 +1431,8 @@ char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, int truncation_value; char *ex_list = 0; - attr_init (&truncation, zapt, 5); - truncation_value = attr_find (&truncation, NULL); + attr_init(&truncation, zapt, 5); + truncation_value = attr_find(&truncation, NULL); switch (truncation_value) { @@ -1406,19 +1467,19 @@ char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, } } -static void grep_info_delete (struct grep_info *grep_info) +static void grep_info_delete(struct grep_info *grep_info) { #ifdef TERM_COUNT xfree(grep_info->term_no); #endif - xfree (grep_info->isam_p_buf); + xfree(grep_info->isam_p_buf); } -static int grep_info_prepare (ZebraHandle zh, - Z_AttributesPlusTerm *zapt, - struct grep_info *grep_info, - int reg_type, - NMEM stream) +static int grep_info_prepare(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + struct grep_info *grep_info, + int reg_type, + NMEM stream) { AttrType termset; int termset_value_numeric; @@ -1435,9 +1496,9 @@ static int grep_info_prepare (ZebraHandle zh, if (!zapt) return 0; - attr_init (&termset, zapt, 8); + attr_init(&termset, zapt, 8); termset_value_numeric = - attr_find_ex (&termset, NULL, &termset_value_string); + attr_find_ex(&termset, NULL, &termset_value_string); if (termset_value_numeric != -1) { char resname[32]; @@ -1445,17 +1506,17 @@ static int grep_info_prepare (ZebraHandle zh, if (termset_value_numeric != -2) { - sprintf (resname, "%d", termset_value_numeric); + sprintf(resname, "%d", termset_value_numeric); termset_name = resname; } else termset_name = termset_value_string; yaz_log(log_level_rpn, "creating termset set %s", termset_name); - grep_info->termset = resultSetAdd (zh, termset_name, 1); + grep_info->termset = resultSetAdd(zh, termset_name, 1); if (!grep_info->termset) { - zh->errCode = 128; - zh->errString = nmem_strdup (stream, termset_name); + zh->errCode = YAZ_BIB1_ILLEGAL_RESULT_SET_NAME; + zh->errString = nmem_strdup(stream, termset_name); return -1; } } @@ -1463,138 +1524,185 @@ static int grep_info_prepare (ZebraHandle zh, } -static RSET rpn_search_APT_phrase (ZebraHandle zh, - Z_AttributesPlusTerm *zapt, - const char *termz_org, - oid_value attributeSet, - NMEM stream, - int reg_type, int complete_flag, - const char *rank_type, int xpath_use, - int num_bases, char **basenames, - NMEM rset_nmem) +static ZEBRA_RES term_list_trunc(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const char *termz_org, + oid_value attributeSet, + NMEM stream, + int reg_type, int complete_flag, + const char *rank_type, int xpath_use, + int num_bases, char **basenames, + NMEM rset_nmem, + RSET **result_sets, int *num_result_sets, + struct rset_key_control *kc) { char term_dst[IT_MAX_WORD+1]; - RSET rset[TERM_LIST_LENGTH_MAX], result; - size_t rset_no = 0; struct grep_info grep_info; char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type); const char *termp = termz; + int alloc_sets = 0; + *num_result_sets = 0; *term_dst = 0; - if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream)) - return 0; - for (; rset_no < sizeof(rset)/sizeof(*rset); rset_no++) + if (grep_info_prepare(zh, zapt, &grep_info, reg_type, stream)) + return ZEBRA_FAIL; + while(1) { - yaz_log(log_level_rpn, "APT_phrase termp=%s", termp); - rset[rset_no] = term_trunc(zh, zapt, &termp, attributeSet, - stream, &grep_info, - reg_type, complete_flag, - num_bases, basenames, - term_dst, rank_type, - xpath_use,rset_nmem); - if (!rset[rset_no]) - break; + ZEBRA_RES res; + + if (alloc_sets == *num_result_sets) + { + int add = 10; + RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * + sizeof(*rnew)); + if (alloc_sets) + memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew)); + alloc_sets = alloc_sets + add; + *result_sets = rnew; + } + res = term_trunc(zh, zapt, &termp, attributeSet, + stream, &grep_info, + reg_type, complete_flag, + num_bases, basenames, + term_dst, rank_type, + xpath_use, rset_nmem, + &(*result_sets)[*num_result_sets], + kc); + if (res != ZEBRA_OK) + { + int i; + for (i = 0; i < *num_result_sets; i++) + rset_delete((*result_sets)[i]); + grep_info_delete (&grep_info); + return res; + } + if ((*result_sets)[*num_result_sets] == 0) + break; + (*num_result_sets)++; } - grep_info_delete (&grep_info); - if (rset_no == 0) - return rsnull_create (rset_nmem,key_it_ctrl); - else if (rset_no == 1) - return (rset[0]); - else - result = rsprox_create( rset_nmem, key_it_ctrl, key_it_ctrl->scope, - rset_no, rset, - 1 /* ordered */, 0 /* exclusion */, - 3 /* relation */, 1 /* distance */); - return result; + grep_info_delete(&grep_info); + return ZEBRA_OK; } -static RSET rpn_search_APT_or_list (ZebraHandle zh, - Z_AttributesPlusTerm *zapt, - const char *termz_org, - oid_value attributeSet, - NMEM stream, - int reg_type, int complete_flag, - const char *rank_type, - int xpath_use, - int num_bases, char **basenames, - NMEM rset_nmem) +static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const char *termz_org, + oid_value attributeSet, + NMEM stream, + int reg_type, int complete_flag, + const char *rank_type, int xpath_use, + int num_bases, char **basenames, + NMEM rset_nmem, + RSET *rset, + struct rset_key_control *kc) { - char term_dst[IT_MAX_WORD+1]; - RSET rset[TERM_LIST_LENGTH_MAX]; - size_t rset_no = 0; - struct grep_info grep_info; - char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type); - const char *termp = termz; - - if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream)) - return 0; - for (; rset_no < sizeof(rset)/sizeof(*rset); rset_no++) - { - yaz_log(log_level_rpn, "APT_or_list termp=%s", termp); - rset[rset_no] = term_trunc(zh, zapt, &termp, attributeSet, - stream, &grep_info, - reg_type, complete_flag, - num_bases, basenames, - term_dst, rank_type, - xpath_use,rset_nmem); - if (!rset[rset_no]) - break; - } - grep_info_delete (&grep_info); - if (rset_no == 0) - return rsnull_create (rset_nmem,key_it_ctrl); - return rsmulti_or_create(rset_nmem, key_it_ctrl,key_it_ctrl->scope, - rset_no, rset); + RSET *result_sets = 0; + int num_result_sets = 0; + ZEBRA_RES res = + term_list_trunc(zh, zapt, termz_org, attributeSet, + stream, reg_type, complete_flag, + rank_type, xpath_use, + num_bases, basenames, + rset_nmem, + &result_sets, &num_result_sets, kc); + if (res != ZEBRA_OK) + return res; + if (num_result_sets == 0) + *rset = rsnull_create (rset_nmem, kc); + else if (num_result_sets == 1) + *rset = result_sets[0]; + else + *rset = rsprox_create(rset_nmem, kc, kc->scope, + num_result_sets, result_sets, + 1 /* ordered */, 0 /* exclusion */, + 3 /* relation */, 1 /* distance */); + if (!*rset) + return ZEBRA_FAIL; + return ZEBRA_OK; } -static RSET rpn_search_APT_and_list (ZebraHandle zh, - Z_AttributesPlusTerm *zapt, - const char *termz_org, - oid_value attributeSet, - NMEM stream, - int reg_type, int complete_flag, - const char *rank_type, - int xpath_use, - int num_bases, char **basenames, - NMEM rset_nmem) +static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const char *termz_org, + oid_value attributeSet, + NMEM stream, + int reg_type, int complete_flag, + const char *rank_type, + int xpath_use, + int num_bases, char **basenames, + NMEM rset_nmem, + RSET *rset, + struct rset_key_control *kc) { - char term_dst[IT_MAX_WORD+1]; - RSET rset[TERM_LIST_LENGTH_MAX]; - size_t rset_no = 0; - struct grep_info grep_info; - char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type); - const char *termp = termz; + RSET *result_sets = 0; + int num_result_sets = 0; + ZEBRA_RES res = + term_list_trunc(zh, zapt, termz_org, attributeSet, + stream, reg_type, complete_flag, + rank_type, xpath_use, + num_bases, basenames, + rset_nmem, + &result_sets, &num_result_sets, kc); + if (res != ZEBRA_OK) + return res; + if (num_result_sets == 0) + *rset = rsnull_create (rset_nmem, kc); + else if (num_result_sets == 1) + *rset = result_sets[0]; + else + *rset = rsmulti_or_create(rset_nmem, kc, kc->scope, + num_result_sets, result_sets); + if (!*rset) + return ZEBRA_FAIL; + return ZEBRA_OK; +} - if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream)) - return 0; - for (; rset_no < sizeof(rset)/sizeof(*rset); rset_no++) - { - yaz_log(log_level_rpn, "APT_and_list termp=%s", termp); - rset[rset_no] = term_trunc(zh, zapt, &termp, attributeSet, - stream, &grep_info, - reg_type, complete_flag, - num_bases, basenames, - term_dst, rank_type, - xpath_use, rset_nmem); - if (!rset[rset_no]) - break; - } - grep_info_delete (&grep_info); - if (rset_no == 0) - return rsnull_create(rset_nmem,key_it_ctrl); - - return rsmulti_and_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope, - rset_no, rset); +static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const char *termz_org, + oid_value attributeSet, + NMEM stream, + int reg_type, int complete_flag, + const char *rank_type, + int xpath_use, + int num_bases, char **basenames, + NMEM rset_nmem, + RSET *rset, + struct rset_key_control *kc) +{ + RSET *result_sets = 0; + int num_result_sets = 0; + ZEBRA_RES res = + term_list_trunc(zh, zapt, termz_org, attributeSet, + stream, reg_type, complete_flag, + rank_type, xpath_use, + num_bases, basenames, + rset_nmem, + &result_sets, &num_result_sets, + kc); + if (res != ZEBRA_OK) + return res; + if (num_result_sets == 0) + *rset = rsnull_create (rset_nmem, kc); + else if (num_result_sets == 1) + *rset = result_sets[0]; + else + *rset = rsmulti_and_create(rset_nmem, kc, kc->scope, + num_result_sets, result_sets); + if (!*rset) + return ZEBRA_FAIL; + return ZEBRA_OK; } -static int numeric_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const char **term_sub, - char *term_dict, - oid_value attributeSet, - struct grep_info *grep_info, - int *max_pos, - int reg_type, - char *term_dst) +static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, + const char **term_sub, + char *term_dict, + oid_value attributeSet, + struct grep_info *grep_info, + int *max_pos, + int reg_type, + char *term_dst, + int *error_code) { AttrType relation; int relation_value; @@ -1602,37 +1710,42 @@ static int numeric_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt, int r; char *term_tmp = term_dict + strlen(term_dict); - attr_init (&relation, zapt, 2); - relation_value = attr_find (&relation, NULL); + *error_code = 0; + attr_init(&relation, zapt, 2); + relation_value = attr_find(&relation, NULL); yaz_log(log_level_rpn, "numeric relation value=%d", relation_value); - if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1, - term_dst)) + if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1, + term_dst)) return 0; term_value = atoi (term_tmp); switch (relation_value) { case 1: yaz_log(log_level_rpn, "Relation <"); - gen_regular_rel (term_tmp, term_value-1, 1); + gen_regular_rel(term_tmp, term_value-1, 1); break; case 2: yaz_log(log_level_rpn, "Relation <="); - gen_regular_rel (term_tmp, term_value, 1); + gen_regular_rel(term_tmp, term_value, 1); break; case 4: yaz_log(log_level_rpn, "Relation >="); - gen_regular_rel (term_tmp, term_value, 0); + gen_regular_rel(term_tmp, term_value, 0); break; case 5: yaz_log(log_level_rpn, "Relation >"); - gen_regular_rel (term_tmp, term_value+1, 0); + gen_regular_rel(term_tmp, term_value+1, 0); break; + case -1: case 3: - default: yaz_log(log_level_rpn, "Relation ="); - sprintf (term_tmp, "(0*%d)", term_value); + sprintf(term_tmp, "(0*%d)", term_value); + break; + default: + *error_code = 117; + return 0; } yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp); r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos, @@ -1643,12 +1756,13 @@ static int numeric_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt, return 1; } -static int numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const char **term_sub, - oid_value attributeSet, struct grep_info *grep_info, - int reg_type, int complete_flag, - int num_bases, char **basenames, - char *term_dst, int xpath_use, NMEM stream) +static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, + const char **term_sub, + oid_value attributeSet, + struct grep_info *grep_info, + int reg_type, int complete_flag, + int num_bases, char **basenames, + char *term_dst, int xpath_use, NMEM stream) { char term_dict[2*IT_MAX_WORD+2]; int r, base_no; @@ -1664,8 +1778,8 @@ static int numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, char *errString = 0; /* addinfo */ rpn_char_map_prepare (zh->reg, reg_type, &rcmi); - attr_init (&use, zapt, 1); - use_value = attr_find_ex (&use, &curAttributeSet, &use_string); + attr_init(&use, zapt, 1); + use_value = attr_find_ex(&use, &curAttributeSet, &use_string); if (use_value == -1) use_value = 1016; @@ -1676,6 +1790,7 @@ static int numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, data1_local_attribute id_xpath_attr; data1_local_attribute *local_attr; int max_pos, prefix_len = 0; + int relation_error = 0; termp = *term_sub; if (use_value == -2) /* string attribute (assume IDXPATH/any) */ @@ -1702,20 +1817,20 @@ static int numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, curAttributeSet, use_value, r); if (r == -1) { - errCode = 114; + errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE; if (use_string) errString = nmem_strdup(stream, use_string); else errString = nmem_strdup_i (stream, use_value); } else - errCode = 121; + errCode = YAZ_BIB1_UNSUPP_ATTRIBUTE_SET; continue; } } if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no])) { - zh->errCode = 109; /* Database unavailable */ + zh->errCode = YAZ_BIB1_DATABASE_UNAVAILABLE; zh->errString = basenames[base_no]; return -1; } @@ -1745,7 +1860,7 @@ static int numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, } if (!prefix_len) { - errCode = 114; + errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE; errString = nmem_strdup_i(stream, use_value); continue; } @@ -1755,85 +1870,122 @@ static int numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, term_dict[prefix_len++] = reg_type; yaz_log(YLOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]); term_dict[prefix_len] = '\0'; - if (!numeric_relation (zh, zapt, &termp, term_dict, - attributeSet, grep_info, &max_pos, reg_type, - term_dst)) - return 0; + if (!numeric_relation(zh, zapt, &termp, term_dict, + attributeSet, grep_info, &max_pos, reg_type, + term_dst, &relation_error)) + { + if (relation_error) + { + zh->errCode = relation_error; + zh->errString = 0; + return ZEBRA_FAIL; + } + *term_sub = 0; + return ZEBRA_OK; + } } if (!bases_ok) { zh->errCode = errCode; zh->errString = errString; - return -1; + return ZEBRA_FAIL; } *term_sub = termp; yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx); - return 1; + return ZEBRA_OK; } -static RSET rpn_search_APT_numeric (ZebraHandle zh, - Z_AttributesPlusTerm *zapt, - const char *termz, - oid_value attributeSet, - NMEM stream, - int reg_type, int complete_flag, - const char *rank_type, int xpath_use, - int num_bases, char **basenames, - NMEM rset_nmem) +static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const char *termz, + oid_value attributeSet, + NMEM stream, + int reg_type, int complete_flag, + const char *rank_type, int xpath_use, + int num_bases, char **basenames, + NMEM rset_nmem, + RSET *rset, + struct rset_key_control *kc) { char term_dst[IT_MAX_WORD+1]; const char *termp = termz; - RSET rset[TERM_LIST_LENGTH_MAX]; - int r; - size_t rset_no = 0; + RSET *result_sets = 0; + int num_result_sets = 0; + ZEBRA_RES res; struct grep_info grep_info; + int alloc_sets = 0; - yaz_log(log_level_rpn, "APT_numeric t='%s'",termz); - if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream)) - return 0; - for (; rset_no < sizeof(rset)/sizeof(*rset); rset_no++) + yaz_log(log_level_rpn, "APT_numeric t='%s'", termz); + if (grep_info_prepare(zh, zapt, &grep_info, reg_type, stream)) + return ZEBRA_FAIL; + while (1) { + if (alloc_sets == num_result_sets) + { + int add = 10; + RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * + sizeof(*rnew)); + if (alloc_sets) + memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew)); + alloc_sets = alloc_sets + add; + result_sets = rnew; + } yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp); grep_info.isam_p_indx = 0; - r = numeric_term(zh, zapt, &termp, attributeSet, &grep_info, - reg_type, complete_flag, num_bases, basenames, - term_dst, xpath_use, - stream); - if (r < 1) - break; + res = numeric_term(zh, zapt, &termp, attributeSet, &grep_info, + reg_type, complete_flag, num_bases, basenames, + term_dst, xpath_use, + stream); + if (res == ZEBRA_FAIL || termp == 0) + break; yaz_log(YLOG_DEBUG, "term: %s", term_dst); - rset[rset_no] = rset_trunc(zh, grep_info.isam_p_buf, - grep_info.isam_p_indx, term_dst, - strlen(term_dst), rank_type, - 0 /* preserve position */, - zapt->term->which, rset_nmem, - key_it_ctrl,key_it_ctrl->scope); - if (!rset[rset_no]) + result_sets[num_result_sets] = + rset_trunc(zh, grep_info.isam_p_buf, + grep_info.isam_p_indx, term_dst, + strlen(term_dst), rank_type, + 0 /* preserve position */, + zapt->term->which, rset_nmem, + kc, kc->scope); + if (!result_sets[num_result_sets]) break; + num_result_sets++; } - grep_info_delete (&grep_info); - if (rset_no == 0) - return rsnull_create(rset_nmem,key_it_ctrl); - if (rset_no == 1) - return rset[0]; - return rsmulti_and_create(rset_nmem,key_it_ctrl,key_it_ctrl->scope, - rset_no, rset); + grep_info_delete(&grep_info); + if (termp) + { + int i; + for (i = 0; iscope, + num_result_sets, result_sets); + if (!*rset) + return ZEBRA_FAIL; + return ZEBRA_OK; } -static RSET rpn_search_APT_local (ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const char *termz, - oid_value attributeSet, - NMEM stream, - const char *rank_type, NMEM rset_nmem) +static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const char *termz, + oid_value attributeSet, + NMEM stream, + const char *rank_type, NMEM rset_nmem, + RSET *rset, + struct rset_key_control *kc) { - RSET result; RSFD rsfd; struct it_key key; int sys; - result = rstemp_create( rset_nmem,key_it_ctrl,key_it_ctrl->scope, - res_get (zh->res, "setTmpDir"),0 ); - rsfd = rset_open (result, RSETF_WRITE); - + *rset = rstemp_create(rset_nmem, kc, kc->scope, + res_get (zh->res, "setTmpDir"),0 ); + rsfd = rset_open(*rset, RSETF_WRITE); + sys = atoi(termz); if (sys <= 0) sys = 1; @@ -1842,13 +1994,15 @@ static RSET rpn_search_APT_local (ZebraHandle zh, Z_AttributesPlusTerm *zapt, key.len = 2; rset_write (rsfd, &key); rset_close (rsfd); - return result; + return ZEBRA_OK; } -static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt, - oid_value attributeSet, NMEM stream, - Z_SortKeySpecList *sort_sequence, - const char *rank_type) +static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, + oid_value attributeSet, NMEM stream, + Z_SortKeySpecList *sort_sequence, + const char *rank_type, + RSET *rset, + struct rset_key_control *kc) { int i; int sort_relation_value; @@ -1862,11 +2016,11 @@ static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt, oident oe; char termz[20]; - attr_init (&sort_relation_type, zapt, 7); - sort_relation_value = attr_find (&sort_relation_type, &attributeSet); + attr_init(&sort_relation_type, zapt, 7); + sort_relation_value = attr_find(&sort_relation_type, &attributeSet); - attr_init (&use_type, zapt, 1); - use_value = attr_find (&use_type, &attributeSet); + attr_init(&use_type, zapt, 1); + use_value = attr_find(&use_type, &attributeSet); if (!sort_sequence->specs) { @@ -1884,13 +2038,13 @@ static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt, zapt->term->u.general->len); if (i >= sort_sequence->num_specs) i = 0; - sprintf (termz, "%d", i); + sprintf(termz, "%d", i); oe.proto = PROTO_Z3950; oe.oclass = CLASS_ATTSET; oe.value = attributeSet; if (!oid_ent_to_oid (&oe, oid)) - return 0; + return ZEBRA_FAIL; sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks)); sks->sortElement = (Z_SortElement *) @@ -1935,8 +2089,8 @@ static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt, sks->which = Z_SortKeySpec_null; sks->u.null = odr_nullval (); sort_sequence->specs[i] = sks; - return rsnull_create (NULL,key_it_ctrl); - /* FIXME - nmem?? */ + *rset = rsnull_create (NULL, kc); + return ZEBRA_OK; } @@ -1948,8 +2102,8 @@ static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt, AttrType use; const char *use_string = 0; - attr_init (&use, zapt, 1); - attr_find_ex (&use, &curAttributeSet, &use_string); + attr_init(&use, zapt, 1); + attr_find_ex(&use, &curAttributeSet, &use_string); if (!use_string || *use_string != '/') return -1; @@ -1961,7 +2115,8 @@ static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt, static RSET xpath_trunc(ZebraHandle zh, NMEM stream, int reg_type, const char *term, int use, - oid_value curAttributeSet, NMEM rset_nmem) + oid_value curAttributeSet, NMEM rset_nmem, + struct rset_key_control *kc) { RSET rset; struct grep_info grep_info; @@ -1973,11 +2128,11 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream, int term_type = Z_Term_characterString; const char *flags = "void"; - if (grep_info_prepare (zh, 0 /* zapt */, &grep_info, '0', stream)) - return rsnull_create (rset_nmem,key_it_ctrl); - + if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0', stream)) + return rsnull_create(rset_nmem, kc); + if (ord < 0) - return rsnull_create (rset_nmem,key_it_ctrl); + return rsnull_create(rset_nmem, kc); if (prefix_len) term_dict[prefix_len++] = '|'; else @@ -1998,22 +2153,23 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream, grep_info.isam_p_indx = 0; r = dict_lookup_grep(zh->reg->dict, term_dict, 0, &grep_info, &max_pos, 0, grep_handle); - yaz_log (YLOG_DEBUG, "%s %d positions", term, + yaz_log(YLOG_DEBUG, "%s %d positions", term, grep_info.isam_p_indx); rset = rset_trunc(zh, grep_info.isam_p_buf, - grep_info.isam_p_indx, term, strlen(term), - flags, 1, term_type,rset_nmem, - key_it_ctrl, key_it_ctrl->scope); - grep_info_delete (&grep_info); + grep_info.isam_p_indx, term, strlen(term), + flags, 1, term_type,rset_nmem, + kc, kc->scope); + grep_info_delete(&grep_info); return rset; } -static RSET rpn_search_xpath (ZebraHandle zh, - oid_value attributeSet, - int num_bases, char **basenames, - NMEM stream, const char *rank_type, RSET rset, - int xpath_len, struct xpath_location_step *xpath, - NMEM rset_nmem) +static RSET rpn_search_xpath(ZebraHandle zh, + oid_value attributeSet, + int num_bases, char **basenames, + NMEM stream, const char *rank_type, RSET rset, + int xpath_len, struct xpath_location_step *xpath, + NMEM rset_nmem, + struct rset_key_control *kc) { oid_value curAttributeSet = attributeSet; int base_no; @@ -2022,10 +2178,10 @@ static RSET rpn_search_xpath (ZebraHandle zh, if (xpath_len < 0) return rset; - yaz_log (YLOG_DEBUG, "xpath len=%d", xpath_len); + yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len); for (i = 0; ireg->zei, basenames[base_no])) { - zh->errCode = 109; /* Database unavailable */ + zh->errCode = YAZ_BIB1_DATABASE_UNAVAILABLE; zh->errString = basenames[base_no]; return rset; } @@ -2123,7 +2279,7 @@ static RSET rpn_search_xpath (ZebraHandle zh, wrbuf_puts(wbuf, ""); rset_attr = xpath_trunc( zh, stream, '0', wrbuf_buf(wbuf), 3, - curAttributeSet,rset_nmem); + curAttributeSet, rset_nmem, kc); wrbuf_free(wbuf, 1); } else @@ -2131,17 +2287,16 @@ static RSET rpn_search_xpath (ZebraHandle zh, if (!first_path) continue; } - yaz_log (log_level_rpn, "xpath_rev (%d) = %s", level, xpath_rev); + yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, xpath_rev); if (strlen(xpath_rev)) { rset_start_tag = xpath_trunc(zh, stream, '0', - xpath_rev, 1, curAttributeSet, rset_nmem); + xpath_rev, 1, curAttributeSet, rset_nmem, kc); rset_end_tag = xpath_trunc(zh, stream, '0', - xpath_rev, 2, curAttributeSet, rset_nmem); + xpath_rev, 2, curAttributeSet, rset_nmem, kc); - rset = rsbetween_create(rset_nmem, key_it_ctrl, - key_it_ctrl->scope, + rset = rsbetween_create(rset_nmem, kc, kc->scope, rset_start_tag, rset, rset_end_tag, rset_attr); } @@ -2152,21 +2307,21 @@ static RSET rpn_search_xpath (ZebraHandle zh, return rset; } - - -static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt, - oid_value attributeSet, NMEM stream, - Z_SortKeySpecList *sort_sequence, - int num_bases, char **basenames, - NMEM rset_nmem) +static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, + oid_value attributeSet, NMEM stream, + Z_SortKeySpecList *sort_sequence, + int num_bases, char **basenames, + NMEM rset_nmem, + RSET *rset, + struct rset_key_control *kc) { + ZEBRA_RES res = ZEBRA_OK; unsigned reg_id; char *search_type = NULL; char rank_type[128]; int complete_flag; int sort_flag; char termz[IT_MAX_WORD+1]; - RSET rset = 0; int xpath_len; int xpath_use = 0; struct xpath_location_step xpath[10]; @@ -2176,20 +2331,20 @@ static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt, log_level_rpn = yaz_log_module_level("rpn"); log_level_set = 1; } - zebra_maps_attr (zh->reg->zebra_maps, zapt, ®_id, &search_type, - rank_type, &complete_flag, &sort_flag); + zebra_maps_attr(zh->reg->zebra_maps, zapt, ®_id, &search_type, + rank_type, &complete_flag, &sort_flag); yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id); yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag); yaz_log(YLOG_DEBUG, "search_type=%s", search_type); yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type); - if (zapt_term_to_utf8(zh, zapt, termz)) - return 0; + if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL) + return ZEBRA_FAIL; if (sort_flag) - return rpn_sort_spec (zh, zapt, attributeSet, stream, sort_sequence, - rank_type); + return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence, + rank_type, rset, kc); xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream); if (xpath_len >= 0) { @@ -2198,213 +2353,267 @@ static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt, xpath_use = 1015; } - if (!strcmp (search_type, "phrase")) + if (!strcmp(search_type, "phrase")) + { + res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream, + reg_id, complete_flag, rank_type, + xpath_use, + num_bases, basenames, rset_nmem, + rset, kc); + } + else if (!strcmp(search_type, "and-list")) { - rset = rpn_search_APT_phrase (zh, zapt, termz, attributeSet, stream, - reg_id, complete_flag, rank_type, - xpath_use, - num_bases, basenames, rset_nmem); + res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream, + reg_id, complete_flag, rank_type, + xpath_use, + num_bases, basenames, rset_nmem, + rset, kc); } - else if (!strcmp (search_type, "and-list")) + else if (!strcmp(search_type, "or-list")) { - rset = rpn_search_APT_and_list (zh, zapt, termz, attributeSet, stream, - reg_id, complete_flag, rank_type, - xpath_use, - num_bases, basenames, rset_nmem); + res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream, + reg_id, complete_flag, rank_type, + xpath_use, + num_bases, basenames, rset_nmem, + rset, kc); } - else if (!strcmp (search_type, "or-list")) + else if (!strcmp(search_type, "local")) { - rset = rpn_search_APT_or_list (zh, zapt, termz, attributeSet, stream, - reg_id, complete_flag, rank_type, - xpath_use, - num_bases, basenames, rset_nmem); + res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream, + rank_type, rset_nmem, rset, kc); } - else if (!strcmp (search_type, "local")) + else if (!strcmp(search_type, "numeric")) { - rset = rpn_search_APT_local (zh, zapt, termz, attributeSet, stream, - rank_type, rset_nmem); + res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream, + reg_id, complete_flag, rank_type, + xpath_use, + num_bases, basenames, rset_nmem, + rset, kc); } - else if (!strcmp (search_type, "numeric")) + else { - rset = rpn_search_APT_numeric (zh, zapt, termz, attributeSet, stream, - reg_id, complete_flag, rank_type, - xpath_use, - num_bases, basenames, rset_nmem); + zh->errCode = YAZ_BIB1_UNSUPP_STRUCTURE_ATTRIBUTE; + return ZEBRA_FAIL; } - else if (!strcmp (search_type, "always")) + if (res != ZEBRA_OK) + return res; + if (!*rset) + return ZEBRA_FAIL; + *rset = rpn_search_xpath(zh, attributeSet, num_bases, basenames, + stream, rank_type, *rset, + xpath_len, xpath, rset_nmem, kc); + if (!*rset) + return ZEBRA_FAIL; + return ZEBRA_OK; +} + +static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, + oid_value attributeSet, + NMEM stream, NMEM rset_nmem, + Z_SortKeySpecList *sort_sequence, + int num_bases, char **basenames, + RSET **result_sets, int *num_result_sets, + Z_Operator *parent_op, + struct rset_key_control *kc); + +ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs, + oid_value attributeSet, + NMEM stream, NMEM rset_nmem, + Z_SortKeySpecList *sort_sequence, + int num_bases, char **basenames, + RSET *result_set) +{ + RSET *result_sets = 0; + int num_result_sets = 0; + ZEBRA_RES res; + struct rset_key_control *kc = zebra_key_control_create(zh); + + res = rpn_search_structure(zh, zs, attributeSet, + stream, rset_nmem, + sort_sequence, + num_bases, basenames, + &result_sets, &num_result_sets, + 0 /* no parent op */, + kc); + if (res != ZEBRA_OK) { - rset = 0; + int i; + for (i = 0; ierrCode = 118; - return rpn_search_xpath (zh, attributeSet, num_bases, basenames, - stream, rank_type, rset, - xpath_len, xpath, rset_nmem); + assert(num_result_sets == 1); + assert(result_sets); + assert(*result_sets); + *result_set = *result_sets; + + (*kc->dec)(kc); + return ZEBRA_OK; } -static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs, - oid_value attributeSet, - NMEM stream, NMEM rset_nmem, - Z_SortKeySpecList *sort_sequence, - int num_bases, char **basenames) +ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, + oid_value attributeSet, + NMEM stream, NMEM rset_nmem, + Z_SortKeySpecList *sort_sequence, + int num_bases, char **basenames, + RSET **result_sets, int *num_result_sets, + Z_Operator *parent_op, + struct rset_key_control *kc) { - RSET r = NULL; + *num_result_sets = 0; if (zs->which == Z_RPNStructure_complex) { + ZEBRA_RES res; Z_Operator *zop = zs->u.complex->roperator; - RSET rsets[2]; /* l and r argument */ - - rsets[0]=rpn_search_structure (zh, zs->u.complex->s1, - attributeSet, stream, rset_nmem, - sort_sequence, - num_bases, basenames); - if (rsets[0] == NULL) - return NULL; - rsets[1]=rpn_search_structure (zh, zs->u.complex->s2, - attributeSet, stream, rset_nmem, - sort_sequence, - num_bases, basenames); - if (rsets[1] == NULL) - { - rset_delete (rsets[0]); - return NULL; - } + RSET *result_sets_l = 0; + int num_result_sets_l = 0; + RSET *result_sets_r = 0; + int num_result_sets_r = 0; + + res = rpn_search_structure(zh, zs->u.complex->s1, + attributeSet, stream, rset_nmem, + sort_sequence, + num_bases, basenames, + &result_sets_l, &num_result_sets_l, + zop, kc); + if (res != ZEBRA_OK) + { + int i; + for (i = 0; iu.complex->s2, + attributeSet, stream, rset_nmem, + sort_sequence, + num_bases, basenames, + &result_sets_r, &num_result_sets_r, + zop, kc); + if (res != ZEBRA_OK) + { + int i; + for (i = 0; iwhich) - { - case Z_Operator_and: - r = rsmulti_and_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope, - 2, rsets); - break; - case Z_Operator_or: - r = rsmulti_or_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope, - 2, rsets); - break; - case Z_Operator_and_not: - r = rsbool_create_not(rset_nmem,key_it_ctrl, key_it_ctrl->scope, - rsets[0],rsets[1]); - break; - case Z_Operator_prox: - if (zop->u.prox->which != Z_ProximityOperator_known) - { - zh->errCode = 132; - return NULL; - } - if (*zop->u.prox->u.known != Z_ProxUnit_word) - { - char *val = (char *) nmem_malloc(stream, 16); - zh->errCode = 132; - zh->errString = val; - sprintf (val, "%d", *zop->u.prox->u.known); - return NULL; - } - else - { - /* new / old prox */ - r = rsprox_create(rset_nmem,key_it_ctrl,key_it_ctrl->scope, - 2, rsets, - *zop->u.prox->ordered, - (!zop->u.prox->exclusion ? - 0 : *zop->u.prox->exclusion), - *zop->u.prox->relationType, - *zop->u.prox->distance ); - } - break; - default: - zh->errCode = 110; - return NULL; - } + /* make a new list of result for all children */ + *num_result_sets = num_result_sets_l + num_result_sets_r; + *result_sets = nmem_malloc(stream, *num_result_sets * + sizeof(**result_sets)); + memcpy(*result_sets, result_sets_l, + num_result_sets_l * sizeof(**result_sets)); + memcpy(*result_sets + num_result_sets_l, result_sets_r, + num_result_sets_r * sizeof(**result_sets)); + + if (!parent_op || parent_op->which != zop->which + || (zop->which != Z_Operator_and && + zop->which != Z_Operator_or)) + { + /* parent node different from this one (or non-present) */ + /* we must combine result sets now */ + RSET rset; + switch (zop->which) + { + case Z_Operator_and: + rset = rsmulti_and_create(rset_nmem, kc, + kc->scope, + *num_result_sets, *result_sets); + break; + case Z_Operator_or: + rset = rsmulti_or_create(rset_nmem, kc, + kc->scope, + *num_result_sets, *result_sets); + break; + case Z_Operator_and_not: + rset = rsbool_create_not(rset_nmem, kc, + kc->scope, + (*result_sets)[0], + (*result_sets)[1]); + break; + case Z_Operator_prox: + if (zop->u.prox->which != Z_ProximityOperator_known) + { + zh->errCode = YAZ_BIB1_UNSUPP_PROX_UNIT_CODE; + return ZEBRA_FAIL; + } + if (*zop->u.prox->u.known != Z_ProxUnit_word) + { + char *val = (char *) nmem_malloc(stream, 16); + zh->errCode = YAZ_BIB1_UNSUPP_PROX_UNIT_CODE; + zh->errString = val; + sprintf(val, "%d", *zop->u.prox->u.known); + return ZEBRA_FAIL; + } + else + { + rset = rsprox_create(rset_nmem, kc, + kc->scope, + *num_result_sets, *result_sets, + *zop->u.prox->ordered, + (!zop->u.prox->exclusion ? + 0 : *zop->u.prox->exclusion), + *zop->u.prox->relationType, + *zop->u.prox->distance ); + } + break; + default: + zh->errCode = YAZ_BIB1_OPERATOR_UNSUPP; + return ZEBRA_FAIL; + } + *num_result_sets = 1; + *result_sets = nmem_malloc(stream, *num_result_sets * + sizeof(**result_sets)); + (*result_sets)[0] = rset; + } } else if (zs->which == Z_RPNStructure_simple) { + RSET rset; + ZEBRA_RES res; + if (zs->u.simple->which == Z_Operand_APT) { yaz_log(YLOG_DEBUG, "rpn_search_APT"); - r = rpn_search_APT (zh, zs->u.simple->u.attributesPlusTerm, - attributeSet, stream, sort_sequence, - num_bases, basenames,rset_nmem); + res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm, + attributeSet, stream, sort_sequence, + num_bases, basenames, rset_nmem, &rset, + kc); + if (res != ZEBRA_OK) + return res; } else if (zs->u.simple->which == Z_Operand_resultSetId) { yaz_log(YLOG_DEBUG, "rpn_search_ref"); - r = resultSetRef (zh, zs->u.simple->u.resultSetId); - if (!r) + rset = resultSetRef(zh, zs->u.simple->u.resultSetId); + if (!rset) { - r = rsnull_create (rset_nmem,key_it_ctrl); - zh->errCode = 30; + zh->errCode = YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST; zh->errString = - nmem_strdup (stream, zs->u.simple->u.resultSetId); - return 0; + nmem_strdup(stream, zs->u.simple->u.resultSetId); + return ZEBRA_FAIL; } - else - rset_dup(r); + rset_dup(rset); } else { - zh->errCode = 3; - return 0; + zh->errCode = YAZ_BIB1_UNSUPP_SEARCH; + return ZEBRA_FAIL; } + *num_result_sets = 1; + *result_sets = nmem_malloc(stream, *num_result_sets * + sizeof(**result_sets)); + (*result_sets)[0] = rset; } else { - zh->errCode = 3; - return 0; - } - return r; -} - - -RSET rpn_search(ZebraHandle zh, NMEM nmem, NMEM rset_nmem, - Z_RPNQuery *rpn, int num_bases, char **basenames, - const char *setname, - ZebraSet sset) -{ - RSET rset; - oident *attrset; - oid_value attributeSet; - Z_SortKeySpecList *sort_sequence; - int sort_status, i; - - zh->errCode = 0; - zh->errString = NULL; - zh->hits = 0; - - sort_sequence = (Z_SortKeySpecList *) - nmem_malloc(nmem, sizeof(*sort_sequence)); - sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */ - sort_sequence->specs = (Z_SortKeySpec **) - nmem_malloc(nmem, sort_sequence->num_specs * - sizeof(*sort_sequence->specs)); - for (i = 0; inum_specs; i++) - sort_sequence->specs[i] = 0; - - attrset = oid_getentbyoid (rpn->attributeSetId); - attributeSet = attrset->value; - rset = rpn_search_structure (zh, rpn->RPNStructure, attributeSet, - nmem, rset_nmem, - sort_sequence, num_bases, basenames); - if (!rset) - return 0; - - if (zh->errCode) - yaz_log(YLOG_DEBUG, "search error: %d", zh->errCode); - - for (i = 0; sort_sequence->specs[i]; i++) - ; - sort_sequence->num_specs = i; - if (!i) - resultSetRank (zh, sset, rset, rset_nmem); - else - { - yaz_log(YLOG_DEBUG, "resultSetSortSingle in rpn_search"); - resultSetSortSingle (zh, nmem, sset, rset, - sort_sequence, &sort_status); - if (zh->errCode) - { - yaz_log(YLOG_DEBUG, "resultSetSortSingle status = %d", zh->errCode); - } + zh->errCode = YAZ_BIB1_UNSUPP_SEARCH; + return ZEBRA_FAIL; } - return rset; + return ZEBRA_OK; } struct scan_info_entry { @@ -2427,9 +2636,13 @@ static int scan_handle (char *name, const char *info, int pos, void *client) len_prefix = strlen(scan_info->prefix); if (memcmp (name, scan_info->prefix, len_prefix)) return 1; - if (pos > 0) idx = scan_info->after - pos + scan_info->before; + if (pos > 0) + idx = scan_info->after - pos + scan_info->before; else idx = - pos - 1; + + if (idx < 0) + return 0; scan_info->list[idx].term = (char *) odr_malloc(scan_info->odr, strlen(name + len_prefix)+1); strcpy(scan_info->list[idx].term, name + len_prefix); @@ -2494,11 +2707,11 @@ static void count_set (RSET r, int *count) yaz_log(YLOG_DEBUG, "%d keys, %d records", kno, *count); } -void rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, - oid_value attributeset, - int num_bases, char **basenames, - int *position, int *num_entries, ZebraScanEntry **list, - int *is_partial, RSET limit_set, int return_zero) +ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, + oid_value attributeset, + int num_bases, char **basenames, + int *position, int *num_entries, ZebraScanEntry **list, + int *is_partial, RSET limit_set, int return_zero) { int i; int pos = *position; @@ -2525,6 +2738,7 @@ void rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, int complete_flag; int sort_flag; NMEM rset_nmem = NULL; + struct rset_key_control *kc = 0; *list = 0; *is_partial = 0; @@ -2537,9 +2751,9 @@ void rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, AttrType termset; int termset_value_numeric; const char *termset_value_string; - attr_init (&termset, zapt, 8); + attr_init(&termset, zapt, 8); termset_value_numeric = - attr_find_ex (&termset, NULL, &termset_value_string); + attr_find_ex(&termset, NULL, &termset_value_string); if (termset_value_numeric != -1) { char resname[32]; @@ -2548,7 +2762,7 @@ void rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, if (termset_value_numeric != -2) { - sprintf (resname, "%d", termset_value_numeric); + sprintf(resname, "%d", termset_value_numeric); termset_name = resname; } else @@ -2558,20 +2772,20 @@ void rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, } } - yaz_log (YLOG_DEBUG, "position = %d, num = %d set=%d", - pos, num, attributeset); + yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d", + pos, num, attributeset); - attr_init (&use, zapt, 1); - use_value = attr_find_ex (&use, &attributeset, &use_string); + attr_init(&use, zapt, 1); + use_value = attr_find_ex(&use, &attributeset, &use_string); - if (zebra_maps_attr (zh->reg->zebra_maps, zapt, ®_id, &search_type, - rank_type, &complete_flag, &sort_flag)) + if (zebra_maps_attr(zh->reg->zebra_maps, zapt, ®_id, &search_type, + rank_type, &complete_flag, &sort_flag)) { *num_entries = 0; - zh->errCode = 113; - return ; + zh->errCode = YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE; + return ZEBRA_FAIL; } - yaz_log (YLOG_DEBUG, "use_value = %d", use_value); + yaz_log(YLOG_DEBUG, "use_value = %d", use_value); if (use_value == -1) use_value = 1016; @@ -2584,9 +2798,9 @@ void rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no])) { zh->errString = basenames[base_no]; - zh->errCode = 109; /* Database unavailable */ + zh->errCode = YAZ_BIB1_DATABASE_UNAVAILABLE; *num_entries = 0; - return; + return ZEBRA_FAIL; } if (use_string && @@ -2609,18 +2823,18 @@ void rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, attributeset, use_value); if (r == -1) { - errCode = 114; + errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE; if (use_string) errString = odr_strdup(stream, use_string); else { char val_str[32]; - sprintf (val_str, "%d", use_value); + sprintf(val_str, "%d", use_value); errString = odr_strdup(stream, val_str); } } else - errCode = 121; + errCode = YAZ_BIB1_UNSUPP_ATTRIBUTE_SET; continue; } } @@ -2640,16 +2854,28 @@ void rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, zh->errCode = errCode; zh->errString = errString; *num_entries = 0; - return; + return ZEBRA_FAIL; } if (ord_no == 0) { *num_entries = 0; - return; + return ZEBRA_OK; } /* prepare dictionary scanning */ + if (num < 1) + { + *num_entries = 0; + return ZEBRA_OK; + } before = pos-1; + if (before < 0) + before = 0; after = 1+num-pos; + if (after < 0) + after = 0; + yaz_log(YLOG_DEBUG, "rpn_scan pos=%d num=%d before=%d " + "after=%d before+after=%d", + pos, num, before, after, before+after); scan_info_array = (struct scan_info *) odr_malloc(stream, ord_no * sizeof(*scan_info_array)); for (i = 0; i < ord_no; i++) @@ -2675,8 +2901,8 @@ void rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, termz[prefix_len] = 0; strcpy(scan_info->prefix, termz); - if (trans_scan_term(zh, zapt, termz+prefix_len, reg_id)) - return ; + if (trans_scan_term(zh, zapt, termz+prefix_len, reg_id) == ZEBRA_FAIL) + return ZEBRA_FAIL; dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp, scan_info, scan_handle); @@ -2685,6 +2911,7 @@ void rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, odr_malloc(stream, (before+after)*sizeof(*glist)); rset_nmem = nmem_create(); + kc = zebra_key_control_create(zh); /* consider terms after main term */ for (i = 0; i < ord_no; i++) @@ -2696,11 +2923,13 @@ void rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, int j, j0 = -1; const char *mterm = NULL; const char *tst; - RSET rset; - + RSET rset = 0; + int lo = i + pos-1; /* offset in result list */ + + /* find: j0 is the first of the minimal values */ for (j = 0; j < ord_no; j++) { - if (ptr[j] < before+after && + if (ptr[j] < before+after && ptr[j] >= 0 && (tst = scan_info_array[j].list[ptr[j]].term) && (!mterm || strcmp (tst, mterm) < 0)) { @@ -2709,52 +2938,74 @@ void rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, } } if (j0 == -1) - break; - scan_term_untrans (zh, stream->mem, reg_id, - &glist[i+before].term, mterm); - rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1, - glist[i+before].term, strlen(glist[i+before].term), - NULL, 0, zapt->term->which, rset_nmem, - key_it_ctrl,key_it_ctrl->scope); - ptr[j0]++; + break; /* no value found, stop */ + + /* get result set for first one , but only if it's within bounds */ + if (lo >= 0) + { + /* get result set for first term */ + scan_term_untrans(zh, stream->mem, reg_id, + &glist[lo].term, mterm); + rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1, + glist[lo].term, strlen(glist[lo].term), + NULL, 0, zapt->term->which, rset_nmem, + kc, kc->scope); + } + ptr[j0]++; /* move index for this set .. */ + /* get result set for remaining scan terms */ for (j = j0+1; j= 0 && (tst = scan_info_array[j].list[ptr[j]].term) && !strcmp (tst, mterm)) { - RSET rsets[2]; - - rsets[0] = rset; - rsets[1] = - rset_trunc(zh, &scan_info_array[j].list[ptr[j]].isam_p, 1, - glist[i+before].term, - strlen(glist[i+before].term), NULL, 0, - zapt->term->which,rset_nmem, - key_it_ctrl, key_it_ctrl->scope); - rset = rsmulti_or_create(rset_nmem, key_it_ctrl, - 2, key_it_ctrl->scope, rsets); + if (lo >= 0) + { + RSET rsets[2]; + + rsets[0] = rset; + rsets[1] = + rset_trunc( + zh, &scan_info_array[j].list[ptr[j]].isam_p, 1, + glist[lo].term, + strlen(glist[lo].term), NULL, 0, + zapt->term->which,rset_nmem, + kc, kc->scope); + rset = rsmulti_or_create(rset_nmem, kc, + 2, kc->scope, rsets); + } ptr[j]++; } } - if (limit_set) + if (lo >= 0) { - RSET rsets[2]; - rsets[0] = rset; - rsets[1] = rset_dup(limit_set); - - rset = rsmulti_and_create(rset_nmem, key_it_ctrl, - key_it_ctrl->scope, 2, rsets); + /* merge with limit_set if given */ + if (limit_set) + { + RSET rsets[2]; + rsets[0] = rset; + rsets[1] = rset_dup(limit_set); + + rset = rsmulti_and_create(rset_nmem, kc, + kc->scope, 2, rsets); + } + /* count it */ + count_set(rset, &glist[lo].occurrences); + rset_delete(rset); } - count_set(rset, &glist[i+before].occurrences); - rset_delete(rset); } if (i < after) { *num_entries -= (after-i); *is_partial = 1; + if (*num_entries < 0) + { + (*kc->dec)(kc); + nmem_destroy(rset_nmem); + *num_entries = 0; + return ZEBRA_OK; + } } - /* consider terms before main term */ for (i = 0; i= 0 && (tst = scan_info_array[j].list[before-1-ptr[j]].term) && (!mterm || strcmp (tst, mterm) > 0)) { @@ -2780,19 +3032,19 @@ void rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, break; scan_term_untrans (zh, stream->mem, reg_id, - &glist[before-1-i].term, mterm); + &glist[lo].term, mterm); rset = rset_trunc (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1, - glist[before-1-i].term, strlen(glist[before-1-i].term), + glist[lo].term, strlen(glist[lo].term), NULL, 0, zapt->term->which,rset_nmem, - key_it_ctrl,key_it_ctrl->scope); + kc, kc->scope); ptr[j0]++; for (j = j0+1; j= 0 && (tst = scan_info_array[j].list[before-1-ptr[j]].term) && !strcmp (tst, mterm)) { @@ -2802,12 +3054,12 @@ void rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, rsets[1] = rset_trunc( zh, &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1, - glist[before-1-i].term, - strlen(glist[before-1-i].term), NULL, 0, + glist[lo].term, + strlen(glist[lo].term), NULL, 0, zapt->term->which, rset_nmem, - key_it_ctrl, key_it_ctrl->scope); - rset = rsmulti_or_create(rset_nmem, key_it_ctrl, - 2, key_it_ctrl->scope, rsets); + kc, kc->scope); + rset = rsmulti_or_create(rset_nmem, kc, + 2, kc->scope, rsets); ptr[j]++; } @@ -2818,26 +3070,31 @@ void rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, rsets[0] = rset; rsets[1] = rset_dup(limit_set); - rset = rsmulti_and_create(rset_nmem, key_it_ctrl, - key_it_ctrl->scope, 2, rsets); + rset = rsmulti_and_create(rset_nmem, kc, + kc->scope, 2, rsets); } - count_set (rset, &glist[before-1-i].occurrences); + count_set (rset, &glist[lo].occurrences); rset_delete (rset); } + (*kc->dec)(kc); + nmem_destroy(rset_nmem); i = before-i; if (i) { *is_partial = 1; *position -= i; *num_entries -= i; + if (*num_entries <= 0) + { + *num_entries = 0; + return ZEBRA_OK; + } } - nmem_destroy(rset_nmem); *list = glist + i; /* list is set to first 'real' entry */ yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d", *position, *num_entries); - if (zh->errCode) - yaz_log(YLOG_DEBUG, "scan error: %d", zh->errCode); + return ZEBRA_OK; }