X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Frpnsearch.c;h=5f7a84e438ea328b7f9a8d8bae78961f463f23c8;hb=670bc043a748e0539a534643fc3993a6c9426c13;hp=88a411f4fe6ac571f53f5c5d613cf71817570684;hpb=d82c0efad7971d102220a8824e1ea674db5b7fe2;p=idzebra-moved-to-github.git diff --git a/index/rpnsearch.c b/index/rpnsearch.c index 88a411f..5f7a84e 100644 --- a/index/rpnsearch.c +++ b/index/rpnsearch.c @@ -1,4 +1,4 @@ -/* $Id: rpnsearch.c,v 1.15 2007-10-29 09:25:41 adam Exp $ +/* $Id: rpnsearch.c,v 1.17 2007-10-29 20:07:04 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -234,7 +234,7 @@ static void add_non_space(const char *start, const char *end, } /* term_100: handle term, where trunc = none(no operators at all) */ -static int term_100(ZebraMaps zebra_maps, int reg_type, +static int term_100(ZebraMaps zebra_maps, const char *index_type, const char **src, WRBUF term_dict, int space_split, char *dst_term) { @@ -246,14 +246,14 @@ static int term_100(ZebraMaps zebra_maps, int reg_type, const char *space_start = 0; const char *space_end = 0; - if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split)) + if (!term_pre(zebra_maps, *index_type, src, NULL, NULL, !space_split)) return 0; s0 = *src; while (*s0) { const char *s1 = s0; int q_map_match = 0; - map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), + map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0), &q_map_match); if (space_split) { @@ -294,7 +294,7 @@ static int term_100(ZebraMaps zebra_maps, int reg_type, } /* term_101: handle term, where trunc = Process # */ -static int term_101(ZebraMaps zebra_maps, int reg_type, +static int term_101(ZebraMaps zebra_maps, const char *index_type, const char **src, WRBUF term_dict, int space_split, char *dst_term) { @@ -303,7 +303,7 @@ static int term_101(ZebraMaps zebra_maps, int reg_type, int i = 0; int j = 0; - if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split)) + if (!term_pre(zebra_maps, *index_type, src, "#", "#", 
!space_split)) return 0; s0 = *src; while (*s0) @@ -318,7 +318,7 @@ static int term_101(ZebraMaps zebra_maps, int reg_type, { const char *s1 = s0; int q_map_match = 0; - map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), + map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0), &q_map_match); if (space_split && **map == *CHR_SPACE) break; @@ -334,7 +334,8 @@ static int term_101(ZebraMaps zebra_maps, int reg_type, } /* term_103: handle term, where trunc = re-2 (regular expressions) */ -static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src, +static int term_103(ZebraMaps zebra_maps, const char *index_type, + const char **src, WRBUF term_dict, int *errors, int space_split, char *dst_term) { @@ -343,7 +344,7 @@ static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src, const char *s0; const char **map; - if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split)) + if (!term_pre(zebra_maps, *index_type, src, "^\\()[].*+?|", "(", !space_split)) return 0; s0 = *src; if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] && @@ -367,7 +368,7 @@ static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src, { const char *s1 = s0; int q_map_match = 0; - map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), + map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0), &q_map_match); if (space_split && **map == *CHR_SPACE) break; @@ -384,16 +385,17 @@ static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src, } /* term_103: handle term, where trunc = re-1 (regular expressions) */ -static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src, +static int term_102(ZebraMaps zebra_maps, const char *index_type, + const char **src, WRBUF term_dict, int space_split, char *dst_term) { - return term_103(zebra_maps, reg_type, src, term_dict, NULL, space_split, + return term_103(zebra_maps, index_type, src, term_dict, NULL, space_split, dst_term); } /* 
term_104: handle term, process # and ! */ -static int term_104(ZebraMaps zebra_maps, int reg_type, +static int term_104(ZebraMaps zebra_maps, const char *index_type, const char **src, WRBUF term_dict, int space_split, char *dst_term) { @@ -402,7 +404,7 @@ static int term_104(ZebraMaps zebra_maps, int reg_type, int i = 0; int j = 0; - if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split)) + if (!term_pre(zebra_maps, *index_type, src, "?*#", "?*#", !space_split)) return 0; s0 = *src; while (*s0) @@ -447,7 +449,7 @@ static int term_104(ZebraMaps zebra_maps, int reg_type, { const char *s1 = s0; int q_map_match = 0; - map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), + map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0), &q_map_match); if (space_split && **map == *CHR_SPACE) break; @@ -463,7 +465,7 @@ static int term_104(ZebraMaps zebra_maps, int reg_type, } /* term_105/106: handle term, where trunc = Process * and ! and right trunc */ -static int term_105(ZebraMaps zebra_maps, int reg_type, +static int term_105(ZebraMaps zebra_maps, const char *index_type, const char **src, WRBUF term_dict, int space_split, char *dst_term, int right_truncate) { @@ -472,7 +474,7 @@ static int term_105(ZebraMaps zebra_maps, int reg_type, int i = 0; int j = 0; - if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split)) + if (!term_pre(zebra_maps, *index_type, src, "*!", "*!", !space_split)) return 0; s0 = *src; while (*s0) @@ -493,7 +495,7 @@ static int term_105(ZebraMaps zebra_maps, int reg_type, { const char *s1 = s0; int q_map_match = 0; - map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), + map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0), &q_map_match); if (space_split && **map == *CHR_SPACE) break; @@ -650,7 +652,7 @@ void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx) static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, WRBUF term_dict, const Odr_oid 
*attributeSet, - int reg_type, int space_split, char *term_dst, + const char *index_type, int space_split, char *term_dst, int *error_code) { AttrType relation; @@ -666,7 +668,7 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, switch (relation_value) { case 1: - if (!term_100(zh->reg->zebra_maps, reg_type, + if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_component, space_split, term_dst)) { @@ -702,7 +704,7 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, wrbuf_putc(term_dict, ')'); break; case 2: - if (!term_100(zh->reg->zebra_maps, reg_type, + if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_component, space_split, term_dst)) { @@ -739,7 +741,7 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, wrbuf_putc(term_dict, ')'); break; case 5: - if (!term_100(zh->reg->zebra_maps, reg_type, + if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_component, space_split, term_dst)) { wrbuf_destroy(term_component); @@ -773,7 +775,7 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, wrbuf_putc(term_dict, ')'); break; case 4: - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, + if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_component, space_split, term_dst)) { wrbuf_destroy(term_component); @@ -815,7 +817,7 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, if (!**term_sub) return 1; yaz_log(log_level_rpn, "Relation ="); - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, + if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_component, space_split, term_dst)) { wrbuf_destroy(term_component); @@ -845,8 +847,7 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, WRBUF term_dict, const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, - int reg_type, int complete_flag, - int num_bases, char **basenames, + const char *index_type, int complete_flag, char 
*term_dst, const char *xpath_use, struct ord_list **ol); @@ -903,8 +904,7 @@ static ZEBRA_RES term_trunc(ZebraHandle zh, const char **term_sub, const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, - int reg_type, int complete_flag, - int num_bases, char **basenames, + const char *index_type, int complete_flag, char *term_dst, const char *rank_type, const char *xpath_use, @@ -923,7 +923,7 @@ static ZEBRA_RES term_trunc(ZebraHandle zh, grep_info->isam_p_indx = 0; res = string_term(zh, zapt, term_sub, term_dict, attributeSet, stream, grep_info, - reg_type, complete_flag, num_bases, basenames, + index_type, complete_flag, term_dst, xpath_use, &ol); wrbuf_destroy(term_dict); if (res != ZEBRA_OK) @@ -935,7 +935,7 @@ static ZEBRA_RES term_trunc(ZebraHandle zh, grep_info->isam_p_indx, term_dst, strlen(term_dst), rank_type, 1 /* preserve pos */, zapt->term->which, rset_nmem, - kc, kc->scope, ol, reg_type, hits_limit_value, + kc, kc->scope, ol, index_type, hits_limit_value, term_ref_id_str); if (!*rset) return ZEBRA_FAIL; @@ -947,205 +947,189 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, WRBUF term_dict, const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, - int reg_type, int complete_flag, - int num_bases, char **basenames, + const char *index_type, int complete_flag, char *term_dst, const char *xpath_use, struct ord_list **ol) { - int r, base_no; + int r; AttrType truncation; int truncation_value; const char *termp; struct rpn_char_map_info rcmi; - int space_split = complete_flag ? 0 : 1; - - int bases_ok = 0; /* no of databases with OK attribute */ + int space_split = complete_flag ? 
0 : 1; + int ord = -1; + int regex_range = 0; + int max_pos, prefix_len = 0; + int relation_error; + char ord_buf[32]; + int ord_len, i; + *ol = ord_list_create(stream); - rpn_char_map_prepare(zh->reg, reg_type, &rcmi); + rpn_char_map_prepare(zh->reg, *index_type, &rcmi); attr_init_APT(&truncation, zapt, 5); truncation_value = attr_find(&truncation, NULL); yaz_log(log_level_rpn, "truncation value %d", truncation_value); - for (base_no = 0; base_no < num_bases; base_no++) + termp = *term_sub; /* start of term for each database */ + + if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use, + attributeSet, &ord) != ZEBRA_OK) { - int ord = -1; - int regex_range = 0; - int max_pos, prefix_len = 0; - int relation_error; - char ord_buf[32]; - int ord_len, i; - - termp = *term_sub; /* start of term for each database */ - - - if (zebraExplain_curDatabase(zh->reg->zei, basenames[base_no])) + *term_sub = 0; + return ZEBRA_FAIL; + } + + wrbuf_rewind(term_dict); /* new dictionary regexp term */ + + *ol = ord_list_append(stream, *ol, ord); + ord_len = key_SU_encode(ord, ord_buf); + + wrbuf_putc(term_dict, '('); + + for (i = 0; ireg->zebra_maps, index_type, + &termp, term_dict, space_split, term_dst)) { - wrbuf_putc(term_dict, 1); /* our internal regexp escape char */ - wrbuf_putc(term_dict, ord_buf[i]); + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_puts(term_dict, ".*)"); + break; + case 2: /* keft truncation */ + wrbuf_puts(term_dict, "(.*"); + if (!term_100(zh->reg->zebra_maps, index_type, + &termp, term_dict, space_split, term_dst)) + { + *term_sub = 0; + return ZEBRA_OK; } wrbuf_putc(term_dict, ')'); - - prefix_len = wrbuf_len(term_dict); - - switch (truncation_value) + break; + case 3: /* left&right truncation */ + wrbuf_puts(term_dict, "(.*"); + if (!term_100(zh->reg->zebra_maps, index_type, + &termp, term_dict, space_split, term_dst)) { - case -1: /* not specified */ - case 100: /* do not truncate */ - if (!string_relation(zh, zapt, &termp, term_dict, - attributeSet, - 
reg_type, space_split, term_dst, - &relation_error)) - { - if (relation_error) - { - zebra_setError(zh, relation_error, 0); - return ZEBRA_FAIL; - } - *term_sub = 0; - return ZEBRA_OK; - } - break; - case 1: /* right truncation */ - wrbuf_putc(term_dict, '('); - if (!term_100(zh->reg->zebra_maps, reg_type, - &termp, term_dict, space_split, term_dst)) - { - *term_sub = 0; - return ZEBRA_OK; - } - wrbuf_puts(term_dict, ".*)"); - break; - case 2: /* keft truncation */ - wrbuf_puts(term_dict, "(.*"); - if (!term_100(zh->reg->zebra_maps, reg_type, - &termp, term_dict, space_split, term_dst)) - { - *term_sub = 0; - return ZEBRA_OK; - } - wrbuf_putc(term_dict, ')'); - break; - case 3: /* left&right truncation */ - wrbuf_puts(term_dict, "(.*"); - if (!term_100(zh->reg->zebra_maps, reg_type, - &termp, term_dict, space_split, term_dst)) - { - *term_sub = 0; - return ZEBRA_OK; - } - wrbuf_puts(term_dict, ".*)"); - break; - case 101: /* process # in term */ - wrbuf_putc(term_dict, '('); - if (!term_101(zh->reg->zebra_maps, reg_type, - &termp, term_dict, space_split, term_dst)) - { - *term_sub = 0; - return ZEBRA_OK; - } - wrbuf_puts(term_dict, ")"); - break; - case 102: /* Regexp-1 */ - wrbuf_putc(term_dict, '('); - if (!term_102(zh->reg->zebra_maps, reg_type, - &termp, term_dict, space_split, term_dst)) - { - *term_sub = 0; - return ZEBRA_OK; - } - wrbuf_putc(term_dict, ')'); - break; - case 103: /* Regexp-2 */ - regex_range = 1; - wrbuf_putc(term_dict, '('); - if (!term_103(zh->reg->zebra_maps, reg_type, - &termp, term_dict, &regex_range, - space_split, term_dst)) - { - *term_sub = 0; - return ZEBRA_OK; - } - wrbuf_putc(term_dict, ')'); - break; - case 104: /* process # and ! in term */ - wrbuf_putc(term_dict, '('); - if (!term_104(zh->reg->zebra_maps, reg_type, - &termp, term_dict, space_split, term_dst)) - { - *term_sub = 0; - return ZEBRA_OK; - } - wrbuf_putc(term_dict, ')'); - break; - case 105: /* process * and ! 
in term */ - wrbuf_putc(term_dict, '('); - if (!term_105(zh->reg->zebra_maps, reg_type, - &termp, term_dict, space_split, term_dst, 1)) - { - *term_sub = 0; - return ZEBRA_OK; - } - wrbuf_putc(term_dict, ')'); - break; - case 106: /* process * and ! in term */ - wrbuf_putc(term_dict, '('); - if (!term_105(zh->reg->zebra_maps, reg_type, + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_puts(term_dict, ".*)"); + break; + case 101: /* process # in term */ + wrbuf_putc(term_dict, '('); + if (!term_101(zh->reg->zebra_maps, index_type, + &termp, term_dict, space_split, term_dst)) + { + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_puts(term_dict, ")"); + break; + case 102: /* Regexp-1 */ + wrbuf_putc(term_dict, '('); + if (!term_102(zh->reg->zebra_maps, index_type, + &termp, term_dict, space_split, term_dst)) + { + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_putc(term_dict, ')'); + break; + case 103: /* Regexp-2 */ + regex_range = 1; + wrbuf_putc(term_dict, '('); + if (!term_103(zh->reg->zebra_maps, index_type, + &termp, term_dict, &regex_range, + space_split, term_dst)) + { + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_putc(term_dict, ')'); + break; + case 104: /* process # and ! in term */ + wrbuf_putc(term_dict, '('); + if (!term_104(zh->reg->zebra_maps, index_type, + &termp, term_dict, space_split, term_dst)) + { + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_putc(term_dict, ')'); + break; + case 105: /* process * and ! in term */ + wrbuf_putc(term_dict, '('); + if (!term_105(zh->reg->zebra_maps, index_type, + &termp, term_dict, space_split, term_dst, 1)) + { + *term_sub = 0; + return ZEBRA_OK; + } + wrbuf_putc(term_dict, ')'); + break; + case 106: /* process * and ! 
in term */ + wrbuf_putc(term_dict, '('); + if (!term_105(zh->reg->zebra_maps, index_type, &termp, term_dict, space_split, term_dst, 0)) - { - *term_sub = 0; - return ZEBRA_OK; - } - wrbuf_putc(term_dict, ')'); - break; - default: - zebra_setError_zint(zh, - YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE, - truncation_value); - return ZEBRA_FAIL; + { + *term_sub = 0; + return ZEBRA_OK; } - if (1) - { - char buf[1000]; - const char *input = wrbuf_cstr(term_dict) + prefix_len; - esc_str(buf, sizeof(buf), input, strlen(input)); - } - yaz_log(log_level_rpn, "dict_lookup_grep: %s", - wrbuf_cstr(term_dict) + prefix_len); - r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range, - grep_info, &max_pos, - ord_len /* number of "exact" chars */, - grep_handle); - if (r == 1) - zebra_set_partial_result(zh); - else if (r) - yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r); - } - if (!bases_ok) + wrbuf_putc(term_dict, ')'); + break; + default: + zebra_setError_zint(zh, + YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE, + truncation_value); return ZEBRA_FAIL; + } + if (1) + { + char buf[1000]; + const char *input = wrbuf_cstr(term_dict) + prefix_len; + esc_str(buf, sizeof(buf), input, strlen(input)); + } + yaz_log(log_level_rpn, "dict_lookup_grep: %s", + wrbuf_cstr(term_dict) + prefix_len); + r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range, + grep_info, &max_pos, + ord_len /* number of "exact" chars */, + grep_handle); + if (r == 1) + zebra_set_partial_result(zh); + else if (r) + yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r); *term_sub = termp; yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx); return ZEBRA_OK; @@ -1230,12 +1214,10 @@ static ZEBRA_RES grep_info_prepare(ZebraHandle zh, \param termz term as used in query but converted to UTF-8 \param attributeSet default attribute set \param stream memory for result - \param reg_type register type ('w', 'p',..) + \param index_type register type ("w", "p",..) 
\param complete_flag whether it's phrases or not \param rank_type term flags for ranking \param xpath_use use attribute for X-Path (-1 for no X-path) - \param num_bases number of databases - \param basenames array of databases \param rset_nmem memory for result sets \param result_sets output result set for each term in list (output) \param num_result_sets number of output result sets @@ -1246,10 +1228,9 @@ static ZEBRA_RES term_list_trunc(ZebraHandle zh, const char *termz, const Odr_oid *attributeSet, NMEM stream, - int reg_type, int complete_flag, + const char *index_type, int complete_flag, const char *rank_type, const char *xpath_use, - int num_bases, char **basenames, NMEM rset_nmem, RSET **result_sets, int *num_result_sets, struct rset_key_control *kc) @@ -1261,7 +1242,7 @@ static ZEBRA_RES term_list_trunc(ZebraHandle zh, *num_result_sets = 0; *term_dst = 0; - if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL) + if (grep_info_prepare(zh, zapt, &grep_info, *index_type) == ZEBRA_FAIL) return ZEBRA_FAIL; while(1) { @@ -1279,8 +1260,7 @@ static ZEBRA_RES term_list_trunc(ZebraHandle zh, } res = term_trunc(zh, zapt, &termp, attributeSet, stream, &grep_info, - reg_type, complete_flag, - num_bases, basenames, + index_type, complete_flag, term_dst, rank_type, xpath_use, rset_nmem, &(*result_sets)[*num_result_sets], @@ -1307,18 +1287,20 @@ static ZEBRA_RES term_list_trunc(ZebraHandle zh, static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const Odr_oid *attributeSet, - int reg_type, - int num_bases, char **basenames, + const char *index_type, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc) { - RSET *f_set; - int base_no; int position_value; - int num_sets = 0; AttrType position; - + int ord = -1; + char ord_buf[32]; + char term_dict[100]; + int ord_len; + char *val; + ISAM_P isam_p; + attr_init_APT(&position, zapt, 3); position_value = attr_find(&position, NULL); switch(position_value) @@ -1335,7 +1317,7 @@ 
static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh, return ZEBRA_FAIL; } - if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, reg_type)) + if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, *index_type)) { zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE, position_value); @@ -1348,50 +1330,28 @@ static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh, position_value); return ZEBRA_FAIL; } - f_set = xmalloc(sizeof(RSET) * num_bases); - for (base_no = 0; base_no < num_bases; base_no++) - { - int ord = -1; - char ord_buf[32]; - char term_dict[100]; - int ord_len; - char *val; - ISAM_P isam_p; - if (zebraExplain_curDatabase(zh->reg->zei, basenames[base_no])) - { - zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE, - basenames[base_no]); - return ZEBRA_FAIL; - } - - if (zebra_apt_get_ord(zh, zapt, reg_type, 0, - attributeSet, &ord) != ZEBRA_OK) - continue; - - ord_len = key_SU_encode(ord, ord_buf); - memcpy(term_dict, ord_buf, ord_len); - strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR); - val = dict_lookup(zh->reg->dict, term_dict); - if (!val) - continue; + if (zebra_apt_get_ord(zh, zapt, index_type, 0, + attributeSet, &ord) != ZEBRA_OK) + { + return ZEBRA_FAIL; + } + ord_len = key_SU_encode(ord, ord_buf); + memcpy(term_dict, ord_buf, ord_len); + strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR); + val = dict_lookup(zh->reg->dict, term_dict); + if (val) + { assert(*val == sizeof(ISAM_P)); memcpy(&isam_p, val+1, sizeof(isam_p)); - if (zh->reg->isamb) - f_set[num_sets++] = rsisamb_create(rset_nmem, kc, kc->scope, - zh->reg->isamb, isam_p, 0); + *rset = rsisamb_create(rset_nmem, kc, kc->scope, + zh->reg->isamb, isam_p, 0); else if (zh->reg->isamc) - f_set[num_sets++] = rsisamc_create(rset_nmem, kc, kc->scope, - zh->reg->isamc, isam_p, 0); - } - if (num_sets) - { - *rset = rset_create_or(rset_nmem, kc, kc->scope, - 0 /* termid */, num_sets, f_set); + *rset = rsisamc_create(rset_nmem, kc, kc->scope, + zh->reg->isamc, isam_p, 0); } - xfree(f_set); return 
ZEBRA_OK; } @@ -1400,10 +1360,9 @@ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, const char *termz_org, const Odr_oid *attributeSet, NMEM stream, - int reg_type, int complete_flag, + const char *index_type, int complete_flag, const char *rank_type, const char *xpath_use, - int num_bases, char **basenames, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc) @@ -1412,9 +1371,8 @@ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, int num_result_sets = 0; ZEBRA_RES res = term_list_trunc(zh, zapt, termz_org, attributeSet, - stream, reg_type, complete_flag, + stream, index_type, complete_flag, rank_type, xpath_use, - num_bases, basenames, rset_nmem, &result_sets, &num_result_sets, kc); @@ -1425,8 +1383,7 @@ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, { RSET first_set = 0; res = rpn_search_APT_position(zh, zapt, attributeSet, - reg_type, - num_bases, basenames, + index_type, rset_nmem, &first_set, kc); if (res != ZEBRA_OK) @@ -1460,10 +1417,10 @@ static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh, const char *termz_org, const Odr_oid *attributeSet, NMEM stream, - int reg_type, int complete_flag, + const char *index_type, + int complete_flag, const char *rank_type, const char *xpath_use, - int num_bases, char **basenames, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc) @@ -1473,9 +1430,8 @@ static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh, int i; ZEBRA_RES res = term_list_trunc(zh, zapt, termz_org, attributeSet, - stream, reg_type, complete_flag, + stream, index_type, complete_flag, rank_type, xpath_use, - num_bases, basenames, rset_nmem, &result_sets, &num_result_sets, kc); if (res != ZEBRA_OK) @@ -1485,8 +1441,7 @@ static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh, { RSET first_set = 0; res = rpn_search_APT_position(zh, zapt, attributeSet, - reg_type, - num_bases, basenames, + index_type, rset_nmem, &first_set, kc); if (res != ZEBRA_OK) @@ -1527,10 +1482,10 @@ static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh, 
const char *termz_org, const Odr_oid *attributeSet, NMEM stream, - int reg_type, int complete_flag, + const char *index_type, + int complete_flag, const char *rank_type, const char *xpath_use, - int num_bases, char **basenames, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc) @@ -1540,9 +1495,8 @@ static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh, int i; ZEBRA_RES res = term_list_trunc(zh, zapt, termz_org, attributeSet, - stream, reg_type, complete_flag, + stream, index_type, complete_flag, rank_type, xpath_use, - num_bases, basenames, rset_nmem, &result_sets, &num_result_sets, kc); @@ -1552,8 +1506,7 @@ static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh, { RSET first_set = 0; res = rpn_search_APT_position(zh, zapt, attributeSet, - reg_type, - num_bases, basenames, + index_type, rset_nmem, &first_set, kc); if (res != ZEBRA_OK) @@ -1585,7 +1538,7 @@ static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh, *rset = result_sets[0]; else *rset = rset_create_and(rset_nmem, kc, kc->scope, - num_result_sets, result_sets); + num_result_sets, result_sets); if (!*rset) return ZEBRA_FAIL; return ZEBRA_OK; @@ -1597,7 +1550,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const Odr_oid *attributeSet, struct grep_info *grep_info, int *max_pos, - int reg_type, + const char *index_type, char *term_dst, int *error_code) { @@ -1617,7 +1570,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, { case 1: yaz_log(log_level_rpn, "Relation <"); - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1, + if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1, term_dst)) { wrbuf_destroy(term_num); @@ -1628,7 +1581,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, break; case 2: yaz_log(log_level_rpn, "Relation <="); - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1, + if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1, term_dst)) { 
wrbuf_destroy(term_num); @@ -1639,7 +1592,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, break; case 4: yaz_log(log_level_rpn, "Relation >="); - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1, + if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1, term_dst)) { wrbuf_destroy(term_num); @@ -1650,7 +1603,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, break; case 5: yaz_log(log_level_rpn, "Relation >"); - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1, + if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1, term_dst)) { wrbuf_destroy(term_num); @@ -1662,7 +1615,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, case -1: case 3: yaz_log(log_level_rpn, "Relation ="); - if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1, + if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1, term_dst)) { wrbuf_destroy(term_num); @@ -1698,72 +1651,56 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, WRBUF term_dict, const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, - int reg_type, int complete_flag, - int num_bases, char **basenames, + const char *index_type, int complete_flag, char *term_dst, const char *xpath_use, struct ord_list **ol) { - int base_no; const char *termp; struct rpn_char_map_info rcmi; - - int bases_ok = 0; /* no of databases with OK attribute */ - + int max_pos; + int relation_error = 0; + int ord, ord_len, i; + char ord_buf[32]; + *ol = ord_list_create(stream); - rpn_char_map_prepare(zh->reg, reg_type, &rcmi); + rpn_char_map_prepare(zh->reg, *index_type, &rcmi); - for (base_no = 0; base_no < num_bases; base_no++) + termp = *term_sub; + + if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use, + attributeSet, &ord) != ZEBRA_OK) { - int max_pos; - int relation_error = 0; - int ord, ord_len, i; - char ord_buf[32]; - - termp = *term_sub; - 
- if (zebraExplain_curDatabase(zh->reg->zei, basenames[base_no])) + return ZEBRA_FAIL; + } + + wrbuf_rewind(term_dict); + + *ol = ord_list_append(stream, *ol, ord); + + ord_len = key_SU_encode(ord, ord_buf); + + wrbuf_putc(term_dict, '('); + for (i = 0; i < ord_len; i++) + { + wrbuf_putc(term_dict, 1); + wrbuf_putc(term_dict, ord_buf[i]); + } + wrbuf_putc(term_dict, ')'); + + if (!numeric_relation(zh, zapt, &termp, term_dict, + attributeSet, grep_info, &max_pos, index_type, + term_dst, &relation_error)) + { + if (relation_error) { - zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE, - basenames[base_no]); + zebra_setError(zh, relation_error, 0); return ZEBRA_FAIL; } - - if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use, - attributeSet, &ord) != ZEBRA_OK) - continue; - bases_ok++; - - wrbuf_rewind(term_dict); - - *ol = ord_list_append(stream, *ol, ord); - - ord_len = key_SU_encode(ord, ord_buf); - - wrbuf_putc(term_dict, '('); - for (i = 0; i < ord_len; i++) - { - wrbuf_putc(term_dict, 1); - wrbuf_putc(term_dict, ord_buf[i]); - } - wrbuf_putc(term_dict, ')'); - - if (!numeric_relation(zh, zapt, &termp, term_dict, - attributeSet, grep_info, &max_pos, reg_type, - term_dst, &relation_error)) - { - if (relation_error) - { - zebra_setError(zh, relation_error, 0); - return ZEBRA_FAIL; - } - *term_sub = 0; - return ZEBRA_OK; - } + *term_sub = 0; + return ZEBRA_OK; } - if (!bases_ok) - return ZEBRA_FAIL; *term_sub = termp; yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx); return ZEBRA_OK; @@ -1775,10 +1712,10 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, const char *termz, const Odr_oid *attributeSet, NMEM stream, - int reg_type, int complete_flag, + const char *index_type, + int complete_flag, const char *rank_type, const char *xpath_use, - int num_bases, char **basenames, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc) @@ -1796,7 +1733,7 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, term_limits_APT(zh, zapt, 
&hits_limit_value, &term_ref_id_str, stream); yaz_log(log_level_rpn, "APT_numeric t='%s'", termz); - if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL) + if (grep_info_prepare(zh, zapt, &grep_info, *index_type) == ZEBRA_FAIL) return ZEBRA_FAIL; while (1) { @@ -1816,7 +1753,7 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, grep_info.isam_p_indx = 0; res = numeric_term(zh, zapt, &termp, term_dict, attributeSet, stream, &grep_info, - reg_type, complete_flag, num_bases, basenames, + index_type, complete_flag, term_dst, xpath_use, &ol); wrbuf_destroy(term_dict); if (res == ZEBRA_FAIL || termp == 0) @@ -1828,7 +1765,7 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, strlen(term_dst), rank_type, 0 /* preserve position */, zapt->term->which, rset_nmem, - kc, kc->scope, ol, reg_type, + kc, kc->scope, ol, index_type, hits_limit_value, term_ref_id_str); if (!result_sets[num_result_sets]) @@ -1985,7 +1922,7 @@ static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt, static RSET xpath_trunc(ZebraHandle zh, NMEM stream, - int reg_type, const char *term, + const char *index_type, const char *term, const char *xpath_use, NMEM rset_nmem, struct rset_key_control *kc) @@ -1993,7 +1930,7 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream, struct grep_info grep_info; int ord = zebraExplain_lookup_attr_str(zh->reg->zei, zinfo_index_category_index, - reg_type, xpath_use); + index_type, xpath_use); if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL) return rset_create_null(rset_nmem, kc, 0); @@ -2026,7 +1963,7 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream, rset = rset_trunc(zh, grep_info.isam_p_buf, grep_info.isam_p_indx, term, strlen(term), flags, 1, term_type, rset_nmem, - kc, kc->scope, 0, reg_type, 0 /* hits_limit */, + kc, kc->scope, 0, index_type, 0 /* hits_limit */, 0 /* term_ref_id_str */); grep_info_delete(&grep_info); wrbuf_destroy(term_dict); @@ -2036,14 +1973,12 @@ static RSET 
xpath_trunc(ZebraHandle zh, NMEM stream, static ZEBRA_RES rpn_search_xpath(ZebraHandle zh, - int num_bases, char **basenames, NMEM stream, const char *rank_type, RSET rset, int xpath_len, struct xpath_location_step *xpath, NMEM rset_nmem, RSET *rset_out, struct rset_key_control *kc) { - int base_no; int i; int always_matches = rset ? 0 : 1; @@ -2080,19 +2015,11 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, */ dict_grep_cmap(zh->reg->dict, 0, 0); - - for (base_no = 0; base_no < num_bases; base_no++) + { int level = xpath_len; int first_path = 1; - if (zebraExplain_curDatabase(zh->reg->zei, basenames[base_no])) - { - zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE, - basenames[base_no]); - *rset_out = rset; - return ZEBRA_FAIL; - } while (--level >= 0) { WRBUF xpath_rev = wrbuf_alloc(); @@ -2142,7 +2069,8 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, } } rset_attr = xpath_trunc( - zh, stream, '0', wrbuf_cstr(wbuf), ZEBRA_XPATH_ATTR_NAME, + zh, stream, "0", wrbuf_cstr(wbuf), + ZEBRA_XPATH_ATTR_NAME, rset_nmem, kc); wrbuf_destroy(wbuf); } @@ -2158,7 +2086,7 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, wrbuf_cstr(xpath_rev)); if (wrbuf_len(xpath_rev)) { - rset_start_tag = xpath_trunc(zh, stream, '0', + rset_start_tag = xpath_trunc(zh, stream, "0", wrbuf_cstr(xpath_rev), ZEBRA_XPATH_ELM_BEGIN, rset_nmem, kc); @@ -2166,7 +2094,7 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, rset = rset_start_tag; else { - rset_end_tag = xpath_trunc(zh, stream, '0', + rset_end_tag = xpath_trunc(zh, stream, "0", wrbuf_cstr(xpath_rev), ZEBRA_XPATH_ELM_END, rset_nmem, kc); @@ -2186,6 +2114,14 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, #define MAX_XPATH_STEPS 10 +static ZEBRA_RES rpn_search_database(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const Odr_oid *attributeSet, NMEM stream, + Z_SortKeySpecList *sort_sequence, + NMEM rset_nmem, + RSET *rset, + struct rset_key_control *kc); + static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const Odr_oid 
*attributeSet, NMEM stream, Z_SortKeySpecList *sort_sequence, @@ -2194,8 +2130,54 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, RSET *rset, struct rset_key_control *kc) { + RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets)); + ZEBRA_RES res = ZEBRA_OK; + int i; + for (i = 0; i < num_bases; i++) + { + + if (zebraExplain_curDatabase(zh->reg->zei, basenames[i])) + { + zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE, + basenames[i]); + res = ZEBRA_FAIL; + break; + } + res = rpn_search_database(zh, zapt, attributeSet, stream, + sort_sequence, + rset_nmem, rsets+i, kc); + if (res != ZEBRA_OK) + break; + } + if (res != ZEBRA_OK) + { /* must clean up the already created sets */ + while (--i >= 0) + rset_delete(rsets[i]); + *rset = 0; + } + else + { + if (num_bases == 1) + *rset = rsets[0]; + else if (num_bases == 0) + *rset = rset_create_null(rset_nmem, kc, 0); + else + *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */, + num_bases, rsets); + } + return res; +} + +static ZEBRA_RES rpn_search_database(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const Odr_oid *attributeSet, NMEM stream, + Z_SortKeySpecList *sort_sequence, + NMEM rset_nmem, + RSET *rset, + struct rset_key_control *kc) +{ ZEBRA_RES res = ZEBRA_OK; - unsigned reg_id; + const char *index_type; char *search_type = NULL; char rank_type[128]; int complete_flag; @@ -2210,10 +2192,10 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, log_level_rpn = yaz_log_module_level("rpn"); log_level_set = 1; } - zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type, + zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type, rank_type, &complete_flag, &sort_flag); - yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id); + yaz_log(YLOG_DEBUG, "index_type=%s", index_type); yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag); yaz_log(YLOG_DEBUG, "search_type=%s", search_type); yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type); @@ 
-2245,8 +2227,7 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, if (relation_value == 103) /* alwaysmatches */ { *rset = 0; /* signal no "term" set */ - return rpn_search_xpath(zh, num_bases, basenames, - stream, rank_type, *rset, + return rpn_search_xpath(zh, stream, rank_type, *rset, xpath_len, xpath, rset_nmem, rset, kc); } } @@ -2263,25 +2244,25 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, if (!strcmp(search_type, "phrase")) { res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream, - reg_id, complete_flag, rank_type, + index_type, complete_flag, rank_type, xpath_use, - num_bases, basenames, rset_nmem, + rset_nmem, rset, kc); } else if (!strcmp(search_type, "and-list")) { res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream, - reg_id, complete_flag, rank_type, + index_type, complete_flag, rank_type, xpath_use, - num_bases, basenames, rset_nmem, + rset_nmem, rset, kc); } else if (!strcmp(search_type, "or-list")) { res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream, - reg_id, complete_flag, rank_type, + index_type, complete_flag, rank_type, xpath_use, - num_bases, basenames, rset_nmem, + rset_nmem, rset, kc); } else if (!strcmp(search_type, "local")) @@ -2292,9 +2273,9 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, else if (!strcmp(search_type, "numeric")) { res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream, - reg_id, complete_flag, rank_type, + index_type, complete_flag, rank_type, xpath_use, - num_bases, basenames, rset_nmem, + rset_nmem, rset, kc); } else @@ -2306,8 +2287,7 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, return res; if (!*rset) return ZEBRA_FAIL; - return rpn_search_xpath(zh, num_bases, basenames, - stream, rank_type, *rset, + return rpn_search_xpath(zh, stream, rank_type, *rset, xpath_len, xpath, rset_nmem, rset, kc); }