X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fzrpn.c;h=4e1acd9e758fbfca21de02ba6584430385c0ce54;hb=eb2b742588ce07fb4516bbca22c93b938b13e433;hp=7a23376e476b023f201e26567bade942bc14104b;hpb=32e3781dfb248c2109df1753d33f9a6aeec2a78a;p=idzebra-moved-to-github.git diff --git a/index/zrpn.c b/index/zrpn.c index 7a23376..4e1acd9 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -4,7 +4,16 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zrpn.c,v $ - * Revision 1.99 1999-12-23 09:03:32 adam + * Revision 1.102 2000-03-15 15:00:31 adam + * First work on threaded version. + * + * Revision 1.101 2000/03/02 14:35:03 adam + * Fixed proximity handling. + * + * Revision 1.100 1999/12/28 15:48:12 adam + * Minor Fix. + * + * Revision 1.99 1999/12/23 09:03:32 adam * Changed behaviour of trunc=105 so that * is regular .* and ! is regular . * * Revision 1.98 1999/11/30 13:48:04 adam @@ -371,9 +380,9 @@ static const char **rpn_char_map_handler (void *vp, const char **from, int len) static void rpn_char_map_prepare (ZebraHandle zh, int reg_type, struct rpn_char_map_info *map_info) { - map_info->zm = zh->zebra_maps; + map_info->zm = zh->service->zebra_maps; map_info->reg_type = reg_type; - dict_grep_cmap (zh->dict, map_info, rpn_char_map_handler); + dict_grep_cmap (zh->service->dict, map_info, rpn_char_map_handler); } typedef struct { @@ -466,7 +475,8 @@ static void term_untrans (ZebraHandle zh, int reg_type, { while (*src) { - const char *cp = zebra_maps_output (zh->zebra_maps, reg_type, &src); + const char *cp = zebra_maps_output (zh->service->zebra_maps, + reg_type, &src); if (!cp) *dst++ = *src++; else @@ -736,7 +746,6 @@ static int term_105 (ZebraMaps zebra_maps, int reg_type, int i = 0; int j = 0; - logf (LOG_LOG, " if (!term_pre (zebra_maps, reg_type, src, "*!", "*!")) return 0; s0 = *src; @@ -927,7 +936,8 @@ static int string_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt, switch (relation_value) { case 1: - if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_component, + if (!term_100 (zh->service->zebra_maps, reg_type, + term_sub, term_component, space_split, term_dst)) return 0; logf (LOG_DEBUG, "Relation <"); @@ -956,7 +966,8 @@ static int string_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt, *term_tmp = '\0'; break; case 2: - if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_component, + if (!term_100 (zh->service->zebra_maps, reg_type, + term_sub, term_component, space_split, term_dst)) return 0; logf (LOG_DEBUG, "Relation <="); @@ -986,8 +997,8 @@ static int string_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt, *term_tmp = '\0'; break; case 5: - if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_component, - space_split, term_dst)) + if (!term_100 (zh->service->zebra_maps, reg_type, + term_sub, term_component, space_split, term_dst)) return 0; logf (LOG_DEBUG, "Relation >"); @@ -1018,8 +1029,8 @@ static int string_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt, *term_tmp = '\0'; break; case 4: - if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_component, - space_split, term_dst)) + if (!term_100 (zh->service->zebra_maps, reg_type, term_sub, + term_component, space_split, term_dst)) return 0; logf (LOG_DEBUG, "Relation >="); @@ -1055,8 +1066,8 @@ static int string_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt, case 3: default: logf (LOG_DEBUG, "Relation ="); - if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_component, - space_split, term_dst)) + if (!term_100 (zh->service->zebra_maps, reg_type, term_sub, + term_component, space_split, term_dst)) return 0; strcat (term_tmp, "("); strcat (term_tmp, term_component); @@ -1128,7 +1139,7 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, } return -1; } - if (zebraExplain_curDatabase (zh->zei, basenames[base_no])) + if (zebraExplain_curDatabase (zh->service->zei, basenames[base_no])) { zh->errCode = 109; /* Database unavailable */ zh->errString = basenames[base_no]; @@ -1141,7 +1152,7 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, char ord_buf[32]; int i, ord_len; - ord = zebraExplain_lookupSU (zh->zei, attp.attset_ordinal, + ord = zebraExplain_lookupSU (zh->service->zei, attp.attset_ordinal, local_attr->local); if (ord < 0) continue; @@ -1180,59 +1191,59 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, reg_type, space_split, term_dst)) return 0; logf (LOG_DEBUG, "dict_lookup_grep: %s", term_dict+prefix_len); - r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info, &max_pos, - 0, grep_handle); + r = dict_lookup_grep (zh->service->dict, term_dict, 0, + grep_info, &max_pos, 0, grep_handle); if (r) logf (LOG_WARN, "dict_lookup_grep fail, rel=gt: %d", r); break; case 1: /* right truncation */ term_dict[j++] = '('; - if (!term_100 (zh->zebra_maps, reg_type, + if (!term_100 (zh->service->zebra_maps, reg_type, &termp, term_dict + j, space_split, term_dst)) return 0; strcat (term_dict, ".*)"); - dict_lookup_grep (zh->dict, term_dict, 0, grep_info, + dict_lookup_grep (zh->service->dict, term_dict, 0, grep_info, &max_pos, 0, grep_handle); break; case 2: /* keft truncation */ term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*'; - if (!term_100 (zh->zebra_maps, reg_type, + if (!term_100 (zh->service->zebra_maps, reg_type, &termp, term_dict + j, space_split, term_dst)) return 0; strcat (term_dict, ")"); - dict_lookup_grep (zh->dict, term_dict, 0, grep_info, + dict_lookup_grep (zh->service->dict, term_dict, 0, grep_info, &max_pos, 0, grep_handle); break; case 3: /* left&right truncation */ term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*'; - if (!term_100 (zh->zebra_maps, reg_type, + if (!term_100 (zh->service->zebra_maps, reg_type, &termp, term_dict + j, space_split, term_dst)) return 0; strcat (term_dict, ".*)"); - dict_lookup_grep (zh->dict, term_dict, 0, grep_info, + dict_lookup_grep (zh->service->dict, term_dict, 0, grep_info, &max_pos, 0, grep_handle); break; zh->errCode = 120; return -1; case 101: /* process # in term */ term_dict[j++] = '('; - if (!term_101 (zh->zebra_maps, reg_type, + if (!term_101 (zh->service->zebra_maps, reg_type, &termp, term_dict + j, space_split, term_dst)) return 0; strcat (term_dict, ")"); - r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info, + r = dict_lookup_grep (zh->service->dict, term_dict, 0, grep_info, &max_pos, 0, grep_handle); if (r) logf (LOG_WARN, "dict_lookup_grep err, trunc=#: %d", r); break; case 102: /* Regexp-1 */ term_dict[j++] = '('; - if (!term_102 (zh->zebra_maps, reg_type, + if (!term_102 (zh->service->zebra_maps, reg_type, &termp, term_dict + j, space_split, term_dst)) return 0; strcat (term_dict, ")"); logf (LOG_DEBUG, "Regexp-1 tolerance=%d", r); - r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info, + r = dict_lookup_grep (zh->service->dict, term_dict, 0, grep_info, &max_pos, 0, grep_handle); if (r) logf (LOG_WARN, "dict_lookup_grep err, trunc=regular: %d", @@ -1241,12 +1252,12 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, case 103: /* Regexp-2 */ r = 1; term_dict[j++] = '('; - if (!term_103 (zh->zebra_maps, reg_type, + if (!term_103 (zh->service->zebra_maps, reg_type, &termp, term_dict + j, &r, space_split, term_dst)) return 0; strcat (term_dict, ")"); logf (LOG_DEBUG, "Regexp-2 tolerance=%d", r); - r = dict_lookup_grep (zh->dict, term_dict, r, grep_info, + r = dict_lookup_grep (zh->service->dict, term_dict, r, grep_info, &max_pos, 2, grep_handle); if (r) logf (LOG_WARN, "dict_lookup_grep err, trunc=eregular: %d", @@ -1254,33 +1265,33 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, break; case 104: /* process # and ! in term */ term_dict[j++] = '('; - if (!term_104 (zh->zebra_maps, reg_type, + if (!term_104 (zh->service->zebra_maps, reg_type, &termp, term_dict + j, space_split, term_dst)) return 0; strcat (term_dict, ")"); - r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info, + r = dict_lookup_grep (zh->service->dict, term_dict, 0, grep_info, &max_pos, 0, grep_handle); if (r) logf (LOG_WARN, "dict_lookup_grep err, trunc=#/!: %d", r); break; case 105: /* process * and ! in term */ term_dict[j++] = '('; - if (!term_105 (zh->zebra_maps, reg_type, + if (!term_105 (zh->service->zebra_maps, reg_type, &termp, term_dict + j, space_split, term_dst, 1)) return 0; strcat (term_dict, ")"); - r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info, + r = dict_lookup_grep (zh->service->dict, term_dict, 0, grep_info, &max_pos, 0, grep_handle); if (r) logf (LOG_WARN, "dict_lookup_grep err, trunc=*/!: %d", r); break; case 106: /* process * and ! in term */ term_dict[j++] = '('; - if (!term_105 (zh->zebra_maps, reg_type, + if (!term_105 (zh->service->zebra_maps, reg_type, &termp, term_dict + j, space_split, term_dst, 0)) return 0; strcat (term_dict, ")"); - r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info, + r = dict_lookup_grep (zh->service->dict, term_dict, 0, grep_info, &max_pos, 0, grep_handle); if (r) logf (LOG_WARN, "dict_lookup_grep err, trunc=*/!: %d", r); @@ -1319,7 +1330,7 @@ static void trans_scan_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, while ((len = (cp_end - cp)) > 0) { - map = zebra_maps_input (zh->zebra_maps, reg_type, &cp, len); + map = zebra_maps_input (zh->service->zebra_maps, reg_type, &cp, len); if (**map == *CHR_SPACE) space_map = *map; else @@ -1335,99 +1346,8 @@ static void trans_scan_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, termz[i] = '\0'; } -static RSET rpn_proximity (ZebraHandle zh, RSET rset1, RSET rset2, - int ordered, - int exclusion, int relation, int distance) -{ - int i; - RSFD rsfd1, rsfd2; - int more1, more2; - struct it_key buf1, buf2; - RSFD rsfd_result; - RSET result; - rset_temp_parms parms; - int term_index; - - rsfd1 = rset_open (rset1, RSETF_READ); - more1 = rset_read (rset1, rsfd1, &buf1, &term_index); - - rsfd2 = rset_open (rset2, RSETF_READ); - more2 = rset_read (rset2, rsfd2, &buf2, &term_index); - - parms.key_size = sizeof (struct it_key); - parms.temp_path = res_get (zh->res, "setTmpDir"); - result = rset_create (rset_kind_temp, &parms); - rsfd_result = rset_open (result, RSETF_WRITE); - - logf (LOG_DEBUG, "rpn_proximity excl=%d ord=%d rel=%d dis=%d", - exclusion, ordered, relation, distance); - while (more1 && more2) - { - int cmp = key_compare_it (&buf1, &buf2); - if (cmp < -1) - more1 = rset_read (rset1, rsfd1, &buf1, &term_index); - else if (cmp > 1) - more2 = rset_read (rset2, rsfd2, &buf2, &term_index); - else - { - int sysno = buf1.sysno; - int seqno[500]; - int n = 0; - - seqno[n++] = buf1.seqno; - while ((more1 = rset_read (rset1, rsfd1, &buf1, &term_index)) && - sysno == buf1.sysno) - if (n < 500) - seqno[n++] = buf1.seqno; - do - { - for (i = 0; i= */ - if (diff >= distance) - excl = !excl; - break; - case 5: /* > */ - if (diff > distance) - excl = !excl; - break; - case 6: /* != */ - if (diff != distance) - excl = !excl; - break; - } - if (excl) - rset_write (result, rsfd_result, &buf2); - } - } while ((more2 = rset_read (rset2, rsfd2, &buf2, &term_index)) && - sysno == buf2.sysno); - } - } - rset_close (result, rsfd_result); - rset_close (rset1, rsfd1); - rset_close (rset2, rsfd2); - return result; -} - -static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no) +static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no, + int ordered, int exclusion, int relation, int distance) { int i; RSFD *rsfd; @@ -1467,6 +1387,11 @@ static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no) } for (i = 0; i= 0) - { - rset_close (rset[i], rsfd[i]); - xfree (buf[i]); - --i; - } + parms.rset_term = rset_term_create (prox_term, length_prox_term, flags); parms.rset_term->nn = 0; result = rset_create (rset_kind_null, &parms); } - else + else if (ordered && relation == 3 && exclusion == 0 && distance == 1) { + /* special proximity case = phrase search ... */ rset_temp_parms parms; RSFD rsfd_result; @@ -1496,7 +1417,7 @@ static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no) flags); parms.rset_term->nn = min_nn; parms.key_size = sizeof (struct it_key); - parms.temp_path = res_get (zh->res, "setTmpDir"); + parms.temp_path = res_get (zh->service->res, "setTmpDir"); result = rset_create (rset_kind_temp, &parms); rsfd_result = rset_open (result, RSETF_WRITE); @@ -1540,14 +1461,106 @@ static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no) more[0] = rset_read (*rset, *rsfd, *buf, &term_index); } } - - for (i = 0; inn = min_nn; + parms.key_size = sizeof (struct it_key); + parms.temp_path = res_get (zh->service->res, "setTmpDir"); + result = rset_create (rset_kind_temp, &parms); + rsfd_result = rset_open (result, RSETF_WRITE); + + while (more[0] && more[1]) { - rset_close (rset[i], rsfd[i]); - xfree (buf[i]); + int cmp = key_compare_it (buf[0], buf[1]); + if (cmp < -1) + more[0] = rset_read (rset[0], rsfd[0], buf[0], &term_index); + else if (cmp > 1) + more[1] = rset_read (rset[1], rsfd[1], buf[1], &term_index); + else + { + int sysno = buf[0]->sysno; + int seqno[500]; + int n = 0; + + seqno[n++] = buf[0]->seqno; + while ((more[0] = rset_read (rset[0], rsfd[0], buf[0], + &term_index)) && + sysno == buf[0]->sysno) + if (n < 500) + seqno[n++] = buf[0]->seqno; + do + { + for (i = 0; iseqno - seqno[i]; + int excl = exclusion; + if (!ordered && diff < 0) + diff = -diff; + switch (relation) + { + case 1: /* < */ + if (diff < distance && diff >= 0) + excl = !excl; + break; + case 2: /* <= */ + if (diff <= distance && diff >= 0) + excl = !excl; + break; + case 3: /* == */ + if (diff == distance && diff >= 0) + excl = !excl; + break; + case 4: /* >= */ + if (diff >= distance && diff >= 0) + excl = !excl; + break; + case 5: /* > */ + if (diff > distance && diff >= 0) + excl = !excl; + break; + case 6: /* != */ + if (diff != distance && diff >= 0) + excl = !excl; + break; + } + if (excl) + { + rset_write (result, rsfd_result, buf[1]); + break; + } + } + } while ((more[1] = rset_read (rset[1], rsfd[1], buf[1], + &term_index)) && + sysno == buf[1]->sysno); + } } rset_close (result, rsfd_result); } + else + { + rset_null_parms parms; + + parms.rset_term = rset_term_create (prox_term, length_prox_term, + flags); + parms.rset_term->nn = 0; + result = rset_create (rset_kind_null, &parms); + } + for (i = 0; izebra_maps, reg_id, ex_list, + wrbuf = zebra_replace(zh->service->zebra_maps, reg_id, ex_list, termz, strlen(termz)); if (!wrbuf) return nmem_strdup(stream, termz); @@ -1653,7 +1666,7 @@ static RSET rpn_search_APT_phrase (ZebraHandle zh, } else if (rset_no == 1) return (rset[0]); - result = rpn_prox (zh, rset, rset_no); + result = rpn_prox (zh, rset, rset_no, 1, 0, 3, 1); for (i = 0; izebra_maps, reg_type, term_sub, term_tmp, 1, + if (!term_100 (zh->service->zebra_maps, reg_type, term_sub, term_tmp, 1, term_dst)) return 0; term_value = atoi (term_tmp); @@ -1838,7 +1851,7 @@ static int numeric_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt, sprintf (term_tmp, "(0*%d)", term_value); } logf (LOG_DEBUG, "dict_lookup_grep: %s", term_tmp); - r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info, max_pos, + r = dict_lookup_grep (zh->service->dict, term_dict, 0, grep_info, max_pos, 0, grep_handle); if (r) logf (LOG_WARN, "dict_lookup_grep fail, rel=gt: %d", r); @@ -1886,7 +1899,7 @@ static int numeric_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, zh->errCode = 121; return -1; } - if (zebraExplain_curDatabase (zh->zei, basenames[base_no])) + if (zebraExplain_curDatabase (zh->service->zei, basenames[base_no])) { zh->errCode = 109; /* Database unavailable */ zh->errString = basenames[base_no]; @@ -1899,7 +1912,7 @@ static int numeric_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, char ord_buf[32]; int i, ord_len; - ord = zebraExplain_lookupSU (zh->zei, attp.attset_ordinal, + ord = zebraExplain_lookupSU (zh->service->zei, attp.attset_ordinal, local_attr->local); if (ord < 0) continue; @@ -2013,7 +2026,7 @@ static RSET rpn_search_APT_local (ZebraHandle zh, Z_AttributesPlusTerm *zapt, parms.rset_term = rset_term_create (termz, -1, rank_type); parms.key_size = sizeof (struct it_key); - parms.temp_path = res_get (zh->res, "setTmpDir"); + parms.temp_path = res_get (zh->service->res, "setTmpDir"); result = rset_create (rset_kind_temp, &parms); rsfd = rset_open (result, RSETF_WRITE); @@ -2138,7 +2151,7 @@ static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt, int sort_flag; char termz[IT_MAX_WORD+1]; - zebra_maps_attr (zh->zebra_maps, zapt, ®_id, &search_type, + zebra_maps_attr (zh->service->zebra_maps, zapt, ®_id, &search_type, &rank_type, &complete_flag, &sort_flag); logf (LOG_DEBUG, "reg_id=%c", reg_id); @@ -2264,12 +2277,22 @@ static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs, return NULL; } #endif - r = rpn_proximity (zh, bool_parms.rset_l, bool_parms.rset_r, - *zop->u.prox->ordered, - (!zop->u.prox->exclusion ? 0 : - *zop->u.prox->exclusion), - *zop->u.prox->relationType, - *zop->u.prox->distance); + else + { + RSET rsets[2]; + + rsets[0] = bool_parms.rset_l; + rsets[1] = bool_parms.rset_r; + + r = rpn_prox (zh, rsets, 2, + *zop->u.prox->ordered, + (!zop->u.prox->exclusion ? 0 : + *zop->u.prox->exclusion), + *zop->u.prox->relationType, + *zop->u.prox->distance); + rset_delete (rsets[0]); + rset_delete (rsets[1]); + } break; default: zh->errCode = 110; @@ -2461,7 +2484,7 @@ void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, attr_init (&use, zapt, 1); use_value = attr_find (&use, &attributeset); - if (zebra_maps_attr (zh->zebra_maps, zapt, ®_id, &search_type, + if (zebra_maps_attr (zh->service->zebra_maps, zapt, ®_id, &search_type, &rank_type, &complete_flag, &sort_flag)) { zh->errCode = 113; @@ -2487,7 +2510,7 @@ void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, *num_entries = 0; return; } - if (zebraExplain_curDatabase (zh->zei, basenames[base_no])) + if (zebraExplain_curDatabase (zh->service->zei, basenames[base_no])) { zh->errString = basenames[base_no]; zh->errCode = 109; /* Database unavailable */ @@ -2498,7 +2521,7 @@ void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, { int ord; - ord = zebraExplain_lookupSU (zh->zei, attp.attset_ordinal, + ord = zebraExplain_lookupSU (zh->service->zei, attp.attset_ordinal, local_attr->local); if (ord > 0) ords[ord_no++] = ord; @@ -2539,8 +2562,8 @@ void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, trans_scan_term (zh, zapt, termz+prefix_len, reg_id); - dict_scan (zh->dict, termz, &before_tmp, &after_tmp, scan_info, - scan_handle); + dict_scan (zh->service->dict, termz, &before_tmp, &after_tmp, + scan_info, scan_handle); } glist = (ZebraScanEntry *) odr_malloc (stream, (before+after)*sizeof(*glist));