X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fzrpn.c;h=2a45b7d90839feba3c49fef8c653f4bd2c16491c;hb=9cf5dedda26c0ebaf38daa6fa3f15a763d6b2fcb;hp=7388b1354761b21290d56b86aec8e02556fbf281;hpb=0b5d38bc84d5261aaed3bce3be748c5d7008f2ff;p=idzebra-moved-to-github.git diff --git a/index/zrpn.c b/index/zrpn.c index 7388b13..2a45b7d 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -4,7 +4,17 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zrpn.c,v $ - * Revision 1.56 1996-11-08 11:10:32 adam + * Revision 1.59 1997-01-17 11:31:46 adam + * Bug fix: complete phrase search didn't work. + * + * Revision 1.58 1996/12/23 15:30:45 adam + * Work on truncation. + * Bug fix: result sets weren't deleted after server shut down. + * + * Revision 1.57 1996/11/11 13:38:02 adam + * Added proximity support in search. + * + * Revision 1.56 1996/11/08 11:10:32 adam * Buffers used during file match got bigger. * Compressed ISAM support everywhere. * Bug fixes regarding masking characters in queries. @@ -345,7 +355,7 @@ static int term_pre (char **src, const char *ct1, const char *ct2) return *s0; } -static int term_100 (char **src, char *dst) +static int term_100 (char **src, char *dst, int space_split) { char *s0, *s1, **map; int i = 0; @@ -357,7 +367,7 @@ static int term_100 (char **src, char *dst) { s1 = s0; map = map_chrs_input (&s0, strlen(s0)); - if (**map == *CHR_SPACE) + if (space_split && **map == *CHR_SPACE) break; while (s1 < s0) { @@ -371,7 +381,7 @@ static int term_100 (char **src, char *dst) return i; } -static int term_101 (char **src, char *dst) +static int term_101 (char **src, char *dst, int space_split) { char *s0, *s1, **map; int i = 0; @@ -391,7 +401,7 @@ static int term_101 (char **src, char *dst) { s1 = s0; map = map_chrs_input (&s0, strlen(s0)); - if (**map == *CHR_SPACE) + if (space_split && **map == *CHR_SPACE) break; while (s1 < s0) { @@ -407,7 +417,7 @@ static int term_101 (char **src, char *dst) } -static int term_103 (char **src, char *dst, int 
*errors) +static int term_103 (char **src, char *dst, int *errors, int space_split) { int i = 0; char *s0, *s1, **map; @@ -446,9 +456,9 @@ static int term_103 (char **src, char *dst, int *errors) return i; } -static int term_102 (char **src, char *dst) +static int term_102 (char **src, char *dst, int space_split) { - return term_103 (src, dst, NULL); + return term_103 (src, dst, NULL, space_split); } /* gen_regular_rel - generate regular expression from relation @@ -578,7 +588,7 @@ static int relational_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, switch (relation_value) { case 1: - if (!term_100 (term_sub, term_dict)) + if (!term_100 (term_sub, term_dict, 1)) return 0; term_value = atoi (term_dict); if (term_value <= 0) @@ -587,7 +597,7 @@ static int relational_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, gen_regular_rel (term_dict + strlen(term_dict), term_value-1, 1); break; case 2: - if (!term_100 (term_sub, term_dict)) + if (!term_100 (term_sub, term_dict, 1)) return 0; term_value = atoi (term_dict); if (term_value < 0) @@ -596,7 +606,7 @@ static int relational_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, gen_regular_rel (term_dict + strlen(term_dict), term_value, 1); break; case 4: - if (!term_100 (term_sub, term_dict)) + if (!term_100 (term_sub, term_dict, 1)) return 0; term_value = atoi (term_dict); if (term_value < 0) @@ -605,7 +615,7 @@ static int relational_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, gen_regular_rel (term_dict + strlen(term_dict), term_value, 0); break; case 5: - if (!term_100 (term_sub, term_dict)) + if (!term_100 (term_sub, term_dict, 1)) return 0; term_value = atoi (term_dict); if (term_value < 0) @@ -628,7 +638,7 @@ static int relational_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, static int field_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, char **term_sub, int regType, oid_value attributeSet, struct grep_info *grep_info, - int num_bases, char **basenames) + int num_bases, char **basenames, int 
space_split) { char term_dict[2*IT_MAX_WORD+2]; int j, r, base_no; @@ -641,7 +651,7 @@ static int field_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, attr_init (&use, zapt, 1); use_value = attr_find (&use, &curAttributeSet); - logf (LOG_DEBUG, "use value %d", use_value); + logf (LOG_DEBUG, "field_term, use value %d", use_value); attr_init (&truncation, zapt, 5); truncation_value = attr_find (&truncation, NULL); logf (LOG_DEBUG, "truncation value %d", truncation_value); @@ -699,13 +709,18 @@ static int field_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, attributeSet, grep_info, &max_pos)) { j = prefix_len; + logf (LOG_LOG, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"); switch (truncation_value) { case -1: /* not specified */ case 100: /* do not truncate */ - term_dict[j++] = '('; - if (!term_100 (&termp, term_dict + j)) + term_dict[j++] = '('; + logf (LOG_LOG, "termp=%s", termp); + if (!term_100 (&termp, term_dict + j, space_split)) + { + logf (LOG_LOG, "aaaaaaaaaaaaargh"); return 0; + } strcat (term_dict, ")"); r = dict_lookup_grep (zi->dict, term_dict, 0, grep_info, &max_pos, 0, grep_handle); @@ -714,7 +729,7 @@ static int field_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, break; case 1: /* right truncation */ term_dict[j++] = '('; - if (!term_100 (&termp, term_dict + j)) + if (!term_100 (&termp, term_dict + j, space_split)) return 0; strcat (term_dict, ".*)"); dict_lookup_grep (zi->dict, term_dict, 0, grep_info, @@ -726,7 +741,7 @@ static int field_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, return -1; case 101: /* process # in term */ term_dict[j++] = '('; - if (!term_101 (&termp, term_dict + j)) + if (!term_101 (&termp, term_dict + j, space_split)) return 0; strcat (term_dict, ")"); r = dict_lookup_grep (zi->dict, term_dict, 0, grep_info, @@ -736,7 +751,7 @@ static int field_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, break; case 102: /* Regexp-1 */ term_dict[j++] = '('; - if (!term_102 (&termp, term_dict + j)) + if (!term_102 (&termp, 
term_dict + j, space_split)) return 0; strcat (term_dict, ")"); logf (LOG_DEBUG, "Regexp-1 tolerance=%d", r); @@ -749,7 +764,7 @@ static int field_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, case 103: /* Regexp-1 */ r = 1; term_dict[j++] = '('; - if (!term_103 (&termp, term_dict + j, &r)) + if (!term_103 (&termp, term_dict + j, &r, space_split)) return 0; strcat (term_dict, ")"); logf (LOG_DEBUG, "Regexp-2 tolerance=%d", r); @@ -823,8 +838,8 @@ static RSET rpn_search_APT_relevance (ZServerInfo *zi, int r; parms.key_size = sizeof(struct it_key); - parms.max_rec = 100; - parms.cmp = key_compare; + parms.max_rec = 1000; + parms.cmp = key_compare_it; parms.is = zi->isam; parms.isc = zi->isamc; parms.no_terms = 0; @@ -845,7 +860,7 @@ static RSET rpn_search_APT_relevance (ZServerInfo *zi, while (1) { r = field_term (zi, zapt, &termp, 'w', attributeSet, &grep_info, - num_bases, basenames); + num_bases, basenames, 1); if (r <= 0) break; #ifdef TERM_COUNT @@ -894,7 +909,7 @@ static RSET rpn_search_APT_cphrase (ZServerInfo *zi, grep_info.isam_p_buf = NULL; r = field_term (zi, zapt, &termp, 'p', attributeSet, &grep_info, - num_bases, basenames); + num_bases, basenames, 0); result = rset_trunc (zi, grep_info.isam_p_buf, grep_info.isam_p_indx); #ifdef TERM_COUNT xfree(grep_info.term_no); @@ -903,6 +918,95 @@ static RSET rpn_search_APT_cphrase (ZServerInfo *zi, return result; } +static RSET rpn_proximity (RSET rset1, RSET rset2, int ordered, + int exclusion, int relation, int distance) +{ + int i; + RSFD rsfd1, rsfd2; + int more1, more2; + struct it_key buf1, buf2; + RSFD rsfd_result; + RSET result; + rset_temp_parms parms; + + rsfd1 = rset_open (rset1, RSETF_READ|RSETF_SORT_SYSNO); + more1 = rset_read (rset1, rsfd1, &buf1); + + rsfd2 = rset_open (rset2, RSETF_READ|RSETF_SORT_SYSNO); + more2 = rset_read (rset2, rsfd2, &buf2); + + parms.key_size = sizeof (struct it_key); + result = rset_create (rset_kind_temp, &parms); + rsfd_result = rset_open (result, 
RSETF_WRITE|RSETF_SORT_SYSNO); + + logf (LOG_DEBUG, "rpn_proximity excl=%d ord=%d rel=%d dis=%d", + exclusion, ordered, relation, distance); + while (more1 && more2) + { + int cmp = key_compare_it (&buf1, &buf2); + if (cmp < -1) + more1 = rset_read (rset1, rsfd1, &buf1); + else if (cmp > 1) + more2 = rset_read (rset2, rsfd2, &buf2); + else + { + int sysno = buf1.sysno; + int seqno[500]; + int n = 0; + + seqno[n++] = buf1.seqno; + while ((more1 = rset_read (rset1, rsfd1, &buf1)) && + sysno == buf1.sysno) + if (n < 500) + seqno[n++] = buf1.seqno; + do + { + for (i = 0; i<n; i++) + { + int diff = buf2.seqno - seqno[i]; + int excl = exclusion; + if (!ordered && diff < 0) + diff = -diff; + switch (relation) + { + case 1: /* < */ + if (diff < distance) + excl = !excl; + break; + case 2: /* <= */ + if (diff <= distance) + excl = !excl; + break; + case 3: /* == */ + if (diff == distance) + excl = !excl; + break; + case 4: /* >= */ + if (diff >= distance) + excl = !excl; + break; + case 5: /* > */ + if (diff > distance) + excl = !excl; + break; + case 6: /* != */ + if (diff != distance) + excl = !excl; + break; + } + if (excl) + rset_write (result, rsfd_result, &buf2); + } + } while ((more2 = rset_read (rset2, rsfd2, &buf2)) && + sysno == buf2.sysno); + } + } + rset_close (result, rsfd_result); + rset_close (rset1, rsfd1); + rset_close (rset2, rsfd2); + return result; +} + static RSET rpn_prox (RSET *rset, int rset_no) { int i; @@ -950,7 +1054,7 @@ static RSET rpn_prox (RSET *rset, int rset_no) *more = 0; break; } - cmp = key_compare (buf[i], buf[i-1]); + cmp = key_compare_it (buf[i], buf[i-1]); if (cmp > 1) { more[i-1] = rset_read (rset[i-1], rsfd[i-1], buf[i-1]); @@ -1017,7 +1121,7 @@ static RSET rpn_search_APT_phrase (ZServerInfo *zi, { grep_info.isam_p_indx = 0; r = field_term (zi, zapt, &termp, 'w', attributeSet, &grep_info, - num_bases, basenames); + num_bases, basenames, 1); if (r < 1) break; rset[rset_no] = rset_trunc (zi, grep_info.isam_p_buf, @@ -1167,8 +1271,10 @@ static RSET rpn_search_structure (ZServerInfo *zi, Z_RPNStructure *zs, RSET r = NULL; if (zs->which == Z_RPNStructure_complex) { + Z_Operator *zop = zs->u.complex->roperator; rset_bool_parms bool_parms; int soft = 0; + bool_parms.rset_l = rpn_search_structure (zi, zs->u.complex->s1, attributeSet, @@ -1188,9 +1294,9 @@ static RSET rpn_search_structure 
(ZServerInfo *zi, Z_RPNStructure *zs, if (rset_is_ranked(bool_parms.rset_r)) soft = 1; bool_parms.key_size = sizeof(struct it_key); - bool_parms.cmp = key_compare; + bool_parms.cmp = key_compare_it; - switch (zs->u.complex->roperator->which) + switch (zop->which) { case Z_Operator_and: r = rset_create (soft ? rset_kind_sand:rset_kind_and, &bool_parms); @@ -1201,8 +1307,30 @@ static RSET rpn_search_structure (ZServerInfo *zi, Z_RPNStructure *zs, case Z_Operator_and_not: r = rset_create (soft ? rset_kind_snot:rset_kind_not, &bool_parms); break; + case Z_Operator_prox: + if (zop->u.prox->which != Z_ProxCode_known) + { + zi->errCode = 132; + return NULL; + } + if (*zop->u.prox->proximityUnitCode != Z_ProxUnit_word) + { + static char val[16]; + zi->errCode = 132; + zi->errString = val; + sprintf (val, "%d", *zop->u.prox->proximityUnitCode); + return NULL; + } + r = rpn_proximity (bool_parms.rset_l, bool_parms.rset_r, + *zop->u.prox->ordered, + (!zop->u.prox->exclusion ? 0 : + *zop->u.prox->exclusion), + *zop->u.prox->relationType, + *zop->u.prox->distance); + break; default: - assert (0); + zi->errCode = 110; + return NULL; } } else if (zs->which == Z_RPNStructure_simple) @@ -1220,12 +1348,14 @@ static RSET rpn_search_structure (ZServerInfo *zi, Z_RPNStructure *zs, } else { - assert (0); + zi->errCode = 3; + return NULL; } } else { - assert (0); + zi->errCode = 3; + return NULL; } return r; } @@ -1247,12 +1377,12 @@ void count_set_save (RSET *r, int *count) rfd = rset_open (*r, RSETF_READ|RSETF_SORT_SYSNO); while (rset_read (*r, rfd, &key)) { - logf (LOG_DEBUG, "sysno=%-7d seqno=%d", key.sysno, key.seqno); if (key.sysno != psysno) { - rset_write (w, wfd, &key); - psysno = key.sysno; + if (*count < 400) + rset_write (w, wfd, &key); (*count)++; + psysno = key.sysno; } kno++; } @@ -1340,7 +1470,6 @@ static int scan_handle (char *name, const char *info, int pos, void *client) idx = scan_info->after - pos + scan_info->before; else idx = - pos - 1; - logf (LOG_DEBUG, "%-3d 
%s", idx, name+len_prefix); scan_info->list[idx].term = odr_malloc (scan_info->odr, strlen(name + len_prefix)+1); strcpy (scan_info->list[idx].term, name + len_prefix); @@ -1500,7 +1629,7 @@ int rpn_scan (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, rset_trunc (zi, &scan_info_array[j].list[ptr[j]].isam_p, 1); bool_parms.key_size = sizeof(struct it_key); - bool_parms.cmp = key_compare; + bool_parms.cmp = key_compare_it; bool_parms.rset_l = rset; bool_parms.rset_r = rset2; @@ -1561,7 +1690,7 @@ int rpn_scan (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1); bool_parms.key_size = sizeof(struct it_key); - bool_parms.cmp = key_compare; + bool_parms.cmp = key_compare_it; bool_parms.rset_l = rset; bool_parms.rset_r = rset2;