X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fzrpn.c;h=4f50db2eae6ceb49c9edb220e2ea174bb9d6642f;hb=9af45a7f129664e5f802ff1cdfce08fbce8b1adb;hp=f57400ee8c01598244646886d38b0a4c6035c86c;hpb=435da88a838541759d5f9657000b6edfae37680b;p=idzebra-moved-to-github.git diff --git a/index/zrpn.c b/index/zrpn.c index f57400e..4f50db2 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -4,7 +4,22 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zrpn.c,v $ - * Revision 1.16 1995-10-02 16:24:40 adam + * Revision 1.21 1995-10-06 13:52:06 adam + * Bug fixes. Handler may abort further scanning. + * + * Revision 1.20 1995/10/06 11:06:33 adam + * Scan entries include 'occurrences' now. + * + * Revision 1.19 1995/10/06 10:43:56 adam + * Scan added. 'occurrences' in scan entries not set yet. + * + * Revision 1.18 1995/10/04 16:57:20 adam + * Key input and merge sort in one pass. + * + * Revision 1.17 1995/10/04 12:55:17 adam + * Bug fix in ranked search. Use=Any keys inserted. + * + * Revision 1.16 1995/10/02 16:24:40 adam * Use attribute actually used in search requests. * * Revision 1.15 1995/10/02 15:18:52 adam @@ -338,21 +353,31 @@ static int trunc_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, const char *info; AttrType truncation; int truncation_value; + AttrType use; + int use_value; + attr_init (&use, zapt, 1); + use_value = attr_find (&use); + logf (LOG_DEBUG, "use value %d", use_value); attr_init (&truncation, zapt, 5); truncation_value = attr_find (&truncation); logf (LOG_DEBUG, "truncation value %d", truncation_value); + + if (use_value == -1) + use_value = 1016; + i = index_word_prefix (term_dict, 1, use_value); + switch (truncation_value) { case -1: /* not specified */ case 100: /* do not truncate */ - strcpy (term_dict, term_sub); + strcat (term_dict, term_sub); logf (LOG_DEBUG, "dict_lookup: %s", term_dict); if ((info = dict_lookup (zi->wordDict, term_dict))) add_isam_p (info); break; case 1: /* right truncation */ - strcpy (term_dict, term_sub); + strcat (term_dict, term_sub); strcat (term_dict, ".*"); dict_lookup_grep (zi->wordDict, term_dict, 0, grep_handle); break; @@ -361,7 +386,7 @@ static int trunc_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, zi->errCode = 120; return -1; case 101: /* process # in term */ - for (j = 0, i = 0; term_sub[i] && i < 2; i++) + for (j = strlen(term_dict), i = 0; term_sub[i] && i < 2; i++) term_dict[j++] = term_sub[i]; for (; term_sub[i]; i++) if (term_sub[i] == '#') @@ -375,7 +400,7 @@ static int trunc_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, dict_lookup_grep (zi->wordDict, term_dict, 0, grep_handle); break; case 102: /* regular expression */ - strcpy (term_dict, term_sub); + strcat (term_dict, term_sub); dict_lookup_grep (zi->wordDict, term_dict, 0, grep_handle); break; } @@ -384,29 +409,20 @@ static int trunc_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, return 0; } -static void field_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, +static void trans_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, char *termz) { - size_t i, j, sizez; - AttrType use; - int use_value; + size_t i, sizez; Z_Term *term = zapt->term; - attr_init (&use, zapt, 1); - use_value = attr_find (&use); - if (use_value == -1) - use_value = 1016; - - i = index_word_prefix (termz, 1, use_value); - sizez = i + term->u.general->len; + sizez = term->u.general->len; if (sizez > IT_MAX_WORD) sizez = IT_MAX_WORD; - for (j = 0; i < sizez; i++, j++) - termz[i] = index_char_cvt (term->u.general->buf[j]); + for (i = 0; i < sizez; i++) + termz[i] = index_char_cvt (term->u.general->buf[i]); termz[i] = '\0'; } - static RSET rpn_search_APT_relevance (ZServerInfo *zi, Z_AttributesPlusTerm *zapt) { @@ -426,7 +442,7 @@ static RSET rpn_search_APT_relevance (ZServerInfo *zi, zi->errCode = 124; return NULL; } - field_term (zi, zapt, termz); + trans_term (zi, zapt, termz); isam_p_indx = 0; /* global, set by trunc_term - see below */ while (1) { @@ -464,7 +480,7 @@ static RSET rpn_search_APT_word (ZServerInfo *zi, zi->errCode = 124; return NULL; } - field_term (zi, zapt, termz); + trans_term (zi, zapt, termz); isam_p_indx = 0; /* global, set by trunc_term - see below */ if (trunc_term (zi, zapt, termz, &isam_positions)) return NULL; @@ -494,7 +510,7 @@ static RSET rpn_search_APT_phrase (ZServerInfo *zi, zi->errCode = 124; return NULL; } - field_term (zi, zapt, termz); + trans_term (zi, zapt, termz); isam_p_indx = 0; /* global, set by trunc_term - see below */ if (trunc_term (zi, zapt, termz, &isam_positions)) return NULL; @@ -665,6 +681,92 @@ int rpn_search (ZServerInfo *zi, return zi->errCode; count_set (rset, hits); resultSetAdd (zi, setname, 1, rset); + if (zi->errCode) + logf (LOG_DEBUG, "search error: %d", zi->errCode); return zi->errCode; } +static struct scan_entry *scan_list; +static ODR scan_odr; +static int scan_before, scan_after; +static ISAM scan_isam; +static char scan_prefix[20]; + +static int scan_handle (Dict_char *name, const char *info, int pos) +{ + int len_prefix, idx; + ISAM_P isam_p; + RSET rset; + + rset_isam_parms parms; + + len_prefix = strlen(scan_prefix); + if (memcmp (name, scan_prefix, len_prefix)) + return 1; + if (pos > 0) + idx = scan_after - pos + scan_before; + else + idx = - pos - 1; + scan_list[idx].term = odr_malloc (scan_odr, strlen(name + len_prefix)+1); + strcpy (scan_list[idx].term, name + len_prefix); + assert (*info == sizeof(isam_p)); + memcpy (&isam_p, info+1, sizeof(isam_p)); + parms.is = scan_isam; + parms.pos = isam_p; +#if 1 + rset = rset_create (rset_kind_isam, &parms); + count_set (rset, &scan_list[idx].occurrences); + rset_delete (rset); +#else + scan_list[idx].occurrences = 1; +#endif + logf (LOG_DEBUG, "pos=%3d idx=%3d name=%s", pos, idx, name); + return 0; +} + +int rpn_scan (ZServerInfo *zi, ODR odr, Z_AttributesPlusTerm *zapt, + int *position, int *num_entries, struct scan_entry **list) +{ + int i, j, sizez; + int pos = *position; + int num = *num_entries; + int before; + int after; + char termz[IT_MAX_WORD+20]; + AttrType use; + int use_value; + Z_Term *term = zapt->term; + + logf (LOG_DEBUG, "scan, position = %d, num = %d", pos, num); + scan_before = before = pos-1; + scan_after = after = 1+num-pos; + scan_odr = odr; + + logf (LOG_DEBUG, "scan, before = %d, after = %d", before, after); + + scan_isam = zi->wordIsam; + scan_list = *list = odr_malloc (odr, (before+after)*sizeof(**list)); + for (j = 0; ju.general->len; + if (sizez > IT_MAX_WORD) + sizez = IT_MAX_WORD; + for (j = 0; ju.general->buf[j]); + termz[j+i] = '\0'; + + dict_scan (zi->wordDict, termz, &before, &after, scan_handle); + + if (zi->errCode) + logf (LOG_DEBUG, "search error: %d", zi->errCode); + return 0; +} +