X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fzrpn.c;h=4f50db2eae6ceb49c9edb220e2ea174bb9d6642f;hb=9af45a7f129664e5f802ff1cdfce08fbce8b1adb;hp=cc1893d46593c8aa649e3b74827ff0ee8246e769;hpb=1319f46e376da7a32f1af97f93966e72c7c464bc;p=idzebra-moved-to-github.git diff --git a/index/zrpn.c b/index/zrpn.c index cc1893d..4f50db2 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -4,7 +4,34 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zrpn.c,v $ - * Revision 1.12 1995-09-15 14:45:21 adam + * Revision 1.21 1995-10-06 13:52:06 adam + * Bug fixes. Handler may abort further scanning. + * + * Revision 1.20 1995/10/06 11:06:33 adam + * Scan entries include 'occurrences' now. + * + * Revision 1.19 1995/10/06 10:43:56 adam + * Scan added. 'occurrences' in scan entries not set yet. + * + * Revision 1.18 1995/10/04 16:57:20 adam + * Key input and merge sort in one pass. + * + * Revision 1.17 1995/10/04 12:55:17 adam + * Bug fix in ranked search. Use=Any keys inserted. + * + * Revision 1.16 1995/10/02 16:24:40 adam + * Use attribute actually used in search requests. + * + * Revision 1.15 1995/10/02 15:18:52 adam + * New member in recRetrieveCtrl: diagnostic. + * + * Revision 1.14 1995/09/28 12:10:32 adam + * Bug fixes. Field prefix used in queries. + * + * Revision 1.13 1995/09/18 14:17:50 adam + * Minor changes. + * + * Revision 1.12 1995/09/15 14:45:21 adam * Retrieve control. * Work on truncation. 
* @@ -241,7 +268,7 @@ static RSET rset_trunc (ISAM isam, ISAM_P *isam_p, int from, int to, int merge_chunk) { logf (LOG_DEBUG, "rset_trunc, range=%d-%d", from, to-1); - if (from - to > merge_chunk) + if (to - from > merge_chunk) { return NULL; } @@ -254,6 +281,7 @@ static RSET rset_trunc (ISAM isam, ISAM_P *isam_p, int from, int to, RSFD rsfd; rset_temp_parms parms; + ispt = xmalloc (sizeof(*ispt) * (to-from)); parms.key_size = sizeof (struct it_key); result = rset_create (rset_kind_temp, &parms); rsfd = rset_open (result, 1); @@ -279,6 +307,7 @@ static RSET rset_trunc (ISAM isam, ISAM_P *isam_p, int from, int to, is_pt_free (ispt[i]); rset_close (result, rsfd); heap_close (ti); + xfree (ispt); return result; } } @@ -324,21 +353,31 @@ static int trunc_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, const char *info; AttrType truncation; int truncation_value; + AttrType use; + int use_value; + attr_init (&use, zapt, 1); + use_value = attr_find (&use); + logf (LOG_DEBUG, "use value %d", use_value); attr_init (&truncation, zapt, 5); truncation_value = attr_find (&truncation); logf (LOG_DEBUG, "truncation value %d", truncation_value); + + if (use_value == -1) + use_value = 1016; + i = index_word_prefix (term_dict, 1, use_value); + switch (truncation_value) { case -1: /* not specified */ case 100: /* do not truncate */ - strcpy (term_dict, term_sub); + strcat (term_dict, term_sub); logf (LOG_DEBUG, "dict_lookup: %s", term_dict); if ((info = dict_lookup (zi->wordDict, term_dict))) add_isam_p (info); break; case 1: /* right truncation */ - strcpy (term_dict, term_sub); + strcat (term_dict, term_sub); strcat (term_dict, ".*"); dict_lookup_grep (zi->wordDict, term_dict, 0, grep_handle); break; @@ -347,7 +386,7 @@ static int trunc_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, zi->errCode = 120; return -1; case 101: /* process # in term */ - for (j = 0, i = 0; term_sub[i] && i < 3; i++) + for (j = strlen(term_dict), i = 0; term_sub[i] && i < 2; i++) term_dict[j++] = 
term_sub[i]; for (; term_sub[i]; i++) if (term_sub[i] == '#') @@ -361,7 +400,7 @@ static int trunc_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, dict_lookup_grep (zi->wordDict, term_dict, 0, grep_handle); break; case 102: /* regular expression */ - strcpy (term_dict, term_sub); + strcat (term_dict, term_sub); dict_lookup_grep (zi->wordDict, term_dict, 0, grep_handle); break; } @@ -370,6 +409,20 @@ static int trunc_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, return 0; } +static void trans_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, + char *termz) +{ + size_t i, sizez; + Z_Term *term = zapt->term; + + sizez = term->u.general->len; + if (sizez > IT_MAX_WORD) + sizez = IT_MAX_WORD; + for (i = 0; i < sizez; i++) + termz[i] = index_char_cvt (term->u.general->buf[i]); + termz[i] = '\0'; +} + static RSET rpn_search_APT_relevance (ZServerInfo *zi, Z_AttributesPlusTerm *zapt) { @@ -378,7 +431,6 @@ static RSET rpn_search_APT_relevance (ZServerInfo *zi, char term_sub[IT_MAX_WORD+1]; char *p0 = termz, *p1 = NULL; Z_Term *term = zapt->term; - size_t sizez, i; parms.key_size = sizeof(struct it_key); parms.max_rec = 100; @@ -390,13 +442,7 @@ static RSET rpn_search_APT_relevance (ZServerInfo *zi, zi->errCode = 124; return NULL; } - sizez = term->u.general->len; - if (sizez > IT_MAX_WORD) - sizez = IT_MAX_WORD; - for (i = 0; i < sizez; i++) - termz[i] = index_char_cvt (term->u.general->buf[i]); - termz[i] = '\0'; - + trans_term (zi, zapt, termz); isam_p_indx = 0; /* global, set by trunc_term - see below */ while (1) { @@ -428,20 +474,13 @@ static RSET rpn_search_APT_word (ZServerInfo *zi, char termz[IT_MAX_WORD+1]; Z_Term *term = zapt->term; - size_t sizez, i; if (term->which != Z_Term_general) { zi->errCode = 124; return NULL; } - sizez = term->u.general->len; - if (sizez > IT_MAX_WORD) - sizez = IT_MAX_WORD; - for (i = 0; i < sizez; i++) - termz[i] = index_char_cvt (term->u.general->buf[i]); - termz[i] = '\0'; - + trans_term (zi, zapt, termz); isam_p_indx = 0; /* global, set by trunc_term - see below */ if (trunc_term (zi, zapt, termz, &isam_positions)) return 
NULL; @@ -454,7 +493,7 @@ static RSET rpn_search_APT_word (ZServerInfo *zi, return rset_create (rset_kind_isam, &parms); } else - return rset_trunc (zi->wordIsam, isam_positions, 0, isam_p_indx, 200); + return rset_trunc (zi->wordIsam, isam_positions, 0, isam_p_indx, 400); } static RSET rpn_search_APT_phrase (ZServerInfo *zi, @@ -465,20 +504,13 @@ static RSET rpn_search_APT_phrase (ZServerInfo *zi, char termz[IT_MAX_WORD+1]; Z_Term *term = zapt->term; - size_t sizez, i; if (term->which != Z_Term_general) { zi->errCode = 124; return NULL; } - sizez = term->u.general->len; - if (sizez > IT_MAX_WORD) - sizez = IT_MAX_WORD; - for (i = 0; i < sizez; i++) - termz[i] = index_char_cvt (term->u.general->buf[i]); - termz[i] = '\0'; - + trans_term (zi, zapt, termz); isam_p_indx = 0; /* global, set by trunc_term - see below */ if (trunc_term (zi, zapt, termz, &isam_positions)) return NULL; @@ -649,6 +681,92 @@ int rpn_search (ZServerInfo *zi, return zi->errCode; count_set (rset, hits); resultSetAdd (zi, setname, 1, rset); + if (zi->errCode) + logf (LOG_DEBUG, "search error: %d", zi->errCode); return zi->errCode; } +static struct scan_entry *scan_list; +static ODR scan_odr; +static int scan_before, scan_after; +static ISAM scan_isam; +static char scan_prefix[20]; + +static int scan_handle (Dict_char *name, const char *info, int pos) +{ + int len_prefix, idx; + ISAM_P isam_p; + RSET rset; + + rset_isam_parms parms; + + len_prefix = strlen(scan_prefix); + if (memcmp (name, scan_prefix, len_prefix)) + return 1; + if (pos > 0) + idx = scan_after - pos + scan_before; + else + idx = - pos - 1; + scan_list[idx].term = odr_malloc (scan_odr, strlen(name + len_prefix)+1); + strcpy (scan_list[idx].term, name + len_prefix); + assert (*info == sizeof(isam_p)); + memcpy (&isam_p, info+1, sizeof(isam_p)); + parms.is = scan_isam; + parms.pos = isam_p; +#if 1 + rset = rset_create (rset_kind_isam, &parms); + count_set (rset, &scan_list[idx].occurrences); + rset_delete (rset); +#else + scan_list[idx].occurrences = 1; +#endif + logf (LOG_DEBUG, 
"pos=%3d idx=%3d name=%s", pos, idx, name); + return 0; +} + +int rpn_scan (ZServerInfo *zi, ODR odr, Z_AttributesPlusTerm *zapt, + int *position, int *num_entries, struct scan_entry **list) +{ + int i, j, sizez; + int pos = *position; + int num = *num_entries; + int before; + int after; + char termz[IT_MAX_WORD+20]; + AttrType use; + int use_value; + Z_Term *term = zapt->term; + + logf (LOG_DEBUG, "scan, position = %d, num = %d", pos, num); + scan_before = before = pos-1; + scan_after = after = 1+num-pos; + scan_odr = odr; + + logf (LOG_DEBUG, "scan, before = %d, after = %d", before, after); + + scan_isam = zi->wordIsam; + scan_list = *list = odr_malloc (odr, (before+after)*sizeof(**list)); + for (j = 0; ju.general->len; + if (sizez > IT_MAX_WORD) + sizez = IT_MAX_WORD; + for (j = 0; ju.general->buf[j]); + termz[j+i] = '\0'; + + dict_scan (zi->wordDict, termz, &before, &after, scan_handle); + + if (zi->errCode) + logf (LOG_DEBUG, "search error: %d", zi->errCode); + return 0; +} +