From 810bce66201f40acfd7e8577d3997e6ea385f1cf Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Thu, 9 Jun 2005 10:39:52 +0000 Subject: [PATCH] Fixes for hit estimates. Added zebra_set_approx_limit. --- NEWS | 3 +++ include/idzebra/api.h | 13 ++++++++++++- index/index.h | 3 ++- index/zebraapi.c | 9 ++++++++- index/zrpn.c | 25 +++++++++++++++---------- index/zsets.c | 18 ++++++++++++------ rset/rset.c | 6 +++--- 7 files changed, 55 insertions(+), 22 deletions(-) diff --git a/NEWS b/NEWS index 1952f13..993570a 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,6 @@ +Added zebra_set_approx_limit for a ZebraHandle (session). Results +will be approximate if hit count is greater than the limit specified. + Added support for term hit counts. This was not in place for earlier 1.4 versions, but is present in the 1.3 series. Bug #124. diff --git a/include/idzebra/api.h b/include/idzebra/api.h index bd6516a..1b9dd22 100644 --- a/include/idzebra/api.h +++ b/include/idzebra/api.h @@ -1,4 +1,4 @@ -/* $Id: api.h,v 1.25 2005-06-02 11:59:53 adam Exp $ +/* $Id: api.h,v 1.26 2005-06-09 10:39:52 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -175,6 +175,17 @@ void zebra_result(ZebraHandle zh, int *code, char **addinfo); YAZ_EXPORT void zebra_clearError(ZebraHandle zh); + +/** + \brief Set limit before Zebra does approx hit count + \param zh session handle + \param approx_limit the limit + + Results will be approximate if hit count is greater than the + limit specified. By default there is a high-limit (no limit). 
+*/ +ZEBRA_RES zebra_set_approx_limit(ZebraHandle zh, zint approx_limit); + /** \brief Search using PQF Query \param zh session handle diff --git a/index/index.h b/index/index.h index 95371fa..131f12c 100644 --- a/index/index.h +++ b/index/index.h @@ -1,4 +1,4 @@ -/* $Id: index.h,v 1.142 2005-06-07 14:53:39 adam Exp $ +/* $Id: index.h,v 1.143 2005-06-09 10:39:53 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -266,6 +266,7 @@ struct zebra_session { char **basenames; int num_basenames; + zint approx_limit; char *reg_name; char *path_reg; diff --git a/index/zebraapi.c b/index/zebraapi.c index 5ee16a2..d536064 100644 --- a/index/zebraapi.c +++ b/index/zebraapi.c @@ -1,4 +1,4 @@ -/* $Id: zebraapi.c,v 1.174 2005-06-07 11:36:38 adam Exp $ +/* $Id: zebraapi.c,v 1.175 2005-06-09 10:39:53 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -115,6 +115,7 @@ ZebraHandle zebra_open (ZebraService zs) zh->num_basenames = 0; zh->basenames = 0; + zh->approx_limit = 1000000000; zh->trans_no = 0; zh->trans_w_no = 0; @@ -864,6 +865,12 @@ ZEBRA_RES zebra_select_databases (ZebraHandle zh, int num_bases, return ZEBRA_OK; } +ZEBRA_RES zebra_set_approx_limit(ZebraHandle zh, zint approx_limit) +{ + zh->approx_limit = approx_limit; + return ZEBRA_OK; +} + ZEBRA_RES zebra_search_RPN(ZebraHandle zh, ODR o, Z_RPNQuery *query, const char *setname, zint *hits) { diff --git a/index/zrpn.c b/index/zrpn.c index 1b74b9a..179f210 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -1,4 +1,4 @@ -/* $Id: zrpn.c,v 1.197 2005-06-07 14:53:39 adam Exp $ +/* $Id: zrpn.c,v 1.198 2005-06-09 10:39:53 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -2698,28 +2698,29 @@ void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type, *dst = nmem_strdup(stream, term_src); } -static void count_set (RSET r, int *count) +static void count_set(ZebraHandle zh, RSET rset, zint *count) { zint psysno = 0; - int kno = 0; struct it_key key; RSFD rfd; yaz_log(YLOG_DEBUG, "count_set"); + rset->hits_limit = 
zh->approx_limit; + *count = 0; - rfd = rset_open (r, RSETF_READ); - while (rset_read (rfd, &key,0 /* never mind terms */)) + rfd = rset_open(rset, RSETF_READ); + while (rset_read(rfd, &key,0 /* never mind terms */)) { if (key.mem[0] != psysno) { psysno = key.mem[0]; - (*count)++; + if (rfd->counted_items >= rset->hits_limit) + break; } - kno++; } rset_close (rfd); - yaz_log(YLOG_DEBUG, "%d keys, %d records", kno, *count); + *count = rset->hits_count; } ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, @@ -2994,6 +2995,7 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, } if (lo >= 0) { + zint count; /* merge with limit_set if given */ if (limit_set) { @@ -3006,7 +3008,8 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, 2, rsets); } /* count it */ - count_set(rset, &glist[lo].occurrences); + count_set(zh, rset, &count); + glist[lo].occurrences = count; rset_delete(rset); } } @@ -3033,6 +3036,7 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, const char *tst; RSET rset; int lo = before-1-i; /* offset in result list */ + zint count; for (j = 0; j scope, 2, rsets); } - count_set (rset, &glist[lo].occurrences); + count_set(zh, rset, &count); + glist[lo].occurrences = count; rset_delete (rset); } (*kc->dec)(kc); diff --git a/index/zsets.c b/index/zsets.c index b20041f..024113e 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -1,4 +1,4 @@ -/* $Id: zsets.c,v 1.88 2005-06-07 14:53:39 adam Exp $ +/* $Id: zsets.c,v 1.89 2005-06-09 10:39:53 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -62,6 +62,7 @@ struct zebra_set { zint cache_position; /* last position */ RSFD cache_rfd; /* rfd (NULL if not existing) */ zint cache_psysno; /* sysno for last position */ + zint approx_limit; /* limit before we do approx */ }; struct zset_sort_entry { @@ -128,6 +129,7 @@ ZEBRA_RES resultSetSearch(ZebraHandle zh, NMEM nmem, NMEM rset_nmem, for (i = 0; sort_sequence->specs[i]; 
i++) ; sort_sequence->num_specs = i; + rset->hits_limit = sset->approx_limit; if (!i) { res = resultSetRank (zh, sset, rset, rset_nmem); @@ -152,7 +154,7 @@ ZEBRA_RES resultSetAddRPN (ZebraHandle zh, NMEM m, Z_RPNQuery *rpn, zh->hits = 0; - zebraSet = resultSetAdd (zh, setname, 1); + zebraSet = resultSetAdd(zh, setname, 1); if (!zebraSet) return ZEBRA_FAIL; zebraSet->locked = 1; @@ -164,7 +166,7 @@ ZEBRA_RES resultSetAddRPN (ZebraHandle zh, NMEM m, Z_RPNQuery *rpn, zebraSet->basenames = nmem_malloc (zebraSet->nmem, num_bases * sizeof(*zebraSet->basenames)); for (i = 0; ibasenames[i] = nmem_strdup (zebraSet->nmem, basenames[i]); + zebraSet->basenames[i] = nmem_strdup(zebraSet->nmem, basenames[i]); res = resultSetSearch(zh, zebraSet->nmem, zebraSet->rset_nmem, rpn, zebraSet); @@ -205,7 +207,7 @@ void resultSetAddTerm (ZebraHandle zh, ZebraSet s, int reg_type, (s->hits)++; } -ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov) +ZebraSet resultSetAdd(ZebraHandle zh, const char *name, int ov) { ZebraSet s; int i; @@ -267,10 +269,11 @@ ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov) s->rpn = 0; s->cache_position = 0; s->cache_rfd = 0; + s->approx_limit = zh->approx_limit; return s; } -ZebraSet resultSetGet (ZebraHandle zh, const char *name) +ZebraSet resultSetGet(ZebraHandle zh, const char *name) { ZebraSet s; @@ -313,7 +316,7 @@ void resultSetInvalidate (ZebraHandle zh) } } -void resultSetDestroy (ZebraHandle zh, int num, char **names,int *statuses) +void resultSetDestroy(ZebraHandle zh, int num, char **names,int *statuses) { ZebraSet * ss = &zh->sets; int i; @@ -848,6 +851,7 @@ ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet, RSFD rfd = rset_open(rset, RSETF_READ); struct rank_control *rc = rank_class->control; double score; + zint count = 0; void *handle = (*rc->begin) (zh->reg, rank_class->class_handle, rset, nmem, @@ -868,6 +872,7 @@ ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet, { score = (*rc->calc) (handle, 
psysno); resultSetInsertRank (zh, sort_info, psysno, score, 'A'); + count++; } psysno = this_sys; } @@ -877,6 +882,7 @@ ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet, { score = (*rc->calc)(handle, psysno); resultSetInsertRank(zh, sort_info, psysno, score, 'A'); + count++; } (*rc->end) (zh->reg, handle); rset_close (rfd); diff --git a/rset/rset.c b/rset/rset.c index 52f3033..648193a 100644 --- a/rset/rset.c +++ b/rset/rset.c @@ -1,4 +1,4 @@ -/* $Id: rset.c,v 1.50 2005-06-07 14:53:39 adam Exp $ +/* $Id: rset.c,v 1.51 2005-06-09 10:39:53 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -166,7 +166,7 @@ RSET rset_create_base(const struct rset_control *sel, rset->free_list = NULL; rset->use_list = NULL; rset->hits_count = 0; - rset->hits_limit = 1000; + rset->hits_limit = 0; rset->hits_round = 1000; rset->keycontrol = kcontrol; (*kcontrol->inc)(kcontrol); @@ -347,7 +347,7 @@ int rset_default_read(RSFD rfd, void *buf, TERMID *term) if (rc > 0) { if (rfd->counted_items == 0 || - (rset->keycontrol->cmp)(buf, rfd->counted_buf) >= rset->scope) + (rset->keycontrol->cmp)(buf, rfd->counted_buf) > rset->scope) { memcpy(rfd->counted_buf, buf, rset->keycontrol->key_size); rfd->counted_items++; -- 1.7.10.4