From b169e50ef204402fe272c2ba4df0597e279fe4d7 Mon Sep 17 00:00:00 2001 From: Heikki Levanto Date: Fri, 22 Oct 2004 11:33:28 +0000 Subject: [PATCH] Term counts right in the log (if not in searchresponse) --- include/rset.h | 12 +++++++++--- index/zsets.c | 10 +++------- rset/rset.c | 26 ++++++++++++++++++++++---- 3 files changed, 34 insertions(+), 14 deletions(-) diff --git a/include/rset.h b/include/rset.h index 4f28919..0fc191f 100644 --- a/include/rset.h +++ b/include/rset.h @@ -1,4 +1,4 @@ -/* $Id: rset.h,v 1.40 2004-10-22 10:58:28 heikki Exp $ +/* $Id: rset.h,v 1.41 2004-10-22 11:33:28 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -48,11 +48,14 @@ typedef struct rset *RSET; /* Result set */ */ struct rset_term { + /** the term itself */ char *name; - zint nn; /* FIXME - what is this used for */ char *flags; - zint count; int type; + /** the rset corresponding to this term */ + RSET rset; + /** private stuff for the ranking algorithm */ + void *rankpriv; }; typedef struct rset_term *TERMID; @@ -208,6 +211,9 @@ RSET rset_dup (RSET rs); /* int rset_type (RSET) */ #define rset_type(rs) ((rs)->control->desc) +/** rset_count counts or estimates the keys in it*/ +zint rset_count(RSET rs); + RSET rstemp_create( NMEM nmem, const struct key_control *kcontrol, int scope, const char *temp_path); diff --git a/index/zsets.c b/index/zsets.c index 3fd85a0..c284b96 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -1,4 +1,4 @@ -/* $Id: zsets.c,v 1.67 2004-10-22 10:58:28 heikki Exp $ +/* $Id: zsets.c,v 1.68 2004-10-22 11:33:28 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -821,15 +821,11 @@ void resultSetRank (ZebraHandle zh, ZebraSet zebraSet, RSET rset) for (i = 0; i < n; i++) { - if (est>0) - terms[i]->count = - est=(zint)(terms[i]->count/ratio); - yaz_log (LOG_LOG, "term=\"%s\" nn=" ZINT_FORMAT + yaz_log (LOG_LOG, "term=\"%s\" " " type=%s count=" ZINT_FORMAT, terms[i]->name, - terms[i]->nn, terms[i]->flags, - terms[i]->count); + rset_count(terms[i]->rset)); } xfree(terms); yaz_log (LOG_DEBUG, ZINT_FORMAT " keys, "ZINT_FORMAT" distinct sysnos", diff --git a/rset/rset.c b/rset/rset.c index bbc3fab..0691af1 100644 --- a/rset/rset.c +++ b/rset/rset.c @@ -1,4 +1,4 @@ -/* $Id: rset.c,v 1.37 2004-10-22 10:58:29 heikki Exp $ +/* $Id: rset.c,v 1.38 2004-10-22 11:33:29 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -86,12 +86,14 @@ RSET rset_create_base(const struct rset_control *sel, else rnew->my_nmem=1; rnew->control = sel; - rnew->count = 1; + rnew->count = 1; /* refcount! */ rnew->priv = 0; rnew->free_list=NULL; rnew->keycontrol=kcontrol; rnew->scope=scope; rnew->term=term; + if (term) + term->rset=rnew; return rnew; } @@ -148,6 +150,22 @@ int rset_default_forward(RSFD rfd, void *buf, TERMID *term, return more; } +/** + * rset_count uses rset_pos to get the total and returns that. + * This is ok for rsisamb, and for some other rsets, but in case of + * booleans etc it will give bad estimate, as nothing has been read + * from that rset + */ +zint rset_count(RSET rs) +{ + double cur,tot; + RSFD rfd=rset_open(rs,0); + rset_pos(rfd,&cur,&tot); + rset_close(rfd); + return (zint)(tot); +} + + /** rset_get_no_terms is a getterms function for those that don't have any */ void rset_get_no_terms(RSET ct, TERMID *terms, int maxterms, int *curterm) { @@ -188,9 +206,9 @@ TERMID rset_term_create (const char *name, int length, const char *flags, t->flags = NULL; else t->flags = nmem_strdup(nmem,flags); - t->nn = -1; - t->count = 0; t->type = type; + t->rankpriv=0; + t->rset=0; return t; } -- 1.7.10.4