From 5660e317ea2972ea6bb6a4f8a415f71579f71103 Mon Sep 17 00:00:00 2001 From: Heikki Levanto Date: Fri, 6 Aug 2004 10:09:27 +0000 Subject: [PATCH] Using doubles in the position estimates, not to lose precision --- include/isamb.h | 4 ++-- include/rset.h | 8 ++++---- index/zsets.c | 10 +++++----- isamb/isamb.c | 11 ++++++----- rset/rsbool.c | 22 +++++++++------------- rset/rset.c | 8 ++++---- rset/rsisamb.c | 8 ++++---- rset/rsnull.c | 6 +++--- rset/rstemp.c | 6 +++--- 9 files changed, 40 insertions(+), 43 deletions(-) diff --git a/include/isamb.h b/include/isamb.h index 358dc84..1b69450 100644 --- a/include/isamb.h +++ b/include/isamb.h @@ -1,4 +1,4 @@ -/* $Id: isamb.h,v 1.12 2004-08-04 09:59:03 heikki Exp $ +/* $Id: isamb.h,v 1.13 2004-08-06 10:09:27 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -44,7 +44,7 @@ int isamb_pp_read (ISAMB_PP pp, void *buf); int isamb_pp_forward (ISAMB_PP pp, void *buf, const void *untilbuf); -void isamb_pp_pos (ISAMB_PP pp, zint *current, zint *total); +void isamb_pp_pos (ISAMB_PP pp, double *current, double *total); void isamb_pp_close (ISAMB_PP pp); diff --git a/include/rset.h b/include/rset.h index e42a6e2..079acb2 100644 --- a/include/rset.h +++ b/include/rset.h @@ -1,4 +1,4 @@ -/* $Id: rset.h,v 1.25 2004-08-06 09:43:03 heikki Exp $ +/* $Id: rset.h,v 1.26 2004-08-06 10:09:27 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -46,7 +46,7 @@ struct rset_control int (*f_forward)(RSET ct, RSFD rfd, void *buf, int *term_index, int (*cmpfunc)(const void *p1, const void *p2), const void *untilbuf); - void (*f_pos)(RSFD rfd, zint *current, zint *total); + void (*f_pos)(RSFD rfd, double *current, double *total); /* returns -1,-1 if pos function not implemented for this type */ int (*f_read)(RSFD rfd, void *buf, int *term_index); int (*f_write)(RSFD rfd, const void *buf); @@ -55,7 +55,7 @@ struct rset_control int rset_default_forward(RSET ct, RSFD rfd, void 
*buf, int *term_index, int (*cmpfunc)(const void *p1, const void *p2), const void *untilbuf); -void rset_default_pos(RSFD rfd, zint *current, zint *total); +void rset_default_pos(RSFD rfd, double *current, double *total); struct rset_term { char *name; @@ -103,7 +103,7 @@ RSET rset_dup (RSET rs); #define rset_forward(rs, fd, buf, indx, cmpfunc, untilbuf) \ (*(rs)->control->f_forward)((rs), (fd), (buf), (indx), (cmpfunc), (untilbuf)) -/* int rset_pos(RSET rs, RSFD fd, zint *current, zint *total); */ +/* int rset_pos(RSET rs, RSFD fd, double *current, double *total); */ #define rset_pos(rs,fd,cur,tot) \ (*(rs)->control->f_pos)( (fd),(cur),(tot)) diff --git a/index/zsets.c b/index/zsets.c index 69d6590..15fa83c 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -1,4 +1,4 @@ -/* $Id: zsets.c,v 1.51 2004-08-06 09:43:03 heikki Exp $ +/* $Id: zsets.c,v 1.52 2004-08-06 10:09:27 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -726,7 +726,7 @@ void resultSetRank (ZebraHandle zh, ZebraSet zebraSet, RSET rset) struct rank_control *rc; struct zset_sort_info *sort_info; const char *rank_handler_name = res_get_def(zh->res, "rank", "rank-1"); - zint cur,tot; + double cur,tot; zint est=-2; /* -2 not done, -1 can't do, >0 actual estimate*/ zint esthits; @@ -781,11 +781,11 @@ void resultSetRank (ZebraHandle zh, ZebraSet zebraSet, RSET rset) rset_pos(rset,rfd,&cur,&tot); if (tot>0) { f=1.0*cur/tot; - est=(zint)(zebraSet->hits/f); + est=(zint)(0.5+zebraSet->hits/f); /* FIXME - round the guess to 3 digits */ logf(LOG_LOG, "Estimating hits (%s) " - ZINT_FORMAT"->%d" - "; "ZINT_FORMAT"->"ZINT_FORMAT, + "%0.1f->%d" + "; %0.1f->"ZINT_FORMAT, rset->control->desc, cur, zebraSet->hits, tot,est); diff --git a/isamb/isamb.c b/isamb/isamb.c index 3753fd9..8ddb9c7 100644 --- a/isamb/isamb.c +++ b/isamb/isamb.c @@ -1,4 +1,4 @@ -/* $Id: isamb.c,v 1.50 2004-08-06 09:43:03 heikki Exp $ +/* $Id: isamb.c,v 1.51 2004-08-06 10:09:27 heikki Exp $ Copyright (C) 
1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -1800,7 +1800,8 @@ int isamb_pp_num (ISAMB_PP pp) } static void isamb_pp_leaf_pos( ISAMB_PP pp, - zint *current, zint *total, void *dummybuf ) + double *current, double *total, + void *dummybuf ) { struct ISAMB_block *p = pp->block[pp->level]; const char *src=p->bytes; @@ -1825,7 +1826,7 @@ static void isamb_pp_leaf_pos( ISAMB_PP pp, (*current)++; } #if ISAMB_DEBUG - logf(LOG_DEBUG, "isamb_pp_leaf_pos: cur="ZINT_FORMAT" tot="ZINT_FORMAT + logf(LOG_DEBUG, "isamb_pp_leaf_pos: cur= %0.1f tot=%0.1f " " ofs=%d sz=%d lev=%d", *current, *total, p->offset, p->size, pp->level); #endif @@ -1833,7 +1834,7 @@ static void isamb_pp_leaf_pos( ISAMB_PP pp, (pp->isamb->method->codec.stop)(decodeClientData); } -static void isamb_pp_upper_pos( ISAMB_PP pp, zint *current, zint *total, +static void isamb_pp_upper_pos( ISAMB_PP pp, double *current, double *total, zint size, int level ) { /* estimates total/current occurrences from here up, excl leaf */ struct ISAMB_block *p = pp->block[level]; @@ -1869,7 +1870,7 @@ static void isamb_pp_upper_pos( ISAMB_PP pp, zint *current, zint *total, isamb_pp_upper_pos(pp, current, total, *total, level-1); } /* upper_pos */ -void isamb_pp_pos( ISAMB_PP pp, zint *current, zint *total ) +void isamb_pp_pos( ISAMB_PP pp, double *current, double *total ) { /* return an estimate of the current position and of the total number of */ /* occureences in the isam tree, based on the current leaf */ struct ISAMB_block *p = pp->block[pp->level]; diff --git a/rset/rsbool.c b/rset/rsbool.c index bc1a6dd..0289566 100644 --- a/rset/rsbool.c +++ b/rset/rsbool.c @@ -1,4 +1,4 @@ -/* $Id: rsbool.c,v 1.34 2004-08-06 09:43:03 heikki Exp $ +/* $Id: rsbool.c,v 1.35 2004-08-06 10:09:28 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -41,7 +41,7 @@ static void r_rewind (RSFD rfd); static int r_forward(RSET ct, RSFD rfd, void *buf, int *term_index, int 
(*cmpfunc)(const void *p1, const void *p2), const void *untilbuf); -static void r_pos (RSFD rfd, zint *current, zint *total); +static void r_pos (RSFD rfd, double *current, double *total); static int r_read_and (RSFD rfd, void *buf, int *term_index); static int r_read_or (RSFD rfd, void *buf, int *term_index); static int r_read_not (RSFD rfd, void *buf, int *term_index); @@ -278,8 +278,6 @@ static int r_read_and (RSFD rfd, void *buf, int *term_index) struct rset_bool_rfd *p = (struct rset_bool_rfd *) rfd; struct rset_bool_info *info = p->info; - { zint cur,tot; r_pos(rfd, &cur, &tot); } - while (p->more_l || p->more_r) { int cmp; @@ -455,7 +453,6 @@ static int r_read_or (RSFD rfd, void *buf, int *term_index) struct rset_bool_rfd *p = (struct rset_bool_rfd *) rfd; struct rset_bool_info *info = p->info; - { zint cur,tot; r_pos(rfd, &cur, &tot); } while (p->more_l || p->more_r) { int cmp; @@ -519,7 +516,6 @@ static int r_read_not (RSFD rfd, void *buf, int *term_index) struct rset_bool_rfd *p = (struct rset_bool_rfd *) rfd; struct rset_bool_info *info = p->info; - { zint cur,tot; r_pos(rfd, &cur, &tot); } while (p->more_l || p->more_r) { int cmp; @@ -577,13 +573,13 @@ static int r_write (RSFD rfd, const void *buf) return -1; } -static void r_pos (RSFD rfd, zint *current, zint *total) +static void r_pos (RSFD rfd, double *current, double *total) { struct rset_bool_rfd *p = (struct rset_bool_rfd *) rfd; struct rset_bool_info *info = p->info; - zint lcur,ltot; - zint rcur,rtot; - float r; + double lcur,ltot; + double rcur,rtot; + double r; ltot=-1; rtot=-1; rset_pos(info->rset_l, p->rfd_l, &lcur, <ot); rset_pos(info->rset_r, p->rfd_r, &rcur, &rtot); @@ -593,16 +589,16 @@ static void r_pos (RSFD rfd, zint *current, zint *total) } if ( rtot<0) { rtot=0; rcur=0;} /* if only one useful, use it */ if ( ltot<0) { ltot=0; lcur=0;} - if ( rtot+ltot == 0 ) { /* empty rset */ + if ( rtot+ltot < 1 ) { /* empty rset */ *current=0; *total=0; return; } r=1.0*(lcur+rcur)/(ltot+rtot); /* 
weighed average of l and r */ *current=p->hits; - *total=(zint)(0.5+*current/r); + *total=*current/r ; #if RSET_DEBUG - yaz_log(LOG_DEBUG,"bool_pos: (%s/%s) "ZINT_FORMAT"/"ZINT_FORMAT"= %0.4f ", + yaz_log(LOG_DEBUG,"bool_pos: (%s/%s) %0.1f/%0.1f= %0.4f ", info->rset_l->control->desc, info->rset_r->control->desc, *current, *total, r); #endif diff --git a/rset/rset.c b/rset/rset.c index 4421587..dbf7b7b 100644 --- a/rset/rset.c +++ b/rset/rset.c @@ -1,4 +1,4 @@ -/* $Id: rset.c,v 1.22 2004-08-04 09:59:03 heikki Exp $ +/* $Id: rset.c,v 1.23 2004-08-06 10:09:28 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -65,9 +65,9 @@ RSET rset_dup (RSET rs) return rs; } -void rset_default_pos (RSFD rfd, zint *current, zint *total) -{ /* FIXME - This function should not be needed, only while */ - /* coding the pos functions. */ +void rset_default_pos (RSFD rfd, double *current, double *total) +{ /* This should never really be needed, but it is still used in */ + /* those rsets that we don't really plan to use, like isam-s */ assert(rfd); assert(current); assert(total); diff --git a/rset/rsisamb.c b/rset/rsisamb.c index ffab998..d3f8399 100644 --- a/rset/rsisamb.c +++ b/rset/rsisamb.c @@ -1,4 +1,4 @@ -/* $Id: rsisamb.c,v 1.12 2004-08-06 09:43:04 heikki Exp $ +/* $Id: rsisamb.c,v 1.13 2004-08-06 10:09:28 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -39,7 +39,7 @@ static void r_rewind (RSFD rfd); static int r_forward(RSET ct, RSFD rfd, void *buf, int *term_index, int (*cmpfunc)(const void *p1, const void *p2), const void *untilbuf); -static void r_pos (RSFD rfd, zint *current, zint *total); +static void r_pos (RSFD rfd, double *current, double *total); static int r_read (RSFD rfd, void *buf, int *term_index); static int r_write (RSFD rfd, const void *buf); @@ -172,13 +172,13 @@ static int r_forward(RSET ct, RSFD rfd, void *buf, int *term_index, return i; } -static void r_pos (RSFD rfd, 
zint *current, zint *total) +static void r_pos (RSFD rfd, double *current, double *total) { struct rset_pp_info *pinfo = (struct rset_pp_info *) rfd; assert(rfd); isamb_pp_pos(pinfo->pt, current, total); #if RSET_DEBUG - logf(LOG_DEBUG,"isamb.r_pos returning "ZINT_FORMAT"/"ZINT_FORMAT, + logf(LOG_DEBUG,"isamb.r_pos returning %0.1f/%0.1f", *current, *total); #endif } diff --git a/rset/rsnull.c b/rset/rsnull.c index c02ff4f..aacf61a 100644 --- a/rset/rsnull.c +++ b/rset/rsnull.c @@ -1,4 +1,4 @@ -/* $Id: rsnull.c,v 1.19 2004-08-04 09:59:03 heikki Exp $ +/* $Id: rsnull.c,v 1.20 2004-08-06 10:09:28 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -32,7 +32,7 @@ static RSFD r_open (RSET ct, int flag); static void r_close (RSFD rfd); static void r_delete (RSET ct); static void r_rewind (RSFD rfd); -static void r_pos (RSFD rfd, zint *current, zint *total); +static void r_pos (RSFD rfd, double *current, double *total); static int r_read (RSFD rfd, void *buf, int *term_index); static int r_write (RSFD rfd, const void *buf); @@ -93,7 +93,7 @@ static void r_rewind (RSFD rfd) logf (LOG_DEBUG, "rsnull_rewind"); } -static void r_pos (RSFD rfd, zint *current, zint *total) +static void r_pos (RSFD rfd, double *current, double *total) { assert(rfd); assert(current); diff --git a/rset/rstemp.c b/rset/rstemp.c index 7b5420d..8971b49 100644 --- a/rset/rstemp.c +++ b/rset/rstemp.c @@ -1,4 +1,4 @@ -/* $Id: rstemp.c,v 1.39 2004-08-06 09:43:04 heikki Exp $ +/* $Id: rstemp.c,v 1.40 2004-08-06 10:09:28 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003 Index Data Aps @@ -42,7 +42,7 @@ static void r_rewind (RSFD rfd); /* static int r_count (RSET ct);*/ static int r_read (RSFD rfd, void *buf, int *term_index); static int r_write (RSFD rfd, const void *buf); -static void r_pos (RSFD rfd, zint *current, zint *total); +static void r_pos (RSFD rfd, double *current, double *total); static const struct rset_control control = { @@ -371,7 +371,7 
@@ static int r_write (RSFD rfd, const void *buf) return 1; } -static void r_pos (RSFD rfd, zint *current, zint *total) +static void r_pos (RSFD rfd, double *current, double *total) { struct rset_temp_rfd *mrfd = (struct rset_temp_rfd*) rfd; *current=mrfd->cur; -- 1.7.10.4