From 3c547fb0a2e275ef21871d865116e288114c0910 Mon Sep 17 00:00:00 2001 From: Heikki Levanto Date: Fri, 22 Oct 2004 10:12:51 +0000 Subject: [PATCH] Added a routine to get an array of terms in a query, in preparation for ranking --- include/rset.h | 25 +++++++++++++++++++++++-- index/zsets.c | 19 +++++++++++++++++-- rset/rsbetween.c | 11 ++++++++++- rset/rsbool.c | 15 ++++++++++++++- rset/rset.c | 22 +++++++++++++++++++++- rset/rsisamb.c | 3 ++- rset/rsisamc.c | 5 ++++- rset/rsisams.c | 5 ++++- rset/rsmultiandor.c | 28 +++++++++++++++++++++++++++- rset/rsnull.c | 3 ++- rset/rsprox.c | 16 +++++++++++++++- rset/rstemp.c | 3 ++- 12 files changed, 141 insertions(+), 14 deletions(-) diff --git a/include/rset.h b/include/rset.h index 0b529a7..2fa67a3 100644 --- a/include/rset.h +++ b/include/rset.h @@ -1,4 +1,4 @@ -/* $Id: rset.h,v 1.38 2004-10-20 14:32:28 heikki Exp $ +/* $Id: rset.h,v 1.39 2004-10-22 10:12:51 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -78,11 +78,19 @@ struct rsfd { /* the stuff common to all rsfd's. */ */ struct rset_control { - char *desc; /* text description of set type (for debugging) */ + /** text description of set type (for debugging) */ + char *desc; /* RSET rs_something_create(const struct rset_control *sel, ...); */ void (*f_delete)(RSET ct); + + /** recursively fills the terms array with terms. call with curterm=0 */ + /* always counts them all into cur, but of course won't touch the term */ + /* array past max. 
You can use this to count, set max=0 */ + void (*f_getterms)(RSET ct, TERMID *terms, int maxterms, int *curterm); + RSFD (*f_open)(RSET ct, int wflag); void (*f_close)(RSFD rfd); + /** forward behaves like a read, but it skips some items first */ int (*f_forward)(RSFD rfd, void *buf, TERMID *term, const void *untilbuf); void (*f_pos)(RSFD rfd, double *current, double *total); /* returns -1,-1 if pos function not implemented for this type */ @@ -94,6 +102,15 @@ struct rset_control int rset_default_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf); +/** rset_get_no_terms is a getterms function for those that don't have any */ +void rset_get_no_terms(RSET ct, TERMID *terms, int maxterms, int *curterm); + +/** + * rset_get_one_term is a getterms function for those rsets that have + * exactly one term, like all rsisamX types. + */ +void rset_get_one_term(RSET ct,TERMID *terms,int maxterms,int *curterm); + /** * key_control contains all there is to know about the keys stored in * an isam, and therefore operated by the rsets. 
Other than this info, @@ -173,6 +190,10 @@ RSET rset_dup (RSET rs); #define rset_forward(rfd, buf, term, untilbuf) \ (*(rfd)->rset->control->f_forward)((rfd),(buf),(term),(untilbuf)) +/* void rset_getterms(RSET ct, TERMID *terms, int maxterms, int *curterm); */ +#define rset_getterms(ct, terms, maxterms, curterm) \ + (*(ct)->control->f_getterms)((ct),(terms),(maxterms),(curterm)) + /* int rset_pos(RSFD fd, double *current, double *total); */ #define rset_pos(rfd,cur,tot) \ (*(rfd)->rset->control->f_pos)( (rfd),(cur),(tot)) diff --git a/index/zsets.c b/index/zsets.c index e77e5fb..cabefa6 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -1,4 +1,4 @@ -/* $Id: zsets.c,v 1.65 2004-10-21 12:43:09 heikki Exp $ +/* $Id: zsets.c,v 1.66 2004-10-22 10:12:51 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -733,7 +733,8 @@ void resultSetRank (ZebraHandle zh, ZebraSet zebraSet, RSET rset) struct it_key key; RSFD rfd; TERMID termid; - int i; + TERMID *terms; + int i,n; ZebraRankClass rank_class; struct rank_control *rc; struct zset_sort_info *sort_info; @@ -810,6 +811,20 @@ void resultSetRank (ZebraHandle zh, ZebraSet zebraSet, RSET rset) (*rc->end) (zh->reg, handle); } rset_close (rfd); + n=0; + logf(LOG_LOG,"FIXME: Counting terms"); + rset_getterms(rset,0,0,&n); + logf(LOG_LOG,"FIXME: Got %d terms", n); + terms=xmalloc( sizeof(*terms)*n); + i=n; + n=0; + rset_getterms(rset,terms,i,&n); + logf(LOG_LOG,"FIXME: Collected %d terms", n); + for (i=0;iname); + xfree(terms); + + /* for (i = 0; i < rset->no_rset_terms; i++) { diff --git a/rset/rsbetween.c b/rset/rsbetween.c index 1335a24..d099701 100644 --- a/rset/rsbetween.c +++ b/rset/rsbetween.c @@ -1,4 +1,4 @@ -/* $Id: rsbetween.c,v 1.27 2004-10-15 10:07:34 heikki Exp $ +/* $Id: rsbetween.c,v 1.28 2004-10-22 10:12:51 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -48,11 +48,13 @@ static int r_forward_between(RSFD rfd, void *buf, static int 
r_read_between (RSFD rfd, void *buf, TERMID *term ); static int r_write_between (RSFD rfd, const void *buf); static void r_pos_between (RSFD rfd, double *current, double *total); +static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm); static const struct rset_control control = { "between", r_delete_between, + r_get_terms, r_open_between, r_close_between, r_forward_between, @@ -447,3 +449,10 @@ static void r_pos_between (RSFD rfd, double *current, double *total) *current, *total, r); #endif } + +static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm) +{ + struct rset_between_info *info = (struct rset_between_info *) ct->priv; + rset_getterms(info->rset_m, terms, maxterms, curterm); +} + diff --git a/rset/rsbool.c b/rset/rsbool.c index e993ecd..3f50b08 100644 --- a/rset/rsbool.c +++ b/rset/rsbool.c @@ -1,4 +1,4 @@ -/* $Id: rsbool.c,v 1.49 2004-10-15 10:07:34 heikki Exp $ +/* $Id: rsbool.c,v 1.50 2004-10-22 10:12:51 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -41,11 +41,14 @@ static int r_read_and (RSFD rfd, void *buf, TERMID *term); static int r_read_or (RSFD rfd, void *buf, TERMID *term); static int r_read_not (RSFD rfd, void *buf, TERMID *term); static int r_write (RSFD rfd, const void *buf); +static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm); + static const struct rset_control control_and = { "and", r_delete, + r_get_terms, r_open, r_close, r_forward, @@ -58,6 +61,7 @@ static const struct rset_control control_or = { "or", r_delete, + r_get_terms, r_open, r_close, r_forward, @@ -70,6 +74,7 @@ static const struct rset_control control_not = { "not", r_delete, + r_get_terms, r_open, r_close, r_forward, @@ -512,3 +517,11 @@ static void r_pos (RSFD rfd, double *current, double *total) *current, *total, r); #endif } + +static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm) +{ + struct rset_bool_info *info = (struct rset_bool_info *) 
ct->priv; + rset_getterms(info->rset_l, terms, maxterms, curterm); + rset_getterms(info->rset_r, terms, maxterms, curterm); +} + diff --git a/rset/rset.c b/rset/rset.c index a7c2ffe..42c20f8 100644 --- a/rset/rset.c +++ b/rset/rset.c @@ -1,4 +1,4 @@ -/* $Id: rset.c,v 1.35 2004-10-20 14:32:29 heikki Exp $ +/* $Id: rset.c,v 1.36 2004-10-22 10:12:51 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -148,6 +148,26 @@ int rset_default_forward(RSFD rfd, void *buf, TERMID *term, return more; } +/** rset_get_no_terms is a getterms function for those that don't have any */ +void rset_get_no_terms(RSET ct, TERMID *terms, int maxterms, int *curterm) +{ + return; +} + +/* rset_get_one_term gets that one term from an rset. Used by rsisamX */ +void rset_get_one_term(RSET ct,TERMID *terms,int maxterms,int *curterm) +{ + yaz_log(LOG_LOG,"FIXME: get_one_term: max=%d cur=%d", maxterms, *curterm); + if (ct->term) + { + yaz_log(LOG_LOG,"FIXME: get_one_term: '%s'", ct->term->name); + if (*curterm < maxterms) + terms[*curterm]=ct->term; + (*curterm)++; + } +} + + TERMID rset_term_create (const char *name, int length, const char *flags, int type, NMEM nmem) diff --git a/rset/rsisamb.c b/rset/rsisamb.c index e7fbe40..094ba85 100644 --- a/rset/rsisamb.c +++ b/rset/rsisamb.c @@ -1,4 +1,4 @@ -/* $Id: rsisamb.c,v 1.24 2004-10-15 10:07:34 heikki Exp $ +/* $Id: rsisamb.c,v 1.25 2004-10-22 10:12:52 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -42,6 +42,7 @@ static const struct rset_control control = { "isamb", r_delete, + rset_get_one_term, r_open, r_close, r_forward, diff --git a/rset/rsisamc.c b/rset/rsisamc.c index 4e8b827..7b54d15 100644 --- a/rset/rsisamc.c +++ b/rset/rsisamc.c @@ -1,4 +1,4 @@ -/* $Id: rsisamc.c,v 1.27 2004-10-15 10:07:34 heikki Exp $ +/* $Id: rsisamc.c,v 1.28 2004-10-22 10:12:52 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -40,6 
+40,7 @@ static const struct rset_control control = { "isamc", r_delete, + rset_get_one_term, r_open, r_close, rset_default_forward, @@ -131,3 +132,5 @@ static void r_pos (RSFD rfd, double *current, double *total) *total=-1; } + + diff --git a/rset/rsisams.c b/rset/rsisams.c index cd096e0..0607c2b 100644 --- a/rset/rsisams.c +++ b/rset/rsisams.c @@ -1,4 +1,4 @@ -/* $Id: rsisams.c,v 1.15 2004-10-15 10:07:34 heikki Exp $ +/* $Id: rsisams.c,v 1.16 2004-10-22 10:12:52 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -38,6 +38,7 @@ static const struct rset_control control = { "isams", r_delete, + rset_get_one_term, r_open, r_close, rset_default_forward, @@ -130,3 +131,5 @@ static void r_pos (RSFD rfd, double *current, double *total) *current=-1; /* sorry, not implemented yet */ *total=-1; } + + diff --git a/rset/rsmultiandor.c b/rset/rsmultiandor.c index fceb772..4f4bdcd 100644 --- a/rset/rsmultiandor.c +++ b/rset/rsmultiandor.c @@ -1,4 +1,4 @@ -/* $Id: rsmultiandor.c,v 1.6 2004-10-15 10:07:34 heikki Exp $ +/* $Id: rsmultiandor.c,v 1.7 2004-10-22 10:12:52 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -55,11 +55,13 @@ static int r_forward_and(RSFD rfd, void *buf, TERMID *term, static int r_forward_or(RSFD rfd, void *buf, TERMID *term, const void *untilbuf); static void r_pos (RSFD rfd, double *current, double *total); +static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm); static const struct rset_control control_or = { "multi-or", r_delete, + r_get_terms, r_open_or, r_close, r_forward_or, @@ -71,6 +73,7 @@ static const struct rset_control control_and = { "multi-and", r_delete, + r_get_terms, r_open_and, r_close, r_forward_and, @@ -579,3 +582,26 @@ static int r_write (RSFD rfd, const void *buf) logf (LOG_FATAL, "multior set type is read-only"); return -1; } + +static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm) + /* Special case: Some multi-ors have
all terms pointing to the same */ + /* term. We do not want to duplicate those. Other multiors (and ands) */ + /* have different terms under them. Those we want. */ +{ + struct rset_multiandor_info *info = + (struct rset_multiandor_info *) ct->priv; + int firstterm= *curterm; + int i; + for (i=0;i<info->no_rsets;i++) + { + rset_getterms(info->rsets[i], terms, maxterms, curterm); + yaz_log(LOG_DEBUG,"rsmulti: getterms: i=%d *cur=%d",i,*curterm); + /* FIXME - remove this log once we know it works */ + if ( ( (*curterm) > firstterm+1 ) && + ( (*curterm) <= maxterms ) && + ( terms[(*curterm)-1] == terms[firstterm] ) ) + (*curterm)--; /* drop the duplicate: decrement the count, not the pointer */ + } +} + + diff --git a/rset/rsnull.c b/rset/rsnull.c index ca452c1..f3d2135 100644 --- a/rset/rsnull.c +++ b/rset/rsnull.c @@ -1,4 +1,4 @@ -/* $Id: rsnull.c,v 1.28 2004-10-15 10:07:34 heikki Exp $ +/* $Id: rsnull.c,v 1.29 2004-10-22 10:12:52 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -39,6 +39,7 @@ static const struct rset_control control = { "null", r_delete, + rset_get_no_terms, r_open, r_close, rset_default_forward, diff --git a/rset/rsprox.c b/rset/rsprox.c index 1c1827d..d24d2ef 100644 --- a/rset/rsprox.c +++ b/rset/rsprox.c @@ -1,4 +1,4 @@ -/* $Id: rsprox.c,v 1.19 2004-10-20 14:32:29 heikki Exp $ +/* $Id: rsprox.c,v 1.20 2004-10-22 10:12:52 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -39,11 +39,13 @@ static int r_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf); static int r_read (RSFD rfd, void *buf, TERMID *term); static int r_write (RSFD rfd, const void *buf); static void r_pos (RSFD rfd, double *current, double *total); +static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm); static const struct rset_control control = { "prox", r_delete, + r_get_terms, r_open, r_close, r_forward, @@ -337,3 +339,15 @@ static void r_pos (RSFD rfd, double *current, double *total)
logf(LOG_DEBUG,"prox_pos: [%d] %0.1f/%0.1f= %0.4f ", i,*current, *total, r); } + + + +static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm) +{ + struct rset_prox_info *info = + (struct rset_prox_info *) ct->priv; + int i; + for (i=0;i<info->rset_no;i++) + rset_getterms(info->rset[i], terms, maxterms, curterm); +} + diff --git a/rset/rstemp.c b/rset/rstemp.c index e8d4a7d..621addf 100644 --- a/rset/rstemp.c +++ b/rset/rstemp.c @@ -1,4 +1,4 @@ -/* $Id: rstemp.c,v 1.51 2004-10-15 10:07:34 heikki Exp $ +/* $Id: rstemp.c,v 1.52 2004-10-22 10:12:52 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003 Index Data Aps @@ -48,6 +48,7 @@ static const struct rset_control control = { "temp", r_delete, + rset_get_no_terms, r_open, r_close, rset_default_forward, -- 1.7.10.4