From 1dfb2eabceebab1841f13a07314171c00873409d Mon Sep 17 00:00:00 2001 From: Heikki Levanto Date: Wed, 20 Oct 2004 14:32:28 +0000 Subject: [PATCH] Creating search terms, and passing them around in searches. Not yet actually passing them to ranking, but getting closer... Also, fixed some bugs in rsprox (hooray for our tests!) --- include/rset.h | 50 ++++++++++++++++++++++++++++++++++++++++++-------- index/trunc.c | 15 +++++++-------- index/zsets.c | 44 ++++++++++++++++++++++++-------------------- rset/rset.c | 31 ++++++++++++++++++++++++++++++- rset/rsprox.c | 10 +++++----- test/api/t5.c | 3 ++- 6 files changed, 110 insertions(+), 43 deletions(-) diff --git a/include/rset.h b/include/rset.h index ae1dd37..0b529a7 100644 --- a/include/rset.h +++ b/include/rset.h @@ -1,4 +1,4 @@ -/* $Id: rset.h,v 1.37 2004-10-15 10:07:32 heikki Exp $ +/* $Id: rset.h,v 1.38 2004-10-20 14:32:28 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -40,14 +40,28 @@ extern "C" { typedef struct rsfd *RSFD; /* Rset "file descriptor" */ typedef struct rset *RSET; /* Result set */ -/* -typedef struct terminfo *TERMID; -*/ -typedef int TERMID; - /* term thing for the rsets. They don't need to */ - /* know what it is. FIXME - define that somewhere */ -/* using int while testing, to get more type checking to work */ +/** + * rset_term is all we need to know of a term to do ranking etc. + * As far as the rsets are concerned, it is just a dummy pointer to + * be passed around. + */ + +struct rset_term { + char *name; + int nn; + char *flags; + int count; + int type; +}; + +typedef struct rset_term *TERMID; +TERMID rset_term_create (const char *name, int length, const char *flags, + int type, NMEM nmem); + + + +/** rsfd is a "file descriptor" for reading from a rset */ struct rsfd { /* the stuff common to all rsfd's.
*/ RSET rset; /* ptr to the rset this FD is opened to */ void *priv; /* private parameters for this type */ @@ -55,6 +69,13 @@ struct rsfd { /* the stuff common to all rsfd's. */ }; +/** + * rset_control has function pointers to all the important functions + * of a rset. Each type of rset will have its own control block, pointing + * to the functions for that type. They all have their own create function + * which is not part of the control block, as it takes different args for + * each type. + */ struct rset_control { char *desc; /* text description of set type (for debugging) */ @@ -69,9 +90,16 @@ struct rset_control int (*f_write)(RSFD rfd, const void *buf); }; +/** rset_default_forward implements a generic forward with a read-loop */ int rset_default_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf); +/** + * key_control contains all there is to know about the keys stored in + * an isam, and therefore operated by the rsets. Other than this info, + * all we assume is that all keys are the same size, and they can be + * memcpy'd around + */ struct key_control { int key_size; int scope; /* default for what level we operate (book/chapter/verse) on*/ @@ -84,6 +112,12 @@ struct key_control { /* FIXME - decode and encode, and lots of other stuff */ }; +/** + * A rset is an ordered sequence of keys, either directly from an underlying + * isam, or from one of the higher-level operator rsets (and, or, ...). + * Actually, it is a "virtual base class", no pure rsets exist in the system, + * they all are of some derived type.
+ */ typedef struct rset { const struct rset_control *control; diff --git a/index/trunc.c b/index/trunc.c index 0ebf73a..893e1c0 100644 --- a/index/trunc.c +++ b/index/trunc.c @@ -1,4 +1,4 @@ -/* $Id: trunc.c,v 1.46 2004-10-15 10:07:32 heikki Exp $ +/* $Id: trunc.c,v 1.47 2004-10-20 14:32:28 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -407,30 +407,30 @@ RSET rset_trunc (ZebraHandle zi, ISAMS_P *isam_p, int no, int preserve_position, int term_type, NMEM rset_nmem, const struct key_control *kctrl, int scope) { + TERMID termid; logf (LOG_DEBUG, "rset_trunc no=%d", no); if (no < 1) return rsnull_create (rset_nmem,kctrl); + termid=rset_term_create(term, length, flags, term_type,rset_nmem); if (zi->reg->isams) { if (no == 1) return rsisams_create(rset_nmem, kctrl, scope, - zi->reg->isams, *isam_p, - 0 /*FIXME - use proper TERMID*/); + zi->reg->isams, *isam_p, termid); qsort (isam_p, no, sizeof(*isam_p), isams_trunc_cmp); } else if (zi->reg->isamc) { if (no == 1) return rsisamc_create(rset_nmem, kctrl, scope, - zi->reg->isamc, *isam_p, - 0 /*FIXME - use proper TERMID*/); + zi->reg->isamc, *isam_p, termid); qsort (isam_p, no, sizeof(*isam_p), isamc_trunc_cmp); } else if (zi->reg->isamb) { if (no == 1) return rsisamb_create(rset_nmem,kctrl, scope, - zi->reg->isamb, *isam_p, 0 /* FIXME - TERMID */ ); + zi->reg->isamb, *isam_p, termid); else if (no <10000 ) /* FIXME - hardcoded number */ { RSET r; @@ -438,8 +438,7 @@ RSET rset_trunc (ZebraHandle zi, ISAMS_P *isam_p, int no, int i; for (i=0;i<no;i++) rsets[i]=rsisamb_create(rset_nmem, kctrl, scope, - zi->reg->isamb, isam_p[i], - 0 /* FIXME - use a proper TERMID */ ); + zi->reg->isamb, isam_p[i], termid); r=rsmultior_create( rset_nmem, kctrl, scope, no, rsets); xfree(rsets); return r; diff --git a/index/zsets.c b/index/zsets.c index d8655d5..c0d6ce9 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -1,4 +1,4 @@ -/* $Id: zsets.c,v 1.63 2004-10-15 10:07:34 heikki Exp $ +/* $Id: zsets.c,v 1.64 2004-10-20 14:32:28 heikki Exp $ Copyright (C)
1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -206,6 +206,8 @@ ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov) return NULL; if (s->rset) rset_delete (s->rset); + if (s->rset_nmem) + nmem_destroy (s->rset_nmem); if (s->nmem) nmem_destroy (s->nmem); } @@ -256,6 +258,8 @@ ZebraSet resultSetGet (ZebraHandle zh, const char *name) { NMEM nmem = nmem_create (); yaz_log (LOG_LOG, "research %s", name); + if (!s->rset_nmem) + s->rset_nmem=nmem_create(); s->rset = rpn_search (zh, nmem, s->rset_nmem, s->rpn, s->num_bases, s->basenames, s->name, s); @@ -778,28 +782,28 @@ void resultSetRank (ZebraHandle zh, ZebraSet zebraSet, RSET rset) /* term lists! */ /* (*rc->add) (handle, this_sys, term_index); */ - if ( (est==-2) && (zebraSet->hits==esthits)) - { /* time to estimate the hits */ - rset_pos(rfd,&cur,&tot); - if (tot>0) { - ratio=cur/tot; - est=(zint)(0.5+zebraSet->hits/ratio); - logf(LOG_LOG, "Estimating hits (%s) " - "%0.1f->"ZINT_FORMAT - "; %0.1f->"ZINT_FORMAT, - rset->control->desc, - cur, zebraSet->hits, - tot,est); - i=0; /* round to 3 significant digits */ - while (est>1000) { - est/=10; - i++; + if ( (est==-2) && (zebraSet->hits==esthits)) + { /* time to estimate the hits */ + rset_pos(rfd,&cur,&tot); + if (tot>0) { + ratio=cur/tot; + est=(zint)(0.5+zebraSet->hits/ratio); + logf(LOG_LOG, "Estimating hits (%s) " + "%0.1f->"ZINT_FORMAT + "; %0.1f->"ZINT_FORMAT, + rset->control->desc, + cur, zebraSet->hits, + tot,est); + i=0; /* round to 3 significant digits */ + while (est>1000) { + est/=10; + i++; + } + while (i--) est*=10; + zebraSet->hits=est; } - while (i--) est*=10; - zebraSet->hits=est; } } - } while (rset_read (rfd, &key,0) && (est<0) ); /* FIXME - term ?? 
*/ score = (*rc->calc) (handle, psysno); diff --git a/rset/rset.c b/rset/rset.c index 4b5e801..a7c2ffe 100644 --- a/rset/rset.c +++ b/rset/rset.c @@ -1,4 +1,4 @@ -/* $Id: rset.c,v 1.34 2004-10-15 10:07:34 heikki Exp $ +/* $Id: rset.c,v 1.35 2004-10-20 14:32:29 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -148,3 +148,32 @@ int rset_default_forward(RSFD rfd, void *buf, TERMID *term, return more; } +TERMID rset_term_create (const char *name, int length, const char *flags, + int type, NMEM nmem) + +{ + TERMID t; + logf (LOG_DEBUG, "term_create '%s' %d f=%s type=%d nmem=%p", + name, length, flags, type, nmem); + t= (TERMID) nmem_malloc (nmem, sizeof(*t)); + if (!name) + t->name = NULL; + else if (length == -1) + t->name = nmem_strdup(nmem,name); + else + { + t->name = (char*) nmem_malloc(nmem,length+1); + memcpy (t->name, name, length); + t->name[length] = '\0'; + } + if (!flags) + t->flags = NULL; + else + t->flags = nmem_strdup(nmem,flags); + t->nn = -1; + t->count = 0; + t->type = type; + return t; +} + + diff --git a/rset/rsprox.c b/rset/rsprox.c index fa562d8..1c1827d 100644 --- a/rset/rsprox.c +++ b/rset/rsprox.c @@ -1,4 +1,4 @@ -/* $Id: rsprox.c,v 1.18 2004-10-15 10:07:34 heikki Exp $ +/* $Id: rsprox.c,v 1.19 2004-10-20 14:32:29 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -196,7 +196,7 @@ static int r_forward (RSFD rfd, void *buf, TERMID *term, const void *untilbuf) (*kctrl->getseq)(p->buf[i])) { /* FIXME - We need more flexible multilevel stuff */ p->more[i-1] = rset_read ( p->rfd[i-1], p->buf[i-1], - &p->terms[i]); + &p->terms[i-1]); break; } } @@ -211,8 +211,8 @@ static int r_forward (RSFD rfd, void *buf, TERMID *term, const void *untilbuf) { memcpy (buf, p->buf[0], kctrl->key_size); if (term) - *term=p->terms[i]; - p->more[0] = rset_read (p->rfd[0], p->buf[0], &p->terms[i]); + *term=p->terms[0]; + p->more[0] = rset_read (p->rfd[0], p->buf[0], &p->terms[0]); 
p->hits++; return 1; } @@ -285,7 +285,7 @@ static int r_forward (RSFD rfd, void *buf, TERMID *term, const void *untilbuf) return 1; } } - p->more[1] = rset_read (p->rfd[1], p->buf[1],&p->terms[i]); + p->more[1] = rset_read (p->rfd[1], p->buf[1],&p->terms[1]); } } } diff --git a/test/api/t5.c b/test/api/t5.c index 8db05c0..e5f4334 100644 --- a/test/api/t5.c +++ b/test/api/t5.c @@ -1,4 +1,4 @@ -/* $Id: t5.c,v 1.6 2004-08-25 09:23:39 adam Exp $ +/* $Id: t5.c,v 1.7 2004-10-20 14:32:29 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -64,6 +64,7 @@ int main(int argc, char **argv) ; yaz_log_init_file("t5.log"); + yaz_log_init_level(LOG_ALL); nmem_init (); -- 1.7.10.4