X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fzsets.c;h=efe14c78a54aa5c61f2ee6510b7a04daea275780;hb=73bedd5ba9152c9c107b502fae65723b551aff09;hp=11ac10116a4223011a5a95da6f9be6e7194317aa;hpb=863d336f803da454e03f39ee2225719fed05021e;p=idzebra-moved-to-github.git diff --git a/index/zsets.c b/index/zsets.c index 11ac101..efe14c7 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -1,4 +1,4 @@ -/* $Id: zsets.c,v 1.57 2004-08-20 14:44:46 heikki Exp $ +/* $Id: zsets.c,v 1.69 2004-10-26 15:32:11 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -30,7 +30,7 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #endif #include "index.h" -#include +#include #define SORT_IDX_ENTRYSIZE 64 #define ZSET_SORT_MAX_LEVEL 3 @@ -46,6 +46,7 @@ struct zebra_set { char *name; RSET rset; NMEM nmem; + NMEM rset_nmem; /* for creating the rsets in */ zint hits; int num_bases; char **basenames; @@ -88,6 +89,7 @@ ZebraSet resultSetAddRPN (ZebraHandle zh, NMEM m, zebraSet->locked = 1; zebraSet->rpn = 0; zebraSet->nmem = m; + zebraSet->rset_nmem=nmem_create(); zebraSet->num_bases = num_bases; zebraSet->basenames = @@ -96,8 +98,8 @@ ZebraSet resultSetAddRPN (ZebraHandle zh, NMEM m, zebraSet->basenames[i] = nmem_strdup (zebraSet->nmem, basenames[i]); - zebraSet->rset = rpn_search (zh, zebraSet->nmem, rpn, - zebraSet->num_bases, + zebraSet->rset = rpn_search (zh, zebraSet->nmem, zebraSet->rset_nmem, + rpn, zebraSet->num_bases, zebraSet->basenames, zebraSet->name, zebraSet); zh->hits = zebraSet->hits; @@ -204,6 +206,8 @@ ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov) return NULL; if (s->rset) rset_delete (s->rset); + if (s->rset_nmem) + nmem_destroy (s->rset_nmem); if (s->nmem) nmem_destroy (s->nmem); } @@ -237,6 +241,7 @@ ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov) s->term_entries = 0; s->hits = 0; s->rset = 0; + s->rset_nmem=0; s->nmem = 0; s->rpn = 0; return s; @@ -253,8 +258,10 @@ ZebraSet resultSetGet (ZebraHandle zh, const char *name) { NMEM nmem = nmem_create (); yaz_log (LOG_LOG, "research %s", name); + if (!s->rset_nmem) + s->rset_nmem=nmem_create(); s->rset = - rpn_search (zh, nmem, s->rpn, s->num_bases, + rpn_search (zh, nmem, s->rset_nmem, s->rpn, s->num_bases, s->basenames, s->name, s); nmem_destroy (nmem); } @@ -272,6 +279,9 @@ void resultSetInvalidate (ZebraHandle zh) if (s->rset) rset_delete (s->rset); s->rset = 0; + if (s->rset_nmem) + nmem_destroy(s->rset_nmem); + s->rset_nmem=0; } } @@ -312,6 +322,8 @@ void resultSetDestroy (ZebraHandle zh, int num, char **names,int *statuses) nmem_destroy (s->nmem); if (s->rset) rset_delete (s->rset); + if (s->rset_nmem) + nmem_destroy(s->rset_nmem); xfree (s->name); xfree (s); } @@ -395,13 +407,9 @@ ZebraPosSet zebraPosSetCreate (ZebraHandle zh, const char *name, while (num_i < num && positions[num_i] < position) num_i++; rfd = rset_open (rset, RSETF_READ); - while (num_i < num && rset_read (rset, rfd, &key)) + while (num_i < num && rset_read (rfd, &key, 0)) { -#if IT_KEY_NEW zint this_sys = key.mem[0]; -#else - zint this_sys = key.sysno; -#endif if (this_sys != psysno) { psysno = this_sys; @@ -425,7 +433,7 @@ ZebraPosSet zebraPosSetCreate (ZebraHandle zh, const char *name, } } } - rset_close (rset, rfd); + rset_close (rfd); } } return sr; @@ -617,6 +625,7 @@ void resultSetSortSingle (ZebraHandle zh, NMEM nmem, struct sortKeyInfo sort_criteria[3]; int num_criteria; RSFD rfd; + TERMID termid; yaz_log (LOG_LOG, "resultSetSortSingle start"); assert(nmem); /* compiler shut up about unused param */ @@ -682,13 +691,10 @@ void resultSetSortSingle (ZebraHandle zh, NMEM nmem, } } rfd = rset_open (rset, RSETF_READ); - while (rset_read (rset, rfd, &key)) + while (rset_read (rfd, &key,&termid)) + /* FIXME - pass a TERMID *, and use it for something below !! */ { -#if IT_KEY_NEW zint this_sys = key.mem[0]; -#else - zint this_sys = key.sysno; -#endif if (this_sys != psysno) { (sset->hits)++; @@ -697,7 +703,7 @@ void resultSetSortSingle (ZebraHandle zh, NMEM nmem, sort_criteria, num_criteria, psysno); } } - rset_close (rset, rfd); + rset_close (rfd); #if 0 for (i = 0; i < rset->no_rset_terms; i++) @@ -721,13 +727,15 @@ RSET resultSetRef (ZebraHandle zh, const char *resultSetId) return NULL; } -void resultSetRank (ZebraHandle zh, ZebraSet zebraSet, RSET rset) +void resultSetRank (ZebraHandle zh, ZebraSet zebraSet, RSET rset, NMEM nmem) { zint kno = 0; struct it_key key; RSFD rfd; - /* int term_index; */ - int i; + TERMID termid; + TERMID *terms; + int numterms; + int i,n; ZebraRankClass rank_class; struct rank_control *rc; struct zset_sort_info *sort_info; @@ -740,9 +748,13 @@ void resultSetRank (ZebraHandle zh, ZebraSet zebraSet, RSET rset) sort_info = zebraSet->sort_info; sort_info->num_entries = 0; zebraSet->hits = 0; - rfd = rset_open (rset, RSETF_READ); + n=0; + rset_getterms(rset,0,0,&n); + terms=malloc( sizeof(*terms)*n); + numterms=0; + rset_getterms(rset,terms,n,&numterms); - yaz_log (LOG_LOG, "resultSetRank"); + rfd = rset_open (rset, RSETF_READ); rank_class = zebraRankLookup (zh, rank_handler_name); if (!rank_class) @@ -752,27 +764,21 @@ void resultSetRank (ZebraHandle zh, ZebraSet zebraSet, RSET rset) } rc = rank_class->control; - if (rset_read (rset, rfd, &key)) + if (rset_read (rfd, &key, &termid)) { -#if IT_KEY_NEW zint psysno = key.mem[0]; -#else - zint psysno = key.sysno; -#endif int score; void *handle = - (*rc->begin) (zh->reg, rank_class->class_handle, rset); + (*rc->begin) (zh->reg, rank_class->class_handle, rset, nmem, + terms, numterms); (zebraSet->hits)++; esthits=atoi(res_get_def(zh->res,"estimatehits","0")); if (!esthits) est=-1; /* can not do */ do { -#if IT_KEY_NEW - zint this_sys = key.mem[0]; -#else - zint this_sys = key.sysno; -#endif + zint this_sys = key.mem[0]; /* FIXME - assumes scope==2 */ + zint seqno = key.mem[1]; /* FIXME - assumes scope==2 */ kno++; if (this_sys != psysno) { @@ -782,54 +788,47 @@ void resultSetRank (ZebraHandle zh, ZebraSet zebraSet, RSET rset) (zebraSet->hits)++; psysno = this_sys; } - /* FIXME - Ranking is broken, since rsets no longer have */ - /* term lists! */ - /* (*rc->add) (handle, this_sys, term_index); */ + (*rc->add) (handle, seqno, termid); - if ( (est==-2) && (zebraSet->hits==esthits)) - { /* time to estimate the hits */ - rset_pos(rset,rfd,&cur,&tot); - if (tot>0) { - ratio=cur/tot; - est=(zint)(0.5+zebraSet->hits/ratio); - logf(LOG_LOG, "Estimating hits (%s) " - "%0.1f->"ZINT_FORMAT - "; %0.1f->"ZINT_FORMAT, - rset->control->desc, - cur, zebraSet->hits, - tot,est); - i=0; /* round to 3 significant digits */ - while (est>1000) { - est/=10; - i++; + if ( (est==-2) && (zebraSet->hits==esthits)) + { /* time to estimate the hits */ + rset_pos(rfd,&cur,&tot); + if (tot>0) { + ratio=cur/tot; + est=(zint)(0.5+zebraSet->hits/ratio); + logf(LOG_LOG, "Estimating hits (%s) " + "%0.1f->"ZINT_FORMAT + "; %0.1f->"ZINT_FORMAT, + rset->control->desc, + cur, zebraSet->hits, + tot,est); + i=0; /* round to 3 significant digits */ + while (est>1000) { + est/=10; + i++; + } + while (i--) est*=10; + zebraSet->hits=est; } - while (i--) est*=10; - zebraSet->hits=est; } } - } - while (rset_read (rset, rfd, &key) && (est<0) ); - + while (rset_read (rfd, &key,&termid) && (est<0) ); score = (*rc->calc) (handle, psysno); resultSetInsertRank (zh, sort_info, psysno, score, 'A'); (*rc->end) (zh->reg, handle); } - rset_close (rset, rfd); -/* - for (i = 0; i < rset->no_rset_terms; i++) + rset_close (rfd); + + + for (i = 0; i < numterms; i++) { - if (est>0) - rset->rset_terms[i]->count = - est=(zint)(rset->rset_terms[i]->count/ratio); - yaz_log (LOG_LOG, "term=\"%s\" nn=" ZINT_FORMAT + yaz_log (LOG_LOG, "term=\"%s\" " " type=%s count=" ZINT_FORMAT, - rset->rset_terms[i]->name, - rset->rset_terms[i]->nn, - rset->rset_terms[i]->flags, - rset->rset_terms[i]->count); + terms[i]->name, + terms[i]->flags, + rset_count(terms[i]->rset)); } -*/ - yaz_log (LOG_LOG, ZINT_FORMAT " keys, "ZINT_FORMAT" distinct sysnos", + yaz_log (LOG_DEBUG, ZINT_FORMAT " keys, "ZINT_FORMAT" distinct sysnos", kno, zebraSet->hits); }