X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fzsets.c;h=1d24d9b8e932afc69e75ca78f464446d021c77e9;hb=2b99f08490d3bd763bb086693044a6db10c0bcf9;hp=a794cfef31ba8f5a005ba59665273b9294d46fb0;hpb=6c57b2c269b3b1d16ebf2ab52c3ad3547d5ecdbb;p=idzebra-moved-to-github.git diff --git a/index/zsets.c b/index/zsets.c index a794cfe..1d24d9b 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -1,5 +1,5 @@ -/* $Id: zsets.c,v 1.43 2003-03-06 11:58:08 adam Exp $ - Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 +/* $Id: zsets.c,v 1.54 2004-08-06 13:36:23 adam Exp $ + Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps This file is part of the Zebra server. @@ -58,7 +58,7 @@ struct zebra_set { }; struct zset_sort_entry { - int sysno; + zint sysno; int score; char buf[ZSET_SORT_MAX_LEVEL][SORT_IDX_ENTRYSIZE]; }; @@ -70,7 +70,7 @@ struct zset_sort_info { struct zset_sort_entry **entries; }; -ZebraSet resultSetAddRPN (ZebraHandle zh, ODR input, ODR output, +ZebraSet resultSetAddRPN (ZebraHandle zh, NMEM m, Z_RPNQuery *rpn, int num_bases, char **basenames, const char *setname) @@ -87,7 +87,7 @@ ZebraSet resultSetAddRPN (ZebraHandle zh, ODR input, ODR output, return 0; zebraSet->locked = 1; zebraSet->rpn = 0; - zebraSet->nmem = nmem_create (); + zebraSet->nmem = m; zebraSet->num_bases = num_bases; zebraSet->basenames = @@ -96,7 +96,7 @@ ZebraSet resultSetAddRPN (ZebraHandle zh, ODR input, ODR output, zebraSet->basenames[i] = nmem_strdup (zebraSet->nmem, basenames[i]); - zebraSet->rset = rpn_search (zh, output->mem, rpn, + zebraSet->rset = rpn_search (zh, zebraSet->nmem, rpn, zebraSet->num_bases, zebraSet->basenames, zebraSet->name, zebraSet); @@ -136,7 +136,7 @@ void resultSetAddTerm (ZebraHandle zh, ZebraSet s, int reg_type, int zebra_resultSetTerms (ZebraHandle zh, const char *setname, - int no, int *count, + int no, zint *count, int *type, char *out, size_t *len) { ZebraSet s = resultSetGet (zh, setname); @@ -207,6 +207,8 @@ ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov) } else { + const char *sort_max_str = zebra_get_resource(zh, "sortmax", "1000"); + yaz_log (LOG_DEBUG, "adding result set %s", name); s = (ZebraSet) xmalloc (sizeof(*s)); s->next = zh->sets; @@ -216,7 +218,10 @@ ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov) s->sort_info = (struct zset_sort_info *) xmalloc (sizeof(*s->sort_info)); - s->sort_info->max_entries = 1000; + s->sort_info->max_entries = atoi(sort_max_str); + if (s->sort_info->max_entries < 2) + s->sort_info->max_entries = 2; + s->sort_info->entries = (struct zset_sort_entry **) xmalloc (sizeof(*s->sort_info->entries) * s->sort_info->max_entries); @@ -379,7 +384,7 @@ ZebraPosSet zebraPosSetCreate (ZebraHandle zh, const char *name, { int position = 0; int num_i = 0; - int psysno = 0; + zint psysno = 0; int term_index; RSFD rfd; struct it_key key; @@ -391,9 +396,14 @@ ZebraPosSet zebraPosSetCreate (ZebraHandle zh, const char *name, rfd = rset_open (rset, RSETF_READ); while (num_i < num && rset_read (rset, rfd, &key, &term_index)) { - if (key.sysno != psysno) +#if IT_KEY_NEW + zint this_sys = key.mem[0]; +#else + zint this_sys = key.sysno; +#endif + if (this_sys != psysno) { - psysno = key.sysno; + psysno = this_sys; if (sort_info) { /* determine we alreay have this in our set */ @@ -433,7 +443,7 @@ struct sortKeyInfo { void resultSetInsertSort (ZebraHandle zh, ZebraSet sset, struct sortKeyInfo *criteria, int num_criteria, - int sysno) + zint sysno) { struct zset_sort_entry this_entry; struct zset_sort_entry *new_entry = NULL; @@ -507,7 +517,7 @@ void resultSetInsertSort (ZebraHandle zh, ZebraSet sset, } void resultSetInsertRank (ZebraHandle zh, struct zset_sort_info *sort_info, - int sysno, int score, int relation) + zint sysno, int score, int relation) { struct zset_sort_entry *new_entry = NULL; int i, j; @@ -598,7 +608,8 @@ void resultSetSortSingle (ZebraHandle zh, NMEM nmem, ZebraSet sset, RSET rset, Z_SortKeySpecList *sort_sequence, int *sort_status) { - int i, psysno = 0; + int i; + zint psysno = 0; struct it_key key; struct sortKeyInfo sort_criteria[3]; int num_criteria; @@ -617,9 +628,9 @@ void resultSetSortSingle (ZebraHandle zh, NMEM nmem, Z_SortKeySpec *sks = sort_sequence->specs[i]; Z_SortKey *sk; - if (*sks->sortRelation == Z_SortRelation_ascending) + if (*sks->sortRelation == Z_SortKeySpec_ascending) sort_criteria[i].relation = 'A'; - else if (*sks->sortRelation == Z_SortRelation_descending) + else if (*sks->sortRelation == Z_SortKeySpec_descending) sort_criteria[i].relation = 'D'; else { @@ -670,10 +681,15 @@ void resultSetSortSingle (ZebraHandle zh, NMEM nmem, rfd = rset_open (rset, RSETF_READ); while (rset_read (rset, rfd, &key, &term_index)) { - if (key.sysno != psysno) +#if IT_KEY_NEW + zint this_sys = key.mem[0]; +#else + zint this_sys = key.sysno; +#endif + if (this_sys != psysno) { (sset->hits)++; - psysno = key.sysno; + psysno = this_sys; resultSetInsertSort (zh, sset, sort_criteria, num_criteria, psysno); } @@ -681,13 +697,13 @@ void resultSetSortSingle (ZebraHandle zh, NMEM nmem, rset_close (rset, rfd); for (i = 0; i < rset->no_rset_terms; i++) - yaz_log (LOG_LOG, "term=\"%s\" nn=%d type=%s count=%d", + yaz_log (LOG_LOG, "term=\"%s\" nn=" ZINT_FORMAT " type=%s count=" ZINT_FORMAT, rset->rset_terms[i]->name, rset->rset_terms[i]->nn, rset->rset_terms[i]->flags, rset->rset_terms[i]->count); - *sort_status = Z_SortStatus_success; + *sort_status = Z_SortResponse_success; yaz_log (LOG_LOG, "resultSetSortSingle end"); } @@ -702,13 +718,17 @@ RSET resultSetRef (ZebraHandle zh, const char *resultSetId) void resultSetRank (ZebraHandle zh, ZebraSet zebraSet, RSET rset) { - int kno = 0; + zint kno = 0; struct it_key key; RSFD rfd; int term_index, i; ZebraRankClass rank_class; struct rank_control *rc; struct zset_sort_info *sort_info; + const char *rank_handler_name = res_get_def(zh->res, "rank", "rank-1"); + double cur,tot; + zint est=-2; /* -2 not done, -1 can't do, >0 actual estimate*/ + zint esthits; sort_info = zebraSet->sort_info; sort_info->num_entries = 0; @@ -717,30 +737,64 @@ void resultSetRank (ZebraHandle zh, ZebraSet zebraSet, RSET rset) yaz_log (LOG_LOG, "resultSetRank"); - rank_class = zebraRankLookup (zh, res_get_def(zh->res, "rank", "rank-1")); + rank_class = zebraRankLookup (zh, rank_handler_name); + if (!rank_class) + { + yaz_log (LOG_WARN, "No such rank handler: %s", rank_handler_name); + return; + } rc = rank_class->control; if (rset_read (rset, rfd, &key, &term_index)) { - int psysno = key.sysno; +#if IT_KEY_NEW + zint psysno = key.mem[0]; +#else + zint psysno = key.sysno; +#endif int score; void *handle = (*rc->begin) (zh->reg, rank_class->class_handle, rset); (zebraSet->hits)++; + esthits=atoi(res_get_def(zh->res,"estimatehits","0")); + if (!esthits) est=-1; /* can not do */ do { +#if IT_KEY_NEW + zint this_sys = key.mem[0]; +#else + zint this_sys = key.sysno; +#endif kno++; - if (key.sysno != psysno) + if (this_sys != psysno) { score = (*rc->calc) (handle, psysno); resultSetInsertRank (zh, sort_info, psysno, score, 'A'); (zebraSet->hits)++; - psysno = key.sysno; + psysno = this_sys; } - (*rc->add) (handle, key.seqno, term_index); + (*rc->add) (handle, this_sys, term_index); + if ( (est==-2) && (zebraSet->hits==esthits)) + { /* time to estimate the hits */ + float f; + rset_pos(rset,rfd,&cur,&tot); + if (tot>0) { + f=1.0*cur/tot; + est=(zint)(0.5+zebraSet->hits/f); + /* FIXME - round the guess to 3 digits */ + logf(LOG_LOG, "Estimating hits (%s) " + "%0.1f->%d" + "; %0.1f->"ZINT_FORMAT, + rset->control->desc, + cur, zebraSet->hits, + tot,est); + zebraSet->hits=est; + } + } } - while (rset_read (rset, rfd, &key, &term_index)); + while (rset_read (rset, rfd, &key, &term_index) && (est<0) ); + score = (*rc->calc) (handle, psysno); resultSetInsertRank (zh, sort_info, psysno, score, 'A'); (*rc->end) (zh->reg, handle); @@ -748,13 +802,14 @@ void resultSetRank (ZebraHandle zh, ZebraSet zebraSet, RSET rset) rset_close (rset, rfd); for (i = 0; i < rset->no_rset_terms; i++) - yaz_log (LOG_LOG, "term=\"%s\" nn=%d type=%s count=%d", + yaz_log (LOG_LOG, "term=\"%s\" nn=" ZINT_FORMAT " type=%s count=" ZINT_FORMAT, rset->rset_terms[i]->name, rset->rset_terms[i]->nn, rset->rset_terms[i]->flags, rset->rset_terms[i]->count); - yaz_log (LOG_LOG, "%d keys, %d distinct sysnos", kno, zebraSet->hits); + yaz_log (LOG_LOG, ZINT_FORMAT " keys, %d distinct sysnos", + kno, zebraSet->hits); } ZebraRankClass zebraRankLookup (ZebraHandle zh, const char *name) @@ -765,7 +820,7 @@ ZebraRankClass zebraRankLookup (ZebraHandle zh, const char *name) if (p && !p->init_flag) { if (p->control->create) - p->class_handle = (*p->control->create)(zh->reg); + p->class_handle = (*p->control->create)(zh); p->init_flag = 1; } return p;