X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fzsets.c;h=949ace4a54320608ceb3b4309911eabd408b53b7;hb=593927cb1897c1e3163c284448eff7fee6ddad51;hp=9726903cc40c11bcb28a519ab8c1c45a226dc7cf;hpb=e8393fc8e78d777294f6eabf4029b90d566cf978;p=idzebra-moved-to-github.git diff --git a/index/zsets.c b/index/zsets.c index 9726903..949ace4 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -1,4 +1,4 @@ -/* $Id: zsets.c,v 1.85 2005-05-31 13:01:37 adam Exp $ +/* $Id: zsets.c,v 1.96 2005-10-28 07:25:30 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -62,6 +62,7 @@ struct zebra_set { zint cache_position; /* last position */ RSFD cache_rfd; /* rfd (NULL if not existing) */ zint cache_psysno; /* sysno for last position */ + zint approx_limit; /* limit before we do approx */ }; struct zset_sort_entry { @@ -128,6 +129,7 @@ ZEBRA_RES resultSetSearch(ZebraHandle zh, NMEM nmem, NMEM rset_nmem, for (i = 0; sort_sequence->specs[i]; i++) ; sort_sequence->num_specs = i; + rset->hits_limit = sset->approx_limit; if (!i) { res = resultSetRank (zh, sset, rset, rset_nmem); @@ -152,7 +154,7 @@ ZEBRA_RES resultSetAddRPN (ZebraHandle zh, NMEM m, Z_RPNQuery *rpn, zh->hits = 0; - zebraSet = resultSetAdd (zh, setname, 1); + zebraSet = resultSetAdd(zh, setname, 1); if (!zebraSet) return ZEBRA_FAIL; zebraSet->locked = 1; @@ -164,7 +166,7 @@ ZEBRA_RES resultSetAddRPN (ZebraHandle zh, NMEM m, Z_RPNQuery *rpn, zebraSet->basenames = nmem_malloc (zebraSet->nmem, num_bases * sizeof(*zebraSet->basenames)); for (i = 0; i<num_bases; i++) - zebraSet->basenames[i] = nmem_strdup (zebraSet->nmem, basenames[i]); + zebraSet->basenames[i] = nmem_strdup(zebraSet->nmem, basenames[i]); res = resultSetSearch(zh, zebraSet->nmem, zebraSet->rset_nmem, rpn, zebraSet); @@ -205,7 +207,7 @@ void resultSetAddTerm (ZebraHandle zh, ZebraSet s, int reg_type, (s->hits)++; } -ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov) +ZebraSet resultSetAdd(ZebraHandle zh, const char *name, int ov) { ZebraSet s; int i; @@ -267,10 +269,11 @@ 
ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov) s->rpn = 0; s->cache_position = 0; s->cache_rfd = 0; + s->approx_limit = zh->approx_limit; return s; } -ZebraSet resultSetGet (ZebraHandle zh, const char *name) +ZebraSet resultSetGet(ZebraHandle zh, const char *name) { ZebraSet s; @@ -313,7 +316,7 @@ void resultSetInvalidate (ZebraHandle zh) } } -void resultSetDestroy (ZebraHandle zh, int num, char **names,int *statuses) +void resultSetDestroy(ZebraHandle zh, int num, char **names,int *statuses) { ZebraSet * ss = &zh->sets; int i; @@ -395,6 +398,10 @@ ZebraMetaRecord *zebra_meta_records_create(ZebraHandle zh, const char *name, RSET rset; int i; struct zset_sort_info *sort_info; + size_t sysno_mem_index = 0; + + if (zh->m_staticrank) + sysno_mem_index = 1; if (!log_level_set) loglevels(); @@ -480,7 +487,7 @@ ZebraMetaRecord *zebra_meta_records_create(ZebraHandle zh, const char *name, } while (num_i < num && rset_read (rfd, &key, 0)) { - zint this_sys = key.mem[0]; + zint this_sys = key.mem[sysno_mem_index]; if (this_sys != psysno) { psysno = this_sys; @@ -708,6 +715,11 @@ ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem, TERMID termid; TERMID *terms; int numTerms = 0; + size_t sysno_mem_index = 0; + + if (zh->m_staticrank) + sysno_mem_index = 1; + assert(nmem); /* compiler shut up about unused param */ sset->sort_info->num_entries = 0; @@ -782,7 +794,7 @@ ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem, rfd = rset_open (rset, RSETF_READ); while (rset_read (rfd, &key, &termid)) { - zint this_sys = key.mem[0]; + zint this_sys = key.mem[sysno_mem_index]; if (log_level_searchhits) key_logdump_txt(log_level_searchhits, &key, termid->name); kno++; @@ -826,6 +838,10 @@ ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet, ZebraRankClass rank_class; struct zset_sort_info *sort_info; const char *rank_handler_name = res_get_def(zh->res, "rank", "rank-1"); + size_t sysno_mem_index = 0; + + if (zh->m_staticrank) + sysno_mem_index = 1; if 
(!log_level_set) loglevels(); @@ -836,6 +852,7 @@ ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet, terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n); rset_getterms(rset, terms, n, &numTerms); + rank_class = zebraRankLookup(zh, rank_handler_name); if (!rank_class) { @@ -848,35 +865,49 @@ ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet, RSFD rfd = rset_open(rset, RSETF_READ); struct rank_control *rc = rank_class->control; double score; + zint count = 0; void *handle = (*rc->begin) (zh->reg, rank_class->class_handle, rset, nmem, terms, numTerms); - zint psysno = 0; + zint psysno = 0; /* previous doc id / sys no */ + zint pstaticrank = 0; /* previous static rank */ + int stop_flag = 0; while (rset_read(rfd, &key, &termid)) { - zint this_sys = key.mem[0]; + zint this_sys = key.mem[sysno_mem_index]; + zint seqno = key.mem[key.len-1]; kno++; if (log_level_searchhits) key_logdump_txt(log_level_searchhits, &key, termid->name); - if (this_sys != psysno) - { - if (rfd->counted_items >= rset->hits_limit) + if (this_sys != psysno) + { /* new record .. */ + if (rfd->counted_items > rset->hits_limit) break; if (psysno) - { - score = (*rc->calc) (handle, psysno); + { /* only if we did have a previous record */ + score = (*rc->calc) (handle, psysno, pstaticrank, + &stop_flag); + /* insert the hit. A=Ascending */ resultSetInsertRank (zh, sort_info, psysno, score, 'A'); + count++; + if (stop_flag) + break; } psysno = this_sys; + if (zh->m_staticrank) + pstaticrank = key.mem[0]; } (*rc->add) (handle, CAST_ZINT_TO_INT(seqno), termid); } + /* no more items */ if (psysno) - { - score = (*rc->calc)(handle, psysno); + { /* we had - at least - one record */ + score = (*rc->calc)(handle, psysno, pstaticrank, &stop_flag); + /* insert the hit. 
A=Ascending */ resultSetInsertRank(zh, sort_info, psysno, score, 'A'); + count++; } (*rc->end) (zh->reg, handle); rset_close (rfd); @@ -954,8 +985,9 @@ static int trav_rset_for_termids(RSET rset, TERMID *termid_array, if (approx_array) approx_array[no] = rset->hits_approx; #if 0 - yaz_log(YLOG_LOG, "rset=%p term=%s count=" ZINT_FORMAT, - rset, rset->term->name, rset->hits_count); + yaz_log(YLOG_LOG, "rset=%p term=%s limit=" ZINT_FORMAT + " count=" ZINT_FORMAT, + rset, rset->term->name, rset->hits_limit, rset->hits_count); #endif no++; } @@ -977,7 +1009,8 @@ ZEBRA_RES zebra_result_set_term_no(ZebraHandle zh, const char *setname, ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname, int no, zint *count, int *approx, - char *termbuf, size_t *termlen) + char *termbuf, size_t *termlen, + const char **term_ref_id) { ZebraSet sset = resultSetGet(zh, setname); if (sset) @@ -1023,6 +1056,8 @@ ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname, } termbuf[*termlen] = '\0'; } + if (term_ref_id) + *term_ref_id = term_array[no]->ref_id; xfree(term_array); xfree(hits_array); @@ -1033,11 +1068,11 @@ ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname, return ZEBRA_FAIL; } -ZEBRA_RES zebra_get_hit_vector(ZebraHandle zh, const char *setname, - zint sysno) +ZEBRA_RES zebra_snippets_hit_vector(ZebraHandle zh, const char *setname, + zint sysno, zebra_snippets *snippets) { ZebraSet sset = resultSetGet(zh, setname); - yaz_log(YLOG_LOG, "zebra_get_hit_vector setname=%s zysno=" ZINT_FORMAT, + yaz_log(YLOG_DEBUG, "zebra_get_hit_vector setname=%s zysno=" ZINT_FORMAT, setname, sysno); if (!sset) return ZEBRA_FAIL; @@ -1072,13 +1107,19 @@ ZEBRA_RES zebra_get_hit_vector(ZebraHandle zh, const char *setname, { if (termid) { - key_logdump_txt(YLOG_LOG, &key, termid->name); - yaz_log(YLOG_LOG, " type=%d", termid->type); + struct ord_list *ol; + for (ol = termid->ol; ol; ol = ol->next) + { + zebra_snippets_append(snippets, 
key.mem[key.len-1], + ol->ord, termid->name); + } } } rset_close(rsfd); rset_delete(rset_comb); + nmem_destroy(nmem); + kc->dec(kc); } return ZEBRA_OK; }