From 3f385c9ee5fea1f18f200a598747c9b385eee9d8 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Wed, 22 Jun 2005 19:42:37 +0000 Subject: [PATCH] Added support for term references (queryIDs) for searchResult. --- include/idzebra/api.h | 5 ++-- include/rset.h | 6 +++-- index/index.h | 5 ++-- index/trunc.c | 7 ++--- index/zrpn.c | 70 +++++++++++++++++++++++++++++++++++++++++++------ index/zserver.c | 5 ++-- index/zsets.c | 14 ++++++---- rset/rsbetween.c | 4 +-- rset/rset.c | 32 +++++++++++++++------- 9 files changed, 112 insertions(+), 36 deletions(-) diff --git a/include/idzebra/api.h b/include/idzebra/api.h index 1b9dd22..e2bddc2 100644 --- a/include/idzebra/api.h +++ b/include/idzebra/api.h @@ -1,4 +1,4 @@ -/* $Id: api.h,v 1.26 2005-06-09 10:39:52 adam Exp $ +/* $Id: api.h,v 1.27 2005-06-22 19:42:38 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -253,7 +253,8 @@ ZEBRA_RES zebra_result_set_term_no(ZebraHandle zh, const char *setname, YAZ_EXPORT ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname, int no, zint *count, int *approx, - char *termbuf, size_t *termlen); + char *termbuf, size_t *termlen, + const char **term_ref_id); /** diff --git a/include/rset.h b/include/rset.h index bf8f87b..7c33116 100644 --- a/include/rset.h +++ b/include/rset.h @@ -1,4 +1,4 @@ -/* $Id: rset.h,v 1.56 2005-06-07 14:53:38 adam Exp $ +/* $Id: rset.h,v 1.57 2005-06-22 19:42:37 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -61,13 +61,15 @@ struct rset_term { int reg_type; /** register type */ RSET rset; /** the rset corresponding to this term */ void *rankpriv;/** private stuff for the ranking algorithm */ + zint hits_limit;/** limit for hits if > 0 */ + char *ref_id; /** reference for this term */ struct ord_list *ol; }; typedef struct rset_term *TERMID; TERMID rset_term_create (const char *name, int length, const char *flags, int type, NMEM nmem, struct ord_list *ol, - int reg_type); + int reg_type, zint hits_limit, const char *ref_id); /** rsfd is a "file descriptor" for reading from a rset */ struct rsfd { /* the stuff common to all rsfd's. */ diff --git a/index/index.h b/index/index.h index 131f12c..a21f1f4 100644 --- a/index/index.h +++ b/index/index.h @@ -1,4 +1,4 @@ -/* $Id: index.h,v 1.143 2005-06-09 10:39:53 adam Exp $ +/* $Id: index.h,v 1.144 2005-06-22 19:42:38 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -369,7 +369,8 @@ RSET rset_trunc(ZebraHandle zh, ISAM_P *isam_p, int no, const char *term, int length_term, const char *flags, int preserve_position, int term_type, NMEM rset_nmem, struct rset_key_control *kctrl, int scope, - struct ord_list *ol, int reg_type); + struct ord_list *ol, int reg_type, + zint hits_limit, const char *term_ref_id); void resultSetAddTerm (ZebraHandle zh, ZebraSet s, int reg_type, const char *db, int set, diff --git a/index/trunc.c b/index/trunc.c index b416592..2af46e6 100644 --- a/index/trunc.c +++ b/index/trunc.c @@ -1,4 +1,4 @@ -/* $Id: trunc.c,v 1.60 2005-06-07 14:53:39 adam Exp $ +/* $Id: trunc.c,v 1.61 2005-06-22 19:42:38 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -400,14 +400,15 @@ RSET rset_trunc(ZebraHandle zi, ISAM_P *isam_p, int no, const char *term, int length, const char *flags, int preserve_position, int term_type, NMEM rset_nmem, struct rset_key_control *kctrl, int scope, - struct ord_list *ol, int reg_type) + struct ord_list *ol, int reg_type, + zint hits_limit, const char *term_ref_id) { TERMID termid; RSET result; int trunc_chunk; termid = rset_term_create(term, length, flags, term_type, rset_nmem, ol, - reg_type); + reg_type, hits_limit, term_ref_id); if (no < 1) return rsnull_create(rset_nmem, kctrl, termid); diff --git a/index/zrpn.c b/index/zrpn.c index 9927949..9c72835 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -1,4 +1,4 @@ -/* $Id: zrpn.c,v 1.200 2005-06-14 20:28:54 adam Exp $ +/* $Id: zrpn.c,v 1.201 2005-06-22 19:42:38 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -990,6 +990,43 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, char *term_dst, int xpath_use, struct ord_list **ol); +static ZEBRA_RES term_limits_APT(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + zint *hits_limit_value, + const char **term_ref_id_str) +{ + AttrType term_ref_id_attr; + AttrType hits_limit_attr; + + attr_init(&hits_limit_attr, zapt, 9); + *hits_limit_value = attr_find(&hits_limit_attr, NULL); + + attr_init(&term_ref_id_attr, zapt, 10); + attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str); + + /* no limit given ? */ + if (*hits_limit_value == -1) + if (*term_ref_id_str) + { + /* use global if term_ref is present */ + *hits_limit_value = zh->approx_limit; + } + else + { + /* no counting if term_ref is not present */ + *hits_limit_value = 0; + } + else if (*hits_limit_value == 0) + { + /* 0 is the same as global limit */ + *hits_limit_value = zh->approx_limit; + } + yaz_log(YLOG_LOG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT, + *term_ref_id_str ? *term_ref_id_str : "none", + *hits_limit_value); + return ZEBRA_OK; +} + static ZEBRA_RES term_trunc(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, @@ -1005,7 +1042,11 @@ static ZEBRA_RES term_trunc(ZebraHandle zh, { ZEBRA_RES res; struct ord_list *ol; + zint hits_limit_value; + const char *term_ref_id_str = 0; *rset = 0; + + term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str); grep_info->isam_p_indx = 0; res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info, reg_type, complete_flag, num_bases, basenames, @@ -1019,7 +1060,8 @@ static ZEBRA_RES term_trunc(ZebraHandle zh, grep_info->isam_p_indx, term_dst, strlen(term_dst), rank_type, 1 /* preserve pos */, zapt->term->which, rset_nmem, - kc, kc->scope, ol, reg_type); + kc, kc->scope, ol, reg_type, hits_limit_value, + term_ref_id_str); if (!*rset) return ZEBRA_FAIL; return ZEBRA_OK; @@ -1871,6 +1913,7 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, return ZEBRA_OK; } + static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz, @@ -1890,6 +1933,10 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, ZEBRA_RES res; struct grep_info grep_info; int alloc_sets = 0; + zint hits_limit_value; + const char *term_ref_id_str = 0; + + term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str); yaz_log(log_level_rpn, "APT_numeric t='%s'", termz); if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL) @@ -1921,7 +1968,9 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, strlen(term_dst), rank_type, 0 /* preserve position */, zapt->term->which, rset_nmem, - kc, kc->scope, 0, reg_type); + kc, kc->scope, 0, reg_type, + hits_limit_value, + term_ref_id_str); if (!result_sets[num_result_sets]) break; num_result_sets++; @@ -2115,7 +2164,8 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream, rset = rset_trunc(zh, grep_info.isam_p_buf, grep_info.isam_p_indx, term, strlen(term), flags, 1, term_type,rset_nmem, - kc, kc->scope, 0, reg_type); + kc, kc->scope, 0, reg_type, 0 /* hits_limit */, + 0 /* term_ref_id_str */); grep_info_delete(&grep_info); return rset; } @@ -2920,7 +2970,8 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1, glist[lo].term, strlen(glist[lo].term), NULL, 0, zapt->term->which, rset_nmem, - kc, kc->scope, 0, reg_id); + kc, kc->scope, 0, reg_id, 0 /* hits_limit */, + 0 /* term_ref_id_str */); } ptr[j0]++; /* move index for this set .. */ /* get result set for remaining scan terms */ @@ -2941,7 +2992,8 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, glist[lo].term, strlen(glist[lo].term), NULL, 0, zapt->term->which,rset_nmem, - kc, kc->scope, 0, reg_id); + kc, kc->scope, 0, reg_id, 0 /* hits_limit */, + 0 /* term_ref_id_str */ ); rset = rsmulti_or_create(rset_nmem, kc, kc->scope, 0 /* termid */, 2, rsets); @@ -3014,7 +3066,8 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1, glist[lo].term, strlen(glist[lo].term), NULL, 0, zapt->term->which, rset_nmem, - kc, kc->scope, 0, reg_id); + kc, kc->scope, 0, reg_id, 0 /* hits_limit */, + 0 /* term_ref_id_str */); ptr[j0]++; @@ -3033,7 +3086,8 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, glist[lo].term, strlen(glist[lo].term), NULL, 0, zapt->term->which, rset_nmem, - kc, kc->scope, 0, reg_id); + kc, kc->scope, 0, reg_id, 0 /* hits_limit */, + 0 /* term_ref_id_str */); rset = rsmulti_or_create(rset_nmem, kc, kc->scope, 0 /* termid */, 2, rsets); diff --git a/index/zserver.c b/index/zserver.c index 00ef4df..7feac45 100644 --- a/index/zserver.c +++ b/index/zserver.c @@ -1,4 +1,4 @@ -/* $Id: zserver.c,v 1.136 2005-06-14 20:28:54 adam Exp $ +/* $Id: zserver.c,v 1.137 2005-06-22 19:42:38 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -214,7 +214,8 @@ static void search_terms(ZebraHandle zh, bend_search_rr *r) size_t len = sizeof(outbuf); zebra_result_set_term_info(zh, r->setname, i, - &count, &approx, outbuf, &len); + &count, &approx, outbuf, &len, + 0 /* term_ref_id */ ); sr->elements[i] = odr_malloc (r->stream, sizeof(**sr->elements)); sr->elements[i]->subqueryId = 0; diff --git a/index/zsets.c b/index/zsets.c index 024113e..999f443 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -1,4 +1,4 @@ -/* $Id: zsets.c,v 1.89 2005-06-09 10:39:53 adam Exp $ +/* $Id: zsets.c,v 1.90 2005-06-22 19:42:38 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -866,7 +866,7 @@ ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet, key_logdump_txt(log_level_searchhits, &key, termid->name); if (this_sys != psysno) { - if (rfd->counted_items >= rset->hits_limit) + if (rfd->counted_items > rset->hits_limit) break; if (psysno) { @@ -960,8 +960,9 @@ static int trav_rset_for_termids(RSET rset, TERMID *termid_array, if (approx_array) approx_array[no] = rset->hits_approx; #if 0 - yaz_log(YLOG_LOG, "rset=%p term=%s count=" ZINT_FORMAT, - rset, rset->term->name, rset->hits_count); + yaz_log(YLOG_LOG, "rset=%p term=%s limit=" ZINT_FORMAT + " count=" ZINT_FORMAT, + rset, rset->term->name, rset->hits_limit, rset->hits_count); #endif no++; } @@ -983,7 +984,8 @@ ZEBRA_RES zebra_result_set_term_no(ZebraHandle zh, const char *setname, ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname, int no, zint *count, int *approx, - char *termbuf, size_t *termlen) + char *termbuf, size_t *termlen, + const char **term_ref_id) { ZebraSet sset = resultSetGet(zh, setname); if (sset) @@ -1029,6 +1031,8 @@ ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname, } termbuf[*termlen] = '\0'; } + if (term_ref_id) + *term_ref_id = term_array[no]->ref_id; xfree(term_array); xfree(hits_array); diff --git a/rset/rsbetween.c b/rset/rsbetween.c index d988a6b..2117dc8 100644 --- a/rset/rsbetween.c +++ b/rset/rsbetween.c @@ -1,4 +1,4 @@ -/* $Id: rsbetween.c,v 1.41 2005-06-07 14:53:39 adam Exp $ +/* $Id: rsbetween.c,v 1.42 2005-06-22 19:42:39 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -96,7 +96,7 @@ static void checkterm(RSET rs, char *tag, NMEM nmem) { if (!rs->term) { - rs->term = rset_term_create(tag, strlen(tag), "", 0, nmem, 0, 0); + rs->term = rset_term_create(tag, -1, "", 0, nmem, 0, 0, 0, 0); rs->term->rset = rs; } } diff --git a/rset/rset.c b/rset/rset.c index 648193a..b71fe78 100644 --- a/rset/rset.c +++ b/rset/rset.c @@ -1,4 +1,4 @@ -/* $Id: rset.c,v 1.51 2005-06-09 10:39:53 adam Exp $ +/* $Id: rset.c,v 1.52 2005-06-22 19:42:39 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -78,13 +78,16 @@ void rset_close(RSFD rfd) { TERMID termid; char buf[100]; - while(rfd->counted_items < rs->hits_limit + while(rfd->counted_items <= rs->hits_limit && rset_default_read(rfd, buf, &termid)) ; rs->hits_count = rfd->counted_items; + yaz_log(log_level, "rset_close rset=%p hits_count=" ZINT_FORMAT + " hits_limit=" ZINT_FORMAT, + rs, rs->hits_count, rs->hits_limit); rs->hits_approx = 0; - if (rs->hits_count >= rs->hits_limit) + if (rs->hits_count > rs->hits_limit) { double cur, tot; zint est; @@ -159,6 +162,9 @@ RSET rset_create_base(const struct rset_control *sel, rset = (RSET) nmem_malloc(nmem, sizeof(*rset)); yaz_log(log_level, "rs_create(%s) rs=%p (nm=%p)", sel->desc, rset, nmem); + yaz_log(log_level, " ref_id=%s limit=" ZINT_FORMAT, + (term && term->ref_id ? term->ref_id : "null"), + rset->hits_limit); rset->nmem = nmem; rset->control = sel; rset->refcount = 1; @@ -173,8 +179,10 @@ RSET rset_create_base(const struct rset_control *sel, rset->scope = scope; rset->term = term; if (term) + { term->rset = rset; - + rset->hits_limit = term->hits_limit; + } rset->no_children = no_children; rset->children = 0; if (no_children) @@ -308,10 +316,13 @@ struct ord_list *ord_list_dup(NMEM nmem, struct ord_list *list) \param nmem memory for term. \param ol ord list \param reg_type register type + \param hits_limit limit before counting stops and gets approximate + \param ref_id supplied ID for term that can be used to identify this */ TERMID rset_term_create(const char *name, int length, const char *flags, int type, NMEM nmem, struct ord_list *ol, - int reg_type) + int reg_type, + zint hits_limit, const char *ref_id) { TERMID t; @@ -323,15 +334,16 @@ TERMID rset_term_create(const char *name, int length, const char *flags, else if (length == -1) t->name = nmem_strdup(nmem, name); else - { - t->name = (char*) nmem_malloc(nmem, length+1); - memcpy (t->name, name, length); - t->name[length] = '\0'; - } + t->name = nmem_strdupn(nmem, name, length); + if (!ref_id) + t->ref_id = 0; + else + t->ref_id = nmem_strdup(nmem, ref_id); if (!flags) t->flags = NULL; else t->flags = nmem_strdup(nmem, flags); + t->hits_limit = hits_limit; t->type = type; t->reg_type = reg_type; t->rankpriv = 0; -- 1.7.10.4