From 4aae319a0b820d1e8d3ab5d82c48f5047c9995f9 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Thu, 30 Nov 2006 10:33:18 +0000 Subject: [PATCH] Added support for specification of approximative limits for whole query. This is specified as attribute type 12. Semantics is the same as estimatehits in zebra.cfg. --- NEWS | 4 ++ doc/administration.xml | 4 +- doc/querymodel.xml | 98 +++++++++++++++++++++++++++++------------------- index/index.h | 5 ++- index/rpnsearch.c | 33 +++++++++++++++- index/zsets.c | 6 ++- 6 files changed, 107 insertions(+), 43 deletions(-) diff --git a/NEWS b/NEWS index 0de54db..e7c16ba 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,7 @@ +Added support for specification of approximative limits for whole query. +This is specified as attribute type 12. Semantics is the same as +estimatehits in zebra.cfg. + --- 2.0.6 2006/11/28 Fixed bug #736: Updates gets slower. diff --git a/doc/administration.xml b/doc/administration.xml index 8905d11..ce2e903 100644 --- a/doc/administration.xml +++ b/doc/administration.xml @@ -1,5 +1,5 @@ - + Administrating Zebra + Query Model
@@ -205,7 +205,7 @@ bib-1 Standard PQF query language attribute set which defines the semantics of Z39.50 searching. In addition, all of the - non-use attributes (types 2-11) define the hard-wired + non-use attributes (types 2-12) define the hard-wired Zebra internal query processing. default @@ -348,7 +348,7 @@ Atomic (APT) queries are always leaf nodes in the PQF query tree. - UN-supplied non-use attributes types 2-11 are either inherited from + UN-supplied non-use attributes types 2-12 are either inherited from higher nodes in the query tree, or are set to Zebra's default values. See for details. @@ -1434,7 +1434,7 @@ model. These extensions are non-standard and non-portable: most functional extensions are modeled over the bib-1 attribute set, - defining type 7-9 attributes. + defining type 7 and higher values. There are also the special string type index names for the idxpath attribute set. @@ -1514,17 +1514,23 @@ 1.1 - Approx Limit - 11 + Term Reference + 10 search 1.4 - Term Reference - 10 + Local Approx Limit + 11 search 1.4 + + Global Approx Limit + 12 + search + 2.0.8 + @@ -1622,12 +1628,37 @@ Z> find @attr 2=102 @or @attr 9=30 @attr 1=4 utah @attr 9=20 utah -
+ + +
+ Zebra Extension Term Reference Attribute (type 10) + + Zebra supports the searchResult-1 facility. + If the Term Reference Attribute (type 10) is + given, it specifies a subqueryId value that is returned as part of the + search result. This gives a client a way to name an APT part of a + query. + + + + + Experimental. Do not use in production code. + + + +
+ + -
- Zebra Extension Approximative Limit Attribute (type 11) +
+ Local Approximative Limit Attribute (type 11) - Zebra computes - unless otherwise configured - + Zebra computes - unless otherwise configured - the exact hit count for every APT (leaf) in the query tree. These hit counts are returned as part of the searchResult-1 facility in the binary encoded Z39.50 search @@ -1663,45 +1694,36 @@ Do not use approximative hit count limits in conjunction with relevance ranking, as re-sorting of the - result set obviosly only works when the entire result set has + result set only works when the entire result set has been processed. - - - This facility clashes with rank weight, because there all - documents in the hit lists need to be examined for scoring and - re-sorting. - It is an experimental - extension. Do not use in production code. - -
-
- Zebra Extension Term Reference Attribute (type 10) +
+ Global Approximative Limit Attribute (type 12) - Zebra supports the searchResult-1 facility. - If the Term Reference Attribute (type 10) is - given, that specifies a subqueryId value returned as part of the - search result. It is a way for a client to name an APT part of a - query. + By default Zebra computes precise hit counts for a query as + a whole. Setting attribute 12 makes it perform approximative + hit counts instead. It has the same semantics as + estimatehits in zebra.cfg. - + The attribute (12) can occur anywhere in the query tree. + Unlike regular attributes it does not relate to the leaf (APT) + - but to the whole query. + - Experimental. Do not use in production code. - + Do not use approximative hit count limits + in conjunction with relevance ranking, as re-sorting of the + result set only works when the entire result set has + been processed. + -
+
-
Zebra specific Scan Extensions to all Attribute Sets diff --git a/index/index.h b/index/index.h index c7993fa..dcb408d 100644 --- a/index/index.h +++ b/index/index.h @@ -1,4 +1,4 @@ -/* $Id: index.h,v 1.184 2006-11-25 09:15:19 adam Exp $ +/* $Id: index.h,v 1.185 2006-11-30 10:33:19 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -261,6 +261,9 @@ ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs, int num_bases, char **basenames, RSET *result_set); +ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs, + zint *approx_limit); + ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, oid_value attributeset, int num_bases, char **basenames, diff --git a/index/rpnsearch.c b/index/rpnsearch.c index 3057580..9529573 100644 --- a/index/rpnsearch.c +++ b/index/rpnsearch.c @@ -1,4 +1,4 @@ -/* $Id: rpnsearch.c,v 1.2 2006-10-12 12:28:42 adam Exp $ +/* $Id: rpnsearch.c,v 1.3 2006-11-30 10:33:19 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -2318,6 +2318,37 @@ static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, Z_Operator *parent_op, struct rset_key_control *kc); +ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs, + zint *approx_limit) +{ + ZEBRA_RES res = ZEBRA_OK; + if (zs->which == Z_RPNStructure_complex) + { + if (res == ZEBRA_OK) + res = rpn_get_top_approx_limit(zh, zs->u.complex->s1, + approx_limit); + if (res == ZEBRA_OK) + res = rpn_get_top_approx_limit(zh, zs->u.complex->s2, + approx_limit); + } + else if (zs->which == Z_RPNStructure_simple) + { + if (zs->u.simple->which == Z_Operand_APT) + { + Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm; + AttrType global_hits_limit_attr; + int l; + + attr_init_APT(&global_hits_limit_attr, zapt, 12); + + l = attr_find(&global_hits_limit_attr, NULL); + if (l != -1) + *approx_limit = l; + } + } + return res; +} + ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs, oid_value attributeSet, NMEM stream, NMEM 
rset_nmem, diff --git a/index/zsets.c b/index/zsets.c index 643ba2c..5c02dd4 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -1,4 +1,4 @@ -/* $Id: zsets.c,v 1.112 2006-10-29 17:20:01 adam Exp $ +/* $Id: zsets.c,v 1.113 2006-11-30 10:33:19 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -94,6 +94,7 @@ static void loglevels(void) log_level_set = 1; } + ZEBRA_RES resultSetSearch(ZebraHandle zh, NMEM nmem, NMEM rset_nmem, Z_RPNQuery *rpn, ZebraSet sset) { @@ -115,6 +116,9 @@ ZEBRA_RES resultSetSearch(ZebraHandle zh, NMEM nmem, NMEM rset_nmem, sort_sequence->specs[i] = 0; attrset = oid_getentbyoid (rpn->attributeSetId); + + rpn_get_top_approx_limit(zh, rpn->RPNStructure, &sset->approx_limit); + res = rpn_search_top(zh, rpn->RPNStructure, attrset->value, nmem, rset_nmem, sort_sequence, -- 1.7.10.4