From 342c28e71fe5122d17d760a930107cab0266e9c6 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Fri, 19 Aug 2005 09:21:34 +0000 Subject: [PATCH] Extend the ranking interface so staticrank is passed to calc method. Added simple staticrank algorithm/example. --- index/Makefile.am | 4 +- index/index.h | 10 +-- index/rank1.c | 41 ++++++----- index/rankstatic.c | 156 +++++++++++++++++++++++++++++++++++++++++ index/zebraapi.c | 7 +- index/zsets.c | 23 +++--- index/zvrank.c | 7 +- test/xslt/zebrastaticrank.cfg | 1 + 8 files changed, 206 insertions(+), 43 deletions(-) create mode 100644 index/rankstatic.c diff --git a/index/Makefile.am b/index/Makefile.am index 22c382e..4fdeb38 100644 --- a/index/Makefile.am +++ b/index/Makefile.am @@ -1,4 +1,4 @@ -## $Id: Makefile.am,v 1.31 2005-05-03 09:11:34 adam Exp $ +## $Id: Makefile.am,v 1.32 2005-08-19 09:21:34 adam Exp $ noinst_PROGRAMS = apitest kdump @@ -7,7 +7,7 @@ lib_LTLIBRARIES = libidzebra-api.la libidzebra_api_la_SOURCES = dir.c dirs.c trav.c kinput.c kcompare.c \ attribute.c symtab.c recindex.c recstat.c lockutil.c \ zebraapi.c zinfo.c invstat.c sortidx.c compact.c zsets.c zrpn.c \ - rank1.c trunc.c retrieve.c extract.c \ + rank1.c trunc.c retrieve.c extract.c rankstatic.c \ index.h recindex.h recindxp.h \ zinfo.h zserver.h zvrank.c limit.c kcontrol.c diff --git a/index/index.h b/index/index.h index 753dac0..d620b87 100644 --- a/index/index.h +++ b/index/index.h @@ -1,4 +1,4 @@ -/* $Id: index.h,v 1.147 2005-08-18 12:50:17 adam Exp $ +/* $Id: index.h,v 1.148 2005-08-19 09:21:34 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -333,7 +333,7 @@ struct rank_control { * int rssize; // number of records in result set (estimate?) 
*/ void (*end)(struct zebra_register *reg, void *set_handle); - int (*calc)(void *set_handle, zint sysno); + int (*calc)(void *set_handle, zint sysno, zint staticrank); void (*add)(void *set_handle, int seqno, TERMID term); }; @@ -419,9 +419,9 @@ void zebraRankDestroy (struct zebra_register *reg); int att_getentbyatt(ZebraHandle zh, attent *res, oid_value set, int att, const char *sattr); -extern struct rank_control *rank1_class; -extern struct rank_control *rankzv_class; -extern struct rank_control *rankliv_class; +extern struct rank_control *rank_1_class; +extern struct rank_control *rank_zv_class; +extern struct rank_control *rank_static_class; int zebra_record_fetch (ZebraHandle zh, SYSNO sysno, int score, zebra_snippets *hit_snippet, ODR stream, diff --git a/index/rank1.c b/index/rank1.c index 0b1ddfb..fb3529e 100644 --- a/index/rank1.c +++ b/index/rank1.c @@ -1,4 +1,4 @@ -/* $Id: rank1.c,v 1.25 2005-06-14 20:28:54 adam Exp $ +/* $Id: rank1.c,v 1.26 2005-08-19 09:21:34 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -29,11 +29,10 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include #endif - #include "index.h" -static int log_level=0; -static int log_initialized=0; +static int log_level = 0; +static int log_initialized = 0; struct rank_class_info { int dummy; @@ -78,10 +77,10 @@ static void *create (ZebraHandle zh) if (!log_initialized) { - log_level=yaz_log_module_level("rank1"); - log_initialized=1; + log_level = yaz_log_module_level("rank1"); + log_initialized = 1; } - yaz_log (log_level, "rank-1 create"); + yaz_log(log_level, "rank-1 create"); return ci; } @@ -94,7 +93,7 @@ static void destroy (struct zebra_register *reg, void *class_handle) { struct rank_class_info *ci = (struct rank_class_info *) class_handle; - yaz_log (log_level, "rank-1 destroy"); + yaz_log(log_level, "rank-1 destroy"); xfree (ci); } @@ -112,7 +111,7 @@ static void *begin (struct zebra_register *reg, (struct rank_set_info *) nmem_malloc (nmem,sizeof(*si)); 
int i; - yaz_log (log_level, "rank-1 begin"); + yaz_log(log_level, "rank-1 begin"); si->no_entries = numterms; si->no_rank_entries = 0; si->nmem=nmem; @@ -131,7 +130,7 @@ static void *begin (struct zebra_register *reg, si->entries[i].rank_weight = atoi (cp+3); else si->entries[i].rank_weight = 34; - yaz_log (log_level, " i=%d weight=%d g="ZINT_FORMAT, i, + yaz_log(log_level, " i=%d weight=%d g="ZINT_FORMAT, i, si->entries[i].rank_weight, g); (si->no_rank_entries)++; } @@ -140,11 +139,11 @@ static void *begin (struct zebra_register *reg, si->entries[i].local_occur = 0; /* FIXME */ si->entries[i].global_occur = g; si->entries[i].global_inv = 32 - log2_int (g); - yaz_log (log_level, " global_inv = %d g = " ZINT_FORMAT, + yaz_log(log_level, " global_inv = %d g = " ZINT_FORMAT, (int) (32-log2_int (g)), g); - si->entries[i].term=terms[i]; + si->entries[i].term = terms[i]; si->entries[i].term_index=i; - terms[i]->rankpriv=&(si->entries[i]); + terms[i]->rankpriv = &(si->entries[i]); } return si; } @@ -155,7 +154,7 @@ static void *begin (struct zebra_register *reg, */ static void end (struct zebra_register *reg, void *set_handle) { - yaz_log (log_level, "rank-1 end"); + yaz_log(log_level, "rank-1 end"); /* no need to free anything, they are in nmems */ } @@ -172,14 +171,14 @@ static void add (void *set_handle, int seqno, TERMID term) assert(si); if (!term) { - yaz_log (log_level, "rank-1 add NULL term"); + yaz_log(log_level, "rank-1 add NULL term"); return; } ti= (struct rank_term_info *) term->rankpriv; assert(ti); si->last_pos = seqno; ti->local_occur++; - yaz_log (log_level, "rank-1 add seqno=%d term=%s count=%d", + yaz_log(log_level, "rank-1 add seqno=%d term=%s count=%d", seqno, term->name,ti->local_occur); } @@ -189,13 +188,13 @@ static void add (void *set_handle, int seqno, TERMID term) * score should be between 0 and 1000. If score cannot be obtained * -1 should be returned. 
*/ -static int calc (void *set_handle, zint sysno) +static int calc (void *set_handle, zint sysno, zint staticrank) { int i, lo, divisor, score = 0; struct rank_set_info *si = (struct rank_set_info *) set_handle; - if (!si->no_rank_entries) - return -1; + if (!si->no_rank_entries) + return -1; /* ranking not enabled for any terms */ for (i = 0; i < si->no_entries; i++) { @@ -207,7 +206,7 @@ static int calc (void *set_handle, zint sysno) } divisor = si->no_rank_entries * (8+log2_int (si->last_pos/si->no_entries)); score = score / divisor; - yaz_log (log_level, "calc sysno=" ZINT_FORMAT " score=%d", sysno, score); + yaz_log(log_level, "calc sysno=" ZINT_FORMAT " score=%d", sysno, score); if (score > 1000) score = 1000; /* reset the counts for the next term */ @@ -244,4 +243,4 @@ static struct rank_control rank_control = { add, }; -struct rank_control *rank1_class = &rank_control; +struct rank_control *rank_1_class = &rank_control; diff --git a/index/rankstatic.c b/index/rankstatic.c new file mode 100644 index 0000000..a17055f --- /dev/null +++ b/index/rankstatic.c @@ -0,0 +1,156 @@ +/* $Id: rankstatic.c,v 1.1 2005-08-19 09:21:34 adam Exp $ + Copyright (C) 1995-2005 + Index Data ApS + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Zebra; see the file LICENSE.zebra. If not, write to the +Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. 
+*/ + +#include +#include +#ifdef WIN32 +#include +#endif +#if HAVE_UNISTD_H +#include +#endif + +#include "index.h" + +static int log_level = 0; +static int log_initialized = 0; + +struct rank_set_info { + int no_rank_entries; +}; + +/* + * create: Creates/Initialises this rank handler. This routine is + * called exactly once. The routine returns the class_handle. + */ +static void *create (ZebraHandle zh) +{ + if (!log_initialized) + { + log_level = yaz_log_module_level("rankstatic"); + log_initialized = 1; + } + yaz_log(log_level, "rank-static create"); + return 0; +} + +/* + * destroy: Destroys this rank handler. This routine is called + * when the handler is no longer needed - i.e. when the server + * dies. The class_handle was previously returned by create. + */ +static void destroy (struct zebra_register *reg, void *class_handle) +{ + yaz_log(log_level, "rank-static destroy"); +} + + +/** + * begin: Prepares beginning of "real" ranking. Called once for + * each result set. The returned handle is a "set handle" and + * will be used in each of the handlers below. + */ +static void *begin (struct zebra_register *reg, + void *class_handle, RSET rset, NMEM nmem, + TERMID *terms, int numterms) +{ + struct rank_set_info *si = + (struct rank_set_info *) nmem_malloc (nmem, sizeof(*si)); + int i; + + yaz_log(log_level, "rank-static begin"); + /* count how many terms are ranked (2=102 or similar) */ + si->no_rank_entries = 0; + for (i = 0; i < numterms; i++) + { + yaz_log(log_level, "i=%d flags=%s '%s'", i, + terms[i]->flags, terms[i]->name ); + if (!strncmp (terms[i]->flags, "rank,", 5)) + (si->no_rank_entries)++; + } + return si; +} + +/* + * end: Terminates ranking process. Called after a result set + * has been ranked. + */ +static void end (struct zebra_register *reg, void *set_handle) +{ + yaz_log(log_level, "rank-static end"); +} + + +/** + * add: Called for each word occurrence in a result set. This routine + * should be as fast as possible.
This routine should "incrementally" + * update the score. + */ +static void add (void *set_handle, int seqno, TERMID term) +{ +} + +/* + * calc: Called for each document in a result. This handler should + * produce a score based on previous call(s) to the add handler. The + * score should be between 0 and 1000. If score cannot be obtained + * -1 should be returned. + */ +static int calc (void *set_handle, zint sysno, zint staticrank) +{ + struct rank_set_info *si = (struct rank_set_info *) set_handle; + + if (!si->no_rank_entries) + return -1; /* ranking not enabled for any terms */ + + return staticrank+10; +} + +/* + * Pseudo-meta code with sequence of calls as they occur in a + * server. Handlers are prefixed by --: + * + * server init + * -- create + * foreach search + * rank result set + * -- begin + * foreach record + * foreach word + * -- add + * -- calc + * -- end + * -- destroy + * server close + */ + +static struct rank_control rank_control = { + "rank-static", + create, + destroy, + begin, + end, + calc, + add, +}; + +struct rank_control *rank_static_class = &rank_control; diff --git a/index/zebraapi.c b/index/zebraapi.c index 8a48f7a..ce56154 100644 --- a/index/zebraapi.c +++ b/index/zebraapi.c @@ -1,4 +1,4 @@ -/* $Id: zebraapi.c,v 1.182 2005-08-18 12:50:17 adam Exp $ +/* $Id: zebraapi.c,v 1.183 2005-08-19 09:21:34 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -330,8 +330,9 @@ struct zebra_register *zebra_register_open (ZebraService zs, const char *name, reg->key_file_no = 0; reg->ptr_i = 0; - zebraRankInstall (reg, rank1_class); - zebraRankInstall (reg, rankzv_class); + zebraRankInstall (reg, rank_1_class); + zebraRankInstall (reg, rank_zv_class); + zebraRankInstall (reg, rank_static_class); recordCompression = res_get_def (res, "recordCompression", "none"); if (!strcmp (recordCompression, "none")) diff --git a/index/zsets.c b/index/zsets.c index 4094d72..ed60386 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -1,4 +1,4 @@ -/* $Id: zsets.c,v 
1.92 2005-08-18 19:20:38 adam Exp $ +/* $Id: zsets.c,v 1.93 2005-08-19 09:21:34 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -870,31 +870,38 @@ ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet, void *handle = (*rc->begin) (zh->reg, rank_class->class_handle, rset, nmem, terms, numTerms); - zint psysno = 0; + zint psysno = 0; /* previous doc id / sys no */ + zint pstaticrank = 0; /* previous static rank */ while (rset_read(rfd, &key, &termid)) { zint this_sys = key.mem[sysno_mem_index]; + zint seqno = key.mem[key.len-1]; kno++; if (log_level_searchhits) key_logdump_txt(log_level_searchhits, &key, termid->name); - if (this_sys != psysno) - { + if (this_sys != psysno) + { /* new record .. */ if (rfd->counted_items > rset->hits_limit) break; if (psysno) - { - score = (*rc->calc) (handle, psysno); + { /* only if we did have a previous record */ + score = (*rc->calc) (handle, psysno, pstaticrank); + /* insert the hit. A=Ascending */ resultSetInsertRank (zh, sort_info, psysno, score, 'A'); count++; } psysno = this_sys; + if (zh->m_staticrank) + pstaticrank = key.mem[0]; } (*rc->add) (handle, CAST_ZINT_TO_INT(seqno), termid); } + /* no more items */ if (psysno) - { - score = (*rc->calc)(handle, psysno); + { /* we had - at least - one record */ + score = (*rc->calc)(handle, psysno, pstaticrank); + /* insert the hit. A=Ascending */ resultSetInsertRank(zh, sort_info, psysno, score, 'A'); count++; } diff --git a/index/zvrank.c b/index/zvrank.c index ed9cefd..0985262 100644 --- a/index/zvrank.c +++ b/index/zvrank.c @@ -1,4 +1,4 @@ -/* $Id: zvrank.c,v 1.17 2005-05-24 11:27:19 adam Exp $ +/* $Id: zvrank.c,v 1.18 2005-08-19 09:21:34 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -782,7 +782,7 @@ static void zv_add (void *rsi, int seqno, TERMID term) { * score should be between 0 and 1000. If score cannot be obtained * -1 should be returned. 
*/ -static int zv_calc (void *rsi, zint sysno) +static int zv_calc (void *rsi, zint sysno, zint staticrank) { int i, veclen; int score=0; @@ -839,6 +839,5 @@ static struct rank_control rank_control_vsm = { zv_add, }; -struct rank_control *rankzv_class = &rank_control_vsm; +struct rank_control *rank_zv_class = &rank_control_vsm; -/* EOF */ diff --git a/test/xslt/zebrastaticrank.cfg b/test/xslt/zebrastaticrank.cfg index 4fe0f24..a214144 100644 --- a/test/xslt/zebrastaticrank.cfg +++ b/test/xslt/zebrastaticrank.cfg @@ -4,3 +4,4 @@ modulePath: ../../recctrl/.libs staticrank: 1 +rank: rank-static -- 1.7.10.4