From 49ae06a7901f27cce5657bdc57fd7d147bf59fa1 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Wed, 26 Mar 2003 16:41:48 +0000 Subject: [PATCH] Integrated ranking algorithm from Liverpool University --- CHANGELOG | 10 +- index/Makefile.am | 4 +- index/index.h | 7 +- index/livcode.c | 706 +++++++++++++++++++++++++++++++++++++++++++++++++++++ index/rank1.c | 17 +- index/trunc.c | 11 +- index/zebraapi.c | 8 +- index/zsets.c | 4 +- index/zvrank.c | 4 +- util/zebramap.c | 44 +--- 10 files changed, 759 insertions(+), 56 deletions(-) create mode 100644 index/livcode.c diff --git a/CHANGELOG b/CHANGELOG index 6dd2f5c..2735323 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,12 @@ -Zvrank: an experimental ranking algorithm. See doc/zvrank.txt - Johannes Leveling +Zvrank: an experimental ranking algorithm. See doc/zvrank.txt and +source in index/zvrank.c. Enable this by using rank: zvrank in zebra.cfg. +Contributed by Johannes Leveling + +livrank: another experimental ranking algorithm. Source in livcode.c. +Enable this by using rank: livrank in zebra.cfg and use -DLIV_CODE=1 +for CFLAGS. +Contributed by Pete Mallinson, University of Liverpool. Advanced MARC indexing. 
See doc/marc_indexing.xml Oleg Kolobov diff --git a/index/Makefile.am b/index/Makefile.am index 2355744..1978423 100644 --- a/index/Makefile.am +++ b/index/Makefile.am @@ -1,4 +1,4 @@ -## $Id: Makefile.am,v 1.22 2003-03-04 23:36:29 adam Exp $ +## $Id: Makefile.am,v 1.23 2003-03-26 16:41:48 adam Exp $ noinst_PROGRAMS = apitest kdump @@ -7,7 +7,7 @@ noinst_LIBRARIES = libzebra.a libzebra_a_SOURCES = dir.c dirs.c trav.c kinput.c kcompare.c \ attribute.c symtab.c recindex.c recstat.c lockutil.c \ zebraapi.c zinfo.c invstat.c sortidx.c compact.c zsets.c zrpn.c \ - rank1.c trunc.c retrieve.c extract.c \ + rank1.c trunc.c retrieve.c extract.c livcode.c \ index.h recindex.h recindxp.h \ zinfo.h zserver.h zvrank.c diff --git a/index/index.h b/index/index.h index 1f0ac62..e8957c5 100644 --- a/index/index.h +++ b/index/index.h @@ -1,4 +1,4 @@ -/* $Id: index.h,v 1.97 2003-03-13 04:25:17 pop Exp $ +/* $Id: index.h,v 1.98 2003-03-26 16:41:48 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -323,7 +323,7 @@ struct zebra_session { struct rank_control { char *name; - void *(*create)(struct zebra_register *reg); + void *(*create)(ZebraHandle zh); void (*destroy)(struct zebra_register *reg, void *class_handle); void *(*begin)(struct zebra_register *reg, void *class_handle, RSET rset); void (*end)(struct zebra_register *reg, void *set_handle); @@ -403,6 +403,7 @@ int att_getentbyatt(ZebraHandle zh, attent *res, oid_value set, int att); extern struct rank_control *rank1_class; extern struct rank_control *rankzv_class; +extern struct rank_control *rankliv_class; int zebra_record_fetch (ZebraHandle zh, int sysno, int score, ODR stream, oid_value input_format, Z_RecordComposition *comp, @@ -466,6 +467,8 @@ void zebra_end_read (ZebraHandle zh); int zebra_file_stat (const char *file_name, struct stat *buf, int follow_links); +void zebra_livcode_transform(ZebraHandle zh, Z_RPNQuery *query); + YAZ_END_CDECL #endif diff --git a/index/livcode.c 
b/index/livcode.c new file mode 100644 index 0000000..44dec1b --- /dev/null +++ b/index/livcode.c @@ -0,0 +1,706 @@ +/* + +The University of Liverpool + +Modifications to Zebra 1.1 / YAZ 1.7 to enable ranking +by attribute weight. + +Copyright (c) 2001-2002 The University of Liverpool. All +rights reserved. + +Licensed under the Academic Free License version 1.1. +http://opensource.org/licenses/academic.php + +$Id: livcode.c,v 1.1 2003-03-26 16:41:48 adam Exp $ + +*/ + +#include +#include +#ifdef WIN32 +#include +#else +#include +#endif +#include + +#include "index.h" +#include "zserver.h" + +/* +** These functions/routines +** 1. reads in and builds a linked list of rank attr/rank score pairs +** 2. expand a simple query into a paired list of complex/simple nodes. +*/ + +typedef struct rstype +{ + struct rstype *next_rsnode ; + int rank ; + int score ; + char *rankstr ; +} rsnode, *refrsnode ; + +refrsnode start_rsnode = NULL ; + +/* +** Function/Routine prototypes +*/ +static int search_for_score( char *rankstr ) ; +static char *search_for_rankstr( int rank ) ; +static int search_for_rank( int rank ) ; +static refrsnode set_rsnode( int rank, int score ) ; +static int read_zrank_file(ZebraHandle zh) ; + +static void convert_simple2complex(ZebraHandle zh, Z_RPNStructure *rpnstruct ) ; +static void walk_complex_query(ZebraHandle zh, Z_RPNStructure *rpnstruct ) ; +static Z_Complex *expand_query(ZebraHandle zh, Z_Operand *thisop ) ; +static Z_Complex *set_1complex_1operand( Z_Complex *comp,Z_Operand *simp ) ; +static Z_Complex *set_2operands( Z_Operand *sim1,Z_Operand *sim2 ) ; +static Z_Operand *set_operand( Z_Operand *thisop, int newattr ) ; +static int check_operand_attrs( Z_Operand *thisop ) ; + +/* +** search_for_score() +** given a rank-string traverse down the linked list ; +** return its score if found otherwise return -1. 
+*/
+static int search_for_score( char *rankstr )
+{
+    refrsnode node ;
+    int rank ;
+
+    /*
+    ** sscanf() yields EOF (which is non-zero) for an empty string, so
+    ** insist on exactly one converted field ; anything else would leave
+    ** 'rank' unset and the comparison below would read garbage.
+    */
+    if ( !rankstr || sscanf( rankstr,"%d",&rank ) != 1 )
+        return -1 ;
+
+    for ( node = start_rsnode ; node ; node = node->next_rsnode )
+    {
+        if ( node->rank == rank ) return node->score ;
+    }
+    return -1 ;
+}
+
+/*
+** search_for_rankstr()
+** given a rank traverse down the linked list ;
+** return its string if found, otherwise return the literal "rank"
+** (never NULL). The fallback is not numeric, so passing it on to
+** search_for_score() yields -1.
+*/
+static char *search_for_rankstr( int rank )
+{
+    refrsnode node ;
+
+    for ( node = start_rsnode ; node ; node = node->next_rsnode )
+    {
+        if ( node->rank == rank ) return node->rankstr ;
+    }
+    return "rank" ;
+}
+
+/*
+** search_for_rank()
+** given a rank traverse down the linked list ;
+** return 1 if found otherwise return 0.
+*/
+static int search_for_rank( int rank )
+{
+    refrsnode node ;
+
+    for ( node = start_rsnode ; node ; node = node->next_rsnode )
+    {
+        if ( node->rank == rank ) return 1 ;
+    }
+    return 0 ;
+}
+
+/*
+** set_rsnode()
+** given a rank and a score, build the rest of the rsnode structure.
+** The node is returned unlinked (next_rsnode == NULL) and owns a
+** private copy of the rank rendered as a decimal string.
+**
+** Allocation goes through xmalloc(), the allocator the rank handlers
+** further down this file already use, instead of an unchecked malloc().
+** NOTE(review): xmalloc() is expected to log and terminate on
+** exhaustion rather than return NULL - confirm against YAZ.
+*/
+static refrsnode set_rsnode( int rank, int score )
+{
+#define BUFFMAX 128
+    char buff[BUFFMAX] ;
+    refrsnode node = (refrsnode) xmalloc( sizeof(*node) ) ;
+
+    node->next_rsnode = NULL ;
+    node->rank = rank ;
+    node->score = score ;
+
+    /* BUFFMAX comfortably exceeds the longest decimal form of an int */
+    sprintf( buff,"%d",rank ) ;
+    node->rankstr = (char *) xmalloc( strlen(buff)+1 ) ;
+    strcpy( node->rankstr, buff ) ;
+
+    return node ;
+}
+
+/*
+** read_zrank_file(zh)
+** read in the rankfile and build the rank/score linked list ;
+** return 0 : can't open the zebra config. file
+** return 0 : can't find the rankfile entry in the zebra config. file
+** return 0 : can't open the rankfile itself
+** return the number of distinct ranks read in.
+*/
+static int read_zrank_file(ZebraHandle zh)
+{
+#define LINEMAX 256
+    char line[ LINEMAX ] ;
+    char rname[ LINEMAX ] ;
+    FILE *ifd ;
+    int rank = 0 ;
+    int score = 0 ;
+    int numranks = 0 ;
+
+    const char *rankfile = res_get_def(zh->res, "rankfile", 0);
+    const char *profilePath = res_get_def(zh->res, "profilePath",
+                                          DEFAULT_PROFILE_PATH);
+
+    if (!rankfile)
+    {
+        yaz_log(LOG_LOG, "rankfile entry not found in config file" ) ;
+        return 0 ;
+    }
+
+    /*
+    ** first pass: open the file named by "rankfile" via the profile
+    ** path and look for a "rankfile:" entry which, when present,
+    ** overrides the path/name of the rankfile to read.
+    */
+    ifd = yaz_path_fopen(profilePath, rankfile, "r" ) ;
+    if ( !ifd )
+    {
+        yaz_log(LOG_WARN|LOG_ERRNO, "unable to open config file (%s)",
+                rankfile);
+        return 0 ;
+    }
+    while ( fgets( line,LINEMAX,ifd ) )
+    {
+        /* line holds < LINEMAX chars, so %s cannot overrun rname */
+        if ( sscanf( line,"rankfile: %s", rname ) == 1 )
+            rankfile = rname ;
+    }
+    /* release the first stream before the handle is reused */
+    fclose( ifd ) ;
+
+    /*
+    ** second pass: open the rankfile and read the rank/score pairs
+    **   ignore malformed lines
+    **   ignore 1016
+    **   ignore duplicate ranks
+    **   ignore ranks without +ve scores
+    */
+    ifd = fopen( rankfile, "r" ) ;
+    if ( !ifd )
+    {
+        logf( LOG_LOG, "unable to open rankfile %s",rankfile ) ;
+        return 0;
+    }
+    while ( fgets( line,LINEMAX,ifd ) )
+    {
+        /* both fields must parse, else stale values would be reused */
+        if ( sscanf( line,"%d : %d", &rank,&score ) != 2 )
+            continue ;
+
+        if ( ( score > 0 ) && ( rank != 1016 ) &&
+             ( search_for_rank( rank ) == 0 ) )
+        {
+            refrsnode new_rsnode = set_rsnode( rank,score ) ;
+
+            if ( !new_rsnode )
+                break ;
+            /* push on the front of the global list */
+            new_rsnode->next_rsnode = start_rsnode ;
+            start_rsnode = new_rsnode ;
+            numranks++ ;
+        }
+    }
+    fclose( ifd ) ;
+
+    return numranks ;
+}
+
+/*
+** set_operand()
+** build an operand "node" - have to make a complete copy of thisop and
+** then insert newattr in the appropriate place: every numeric
+** attribute with type 1 and value 1016 gets newattr as its value.
+**
+** thisop must be an attributes-plus-term operand carrying a general
+** term ; check_operand_attrs() verifies this before the expansion.
+** Non-numeric attribute values are shared with the original, not
+** copied. Nothing in this file releases the copy.
+** NOTE(review): xmalloc() is expected to terminate on exhaustion
+** rather than return NULL - confirm against YAZ.
+*/
+static Z_Operand *set_operand( Z_Operand *thisop, int newattr )
+{
+    Z_AttributesPlusTerm *src_apt = thisop->u.attributesPlusTerm ;
+    Z_AttributeList *src_list = src_apt->attributes ;
+    Odr_oct *src_general = src_apt->term->u.general ;
+    int num = src_list->num_attributes ;
+    int slots = ( num > 0 ) ? num : 1 ;   /* avoid zero-sized requests */
+    int bufsize ;
+    int i ;
+
+    Z_Operand *operand = (Z_Operand *) xmalloc( sizeof(*operand) ) ;
+    Z_AttributesPlusTerm *attributesplusterm = (Z_AttributesPlusTerm *)
+        xmalloc( sizeof(*attributesplusterm) ) ;
+    Z_AttributeList *attributelist = (Z_AttributeList *)
+        xmalloc( sizeof(*attributelist) ) ;
+    Z_Term *term = (Z_Term *) xmalloc( sizeof(*term) ) ;
+    Odr_oct *general = (Odr_oct *) xmalloc( sizeof(*general) ) ;
+    Z_AttributeElement *elements = (Z_AttributeElement *)
+        xmalloc( sizeof(*elements) * slots ) ;
+    Z_AttributeElement **elemptrs = (Z_AttributeElement **)
+        xmalloc( sizeof(*elemptrs) * slots ) ;
+
+    operand->which = Z_Operand_APT ;
+    operand->u.attributesPlusTerm = attributesplusterm ;
+
+    attributesplusterm->attributes = attributelist ;
+    attributesplusterm->term = term ;
+
+    attributelist->num_attributes = num ;
+    attributelist->attributes = elemptrs ;
+
+    for ( i = 0 ; i < num ; i++ )
+    {
+        Z_AttributeElement *src = src_list->attributes[i] ;
+        Z_AttributeElement *dst = &elements[i] ;
+
+        /* shallow copy first, then give the copy its own integers */
+        *dst = *src ;
+
+        dst->attributeType = xmalloc( sizeof(*dst->attributeType) ) ;
+        *dst->attributeType = *src->attributeType ;
+
+        /* value.numeric is only meaningful for numeric attributes */
+        if ( src->which == Z_AttributeValue_numeric )
+        {
+            dst->value.numeric = xmalloc( sizeof(*dst->value.numeric) ) ;
+            *dst->value.numeric = *src->value.numeric ;
+
+            if ( (*dst->attributeType == 1) &&
+                 (*dst->value.numeric == 1016) )
+            {
+                *dst->value.numeric = newattr ;
+            }
+        }
+        elemptrs[i] = dst ;
+    }
+
+    term->which = Z_Term_general ;
+    term->u.general = general ;
+
+    /*
+    ** the term is an octet string with an explicit length, so copy
+    ** exactly len bytes (strcpy would depend on a terminator that is
+    ** not guaranteed) and keep one spare byte for a trailing NUL.
+    */
+    general->len = src_general->len ;
+    general->size = src_general->size ;
+    bufsize = general->len + 1 ;
+    if ( general->size > bufsize )
+        bufsize = general->size ;
+    general->buf = xmalloc( bufsize ) ;
+    if ( general->len > 0 )
+        memcpy( general->buf, src_general->buf, general->len ) ;
+    general->buf[general->len] = '\0' ;
+
+    return operand ;
+}
+
+/*
+** new_rpn_simple()
+** wrap an operand in a freshly allocated simple RPN structure
+*/
+static Z_RPNStructure *new_rpn_simple( Z_Operand *simple )
+{
+    Z_RPNStructure *s = (Z_RPNStructure *) xmalloc( sizeof(*s) ) ;
+
+    s->which = Z_RPNStructure_simple ;
+    s->u.simple = simple ;
+    return s ;
+}
+
+/*
+** new_or_node()
+** build a complex "node" that ORs the two given branches
+*/
+static Z_Complex *new_or_node( Z_RPNStructure *s1, Z_RPNStructure *s2 )
+{
+    Z_Complex *top = (Z_Complex *) xmalloc( sizeof(*top) ) ;
+
+    top->roperator = (Z_Operator *) xmalloc( sizeof(*top->roperator) ) ;
+    top->roperator->which = Z_Operator_or ;
+    top->roperator->u.op_or = odr_nullval() ;
+
+    top->s1 = s1 ;
+    top->s2 = s2 ;
+    return top ;
+}
+
+/*
+** set_2operands()
+** build a complex "node" with two (simple) operand "nodes" as branches
+*/
+static Z_Complex *set_2operands( Z_Operand *sim1,Z_Operand *sim2 )
+{
+    Z_RPNStructure *left = new_rpn_simple( sim1 ) ;
+    Z_RPNStructure *right = new_rpn_simple( sim2 ) ;
+
+    return new_or_node( left, right ) ;
+}
+
+/*
+** set_1complex_1operand()
+** build a complex "node" with a complex "node" branch and an
+** operand "node" branch
+*/
+static Z_Complex *set_1complex_1operand( Z_Complex *comp,Z_Operand *simp )
+{
+    Z_RPNStructure *left = (Z_RPNStructure *) xmalloc( sizeof(*left) ) ;
+    Z_RPNStructure *right ;
+
+    left->which = Z_RPNStructure_complex ;
+    left->u.complex = comp ;
+    right = new_rpn_simple( simp ) ;
+
+    return new_or_node( left, right ) ;
+}
+
+/*
+** expand_query()
+** expand a simple query into a number of complex queries: one OR
+** branch per rank in the rank/score list plus a final branch for
+** 1016 itself.
+** return NULL (no expansion) unless the list holds 2 or more ranks.
+** zh is not used.
+*/
+static Z_Complex *expand_query(ZebraHandle zh, Z_Operand *thisop )
+{
+    refrsnode node = start_rsnode ;
+    Z_Operand *first ;
+    Z_Operand *second ;
+    Z_Complex *top ;
+
+    /* only expand the query if there are 2 or more attributes */
+    if ( !node || !node->next_rsnode )
+        return NULL ;
+
+    /*
+    ** this is the special case and has to be done first because the
+    ** innermost complex node has two simple nodes whereas all the
+    ** others have a complex and a simple.
+    */
+    first = set_operand( thisop,node->rank ) ;
+    node = node->next_rsnode ;
+    second = set_operand( thisop,node->rank ) ;
+    node = node->next_rsnode ;
+    top = set_2operands( first,second ) ;
+
+    /* do the rest as complex/simple pairs */
+    for ( ; node ; node = node->next_rsnode )
+        top = set_1complex_1operand( top,set_operand( thisop,node->rank ) ) ;
+
+    /* finally add the 1016 rank attribute at the top of the tree */
+    top = set_1complex_1operand( top,set_operand( thisop,1016 ) ) ;
+
+    return top ;
+}
+
+/*
+** check_operand_attrs()
+** loop through the attributes of a particular operand
+** return 1 if (type==1 && value==1016) && (type==2 && value==102)
+** otherwise return 0
+** Operands that are not attributes-plus-term with a general term,
+** and attributes whose value is not numeric, never match ; this
+** keeps set_operand() from being handed something it cannot copy.
+*/
+static int check_operand_attrs( Z_Operand *thisop )
+{
+    Z_AttributesPlusTerm *apt ;
+    int cond1 = 0 ;
+    int cond2 = 0 ;
+    int numattrs ;
+    int i ;
+
+    if ( !thisop || thisop->which != Z_Operand_APT )
+        return 0 ;
+    apt = thisop->u.attributesPlusTerm ;
+    if ( !apt->term || apt->term->which != Z_Term_general )
+        return 0 ;
+
+    numattrs = apt->attributes->num_attributes ;
+
+    for ( i = 0 ; i < numattrs ; i++ )
+    {
+        Z_AttributeElement *attrptr = apt->attributes->attributes[i] ;
+
+        if ( attrptr->which != Z_AttributeValue_numeric )
+            continue ;
+
+        if ( (*attrptr->attributeType == 1) &&
+             (*attrptr->value.numeric == 1016) )
+            cond1 = 1 ;
+
+        if ( (*attrptr->attributeType == 2) &&
+             (*attrptr->value.numeric == 102) )
+            cond2 = 1 ;
+    }
+
+    return ( cond1 && cond2 ) ;
+}
+
+/*
+** convert_simple2complex()
+** rpnstruct must be a simple node. If its operand qualifies (see
+** check_operand_attrs) and the rank list allows an expansion, the
+** node is turned in place into the expanded complex node ; otherwise
+** it is left untouched.
+*/
+static void convert_simple2complex(ZebraHandle zh, Z_RPNStructure *rpnstruct )
+{
+    Z_Operand *operand = rpnstruct->u.simple ;
+    Z_Complex *complex ;
+
+    if ( !check_operand_attrs( operand ) )
+        return ;
+
+    complex = expand_query(zh, operand ) ;
+    if ( !complex )
+        return ;
+
+    /*
+    ** Everything is complete so replace the original
+    ** operand with the newly built complex structure
+    ** This is it ... no going back!!
+    */
+    rpnstruct->which = Z_RPNStructure_complex ;
+    rpnstruct->u.complex = complex ;
+}
+
+/*
+** walk_complex_query()
+** recursively traverse the tree expanding any simple queries we find
+*/
+static void walk_complex_query(ZebraHandle zh, Z_RPNStructure *rpnstruct )
+{
+    if ( !rpnstruct )
+        return ;
+
+    if ( rpnstruct->which == Z_RPNStructure_simple )
+    {
+        convert_simple2complex(zh, rpnstruct ) ;
+    }
+    else if ( rpnstruct->which == Z_RPNStructure_complex )
+    {
+        walk_complex_query(zh, rpnstruct->u.complex->s1 ) ;
+        walk_complex_query(zh, rpnstruct->u.complex->s2 ) ;
+    }
+}
+
+void zebra_livcode_transform(ZebraHandle zh, Z_RPNQuery *query)
+{
+    /*
+    ** Got a search request,
+    ** 1. if it is a simple query, see if it is suitable for expansion
+    **    i.e. the attributes are of the form ...
+    **    (type==1 && value==1016) && (type==2 && value==102)
+    ** or
+    ** 2. if it is complex, traverse the complex query tree and expand
+    **    any simple queries as above
+    **
+    ** The query is rewritten in place. Without LIV_CODE defined to a
+    ** non-zero value this function does nothing.
+    */
+#if LIV_CODE
+    Z_RPNStructure *rpnstruct ;
+
+    if ( !query || !query->RPNStructure )
+        return ;
+    rpnstruct = query->RPNStructure ;
+
+    if ( rpnstruct->which == Z_RPNStructure_simple )
+    {
+        convert_simple2complex(zh, rpnstruct ) ;
+    }
+    else if ( rpnstruct->which == Z_RPNStructure_complex )
+    {
+        walk_complex_query(zh, rpnstruct ) ;
+    }
+#endif
+}
+
+
+/* per-class state of the livrank handler ; carries no information */
+struct rank_class_info {
+    int dummy;
+};
+
+/* per-term state kept while one result set is being ranked */
+struct rank_term_info {
+    int local_occur;    /* hits counted by add since the last calc */
+    int global_occur;   /* the term's nn value taken from the rset */
+    int global_inv;     /* 32 - log2_int (global_occur) */
+    int rank_flag;      /* weight of the term ; 0 excludes it */
+};
+
+/* per-result-set state handed between begin, add, calc and end */
+struct rank_set_info {
+    int last_pos;           /* seqno passed to the latest add call */
+    int no_entries;         /* number of elements in entries */
+    int no_rank_entries;    /* entries with a non-zero rank_flag */
+    struct rank_term_info *entries;
+};
+
+/* integer base-2 logarithm, rounded down ; yields 0 for 0 and 1 */
+static int log2_int (unsigned g)
+{
+    int n = 0;
+    while ((g = g>>1))
+        n++;
+    return n;
+}
+
+/*
+ * create: Creates/Initialises this rank handler. This routine is
This routine is + * called exactly once. The routine returns the class_handle. + */ +static void *create (ZebraHandle zh) +{ + struct rank_class_info *ci = (struct rank_class_info *) + xmalloc (sizeof(*ci)); + + logf (LOG_DEBUG, "livrank create"); + + read_zrank_file(zh) ; + + return ci; +} + +/* + * destroy: Destroys this rank handler. This routine is called + * when the handler is no longer needed - i.e. when the server + * dies. The class_handle was previously returned by create. + */ +static void destroy (struct zebra_register *reg, void *class_handle) +{ + struct rank_class_info *ci = (struct rank_class_info *) class_handle; + + logf (LOG_DEBUG, "livrank destroy"); + xfree (ci); +} + + +/* + * begin: Prepares beginning of "real" ranking. Called once for + * each result set. The returned handle is a "set handle" and + * will be used in each of the handlers below. + */ +static void *begin (struct zebra_register *reg, void *class_handle, RSET rset) +{ + struct rank_set_info *si = (struct rank_set_info *) xmalloc (sizeof(*si)); + int i; + + logf (LOG_DEBUG, "livrank begin"); + si->no_entries = rset->no_rset_terms; + si->no_rank_entries = 0; + si->entries = (struct rank_term_info *) + xmalloc (sizeof(*si->entries)*si->no_entries); + for (i = 0; i < si->no_entries; i++) + { + const char *flags = rset->rset_terms[i]->flags; + int g = rset->rset_terms[i]->nn; + const char *cp = strstr(flags, ",u="); + + si->entries[i].rank_flag = 1; + if (cp) + { + char *t = search_for_rankstr(atoi(cp+3)); + if (t) + si->entries[i].rank_flag = search_for_score(t) ; + } + if ( si->entries[i].rank_flag ) + (si->no_rank_entries)++; + + si->entries[i].local_occur = 0; + si->entries[i].global_occur = g; + si->entries[i].global_inv = 32 - log2_int (g); + logf (LOG_DEBUG, "-------- %d ------", 32 - log2_int (g)); + } + return si; +} + +/* + * end: Terminates ranking process. Called after a result set + * has been ranked. 
+ */
+static void end (struct zebra_register *reg, void *set_handle)
+{
+    struct rank_set_info *si = (struct rank_set_info *) set_handle;
+
+    logf (LOG_DEBUG, "livrank end");
+    /* the entries array must go before the structure that points at it */
+    xfree (si->entries);
+    xfree (si);
+}
+
+/*
+ * add: Called for each word occurence in a result set. This routine
+ * should be as fast as possible. This routine should "incrementally"
+ * update the score.
+ * term_index is used unchecked as an index into the entries array.
+ */
+static void add (void *set_handle, int seqno, int term_index)
+{
+    struct rank_set_info *si = (struct rank_set_info *) set_handle;
+
+    logf (LOG_DEBUG, "livrank add seqno=%d term_index=%d", seqno, term_index);
+    si->last_pos = seqno;
+    si->entries[term_index].local_occur++;
+}
+
+/*
+ * calc: Called for each document in a result. This handler should
+ * produce a score based on previous call(s) to the add handler. The
+ * score should be between 0 and 1000. If score cannot be obtained
+ * -1 should be returned.
+ *
+ * All per-term occurrence counters are reset to 0 before returning,
+ * ready for the next document. -1 is returned only when no term of
+ * the set takes part in ranking.
+ */
+static int calc (void *set_handle, int sysno)
+{
+    struct rank_set_info *si = (struct rank_set_info *) set_handle;
+    int i, divisor, score = 0;
+
+    logf (LOG_DEBUG, "livrank calc sysno=%d", sysno);
+
+    if (!si->no_rank_entries)
+        return -1;
+
+    for (i = 0; i < si->no_entries; i++)
+    {
+        struct rank_term_info *entry = &si->entries[i];
+        int lo = entry->local_occur;
+
+        /* weighted hit count ... */
+        score += lo * entry->rank_flag;
+        /* ... plus the occurrence/rarity term for ranked, present terms */
+        if (entry->rank_flag && lo)
+            score += (8+log2_int (lo)) * entry->global_inv;
+        /* reset for the next document */
+        entry->local_occur = 0;
+    }
+    score *= 34;
+    /* no_rank_entries > 0 here, hence no_entries > 0 and divisor >= 8 */
+    divisor = si->no_rank_entries * (8+log2_int (si->last_pos/si->no_entries));
+    score = score / divisor;
+    if (score > 1000)
+        score = 1000;
+    /*
+     * a negative weight can drive the sum below zero ; keep the result
+     * in the documented range so that it is never mistaken for the -1
+     * "no score" value.
+     */
+    if (score < 0)
+        score = 0;
+    return score;
+}
+
+/*
+ * Pseudo-meta code with sequence of calls as they occur in a
+ * server.
Handlers are prefixed by --: + * + * server init + * -- create + * foreach search + * rank result set + * -- begin + * foreach record + * foreach word + * -- add + * -- calc + * -- end + * -- destroy + * server close + */ + +static struct rank_control rank_control = { + "livrank", + create, + destroy, + begin, + end, + calc, + add, +}; + +struct rank_control *rankliv_class = &rank_control; diff --git a/index/rank1.c b/index/rank1.c index f233fe3..9ad92a6 100644 --- a/index/rank1.c +++ b/index/rank1.c @@ -1,4 +1,4 @@ -/* $Id: rank1.c,v 1.12 2003-01-13 22:45:22 adam Exp $ +/* $Id: rank1.c,v 1.13 2003-03-26 16:41:48 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -30,7 +30,7 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include #endif -#define DEBUG_RANK 0 +#define DEBUG_RANK 1 #include "index.h" @@ -65,7 +65,7 @@ static int log2_int (unsigned g) * create: Creates/Initialises this rank handler. This routine is * called exactly once. The routine returns the class_handle. 
*/ -static void *create (struct zebra_register *reg) +static void *create (ZebraHandle zh) { struct rank_class_info *ci = (struct rank_class_info *) xmalloc (sizeof(*ci)); @@ -113,10 +113,15 @@ static void *begin (struct zebra_register *reg, void *class_handle, RSET rset) #endif if (!strncmp (rset->rset_terms[i]->flags, "rank,", 5)) { + const char *cp = strstr(rset->rset_terms[i]->flags+4, ",w="); si->entries[i].rank_flag = 1; - si->entries[i].rank_weight = atoi (rset->rset_terms[i]->flags+5); + if (cp) + si->entries[i].rank_weight = atoi (cp+3); + else + si->entries[i].rank_weight = 34; #if DEBUG_RANK - yaz_log (LOG_LOG, " weight=%d", i, si->entries[i].rank_weight); + yaz_log (LOG_LOG, " i=%d weight=%d", i, + si->entries[i].rank_weight); #endif (si->no_rank_entries)++; } @@ -125,7 +130,7 @@ static void *begin (struct zebra_register *reg, void *class_handle, RSET rset) si->entries[i].local_occur = 0; si->entries[i].global_occur = g; si->entries[i].global_inv = 32 - log2_int (g); - yaz_log (LOG_DEBUG, "-------- %d ------", 32 - log2_int (g)); + yaz_log (LOG_DEBUG, " global_inv = %d g = %d", 32 - log2_int (g), g); } return si; } diff --git a/index/trunc.c b/index/trunc.c index cd69643..958eb28 100644 --- a/index/trunc.c +++ b/index/trunc.c @@ -1,4 +1,4 @@ -/* $Id: trunc.c,v 1.27 2002-08-02 19:26:55 adam Exp $ +/* $Id: trunc.c,v 1.28 2003-03-26 16:41:48 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -142,6 +142,7 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length, RSET result; RSFD result_rsfd; rset_temp_parms parms; + int nn = 0; parms.cmp = key_compare_it; parms.key_size = sizeof(struct it_key); @@ -194,6 +195,7 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length, int n = ti->indx[ti->ptr[1]]; rset_write (result, result_rsfd, ti->heap[ti->ptr[1]]); + nn++; while (1) { @@ -239,6 +241,7 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length, int n = 
ti->indx[ti->ptr[1]]; rset_write (result, result_rsfd, ti->heap[ti->ptr[1]]); + nn++; if (preserve_position) { /* section that preserve all keys */ @@ -294,6 +297,7 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length, int n = ti->indx[ti->ptr[1]]; rset_write (result, result_rsfd, ti->heap[ti->ptr[1]]); + nn++; if (preserve_position) { heap_delete (ti); @@ -350,6 +354,7 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length, int n = ti->indx[ti->ptr[1]]; rset_write (result, result_rsfd, ti->heap[ti->ptr[1]]); + nn++; #if 0 /* section that preserve all keys */ heap_delete (ti); @@ -384,6 +389,7 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length, ISAMS_PP *ispt; int i; struct trunc_info *ti; + int nn = 0; ispt = (ISAMS_PP *) xmalloc (sizeof(*ispt) * (to-from)); @@ -402,6 +408,7 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length, int n = ti->indx[ti->ptr[1]]; rset_write (result, result_rsfd, ti->heap[ti->ptr[1]]); + nn++; while (1) { if (!isams_pp_read (ispt[n], ti->tmpbuf)) @@ -444,6 +451,7 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length, int n = ti->indx[ti->ptr[1]]; rset_write (result, result_rsfd, ti->heap[ti->ptr[1]]); + nn++; if (preserve_position) { @@ -478,6 +486,7 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length, else logf (LOG_WARN, "Unknown isam set in rset_trunc_r"); + parms.rset_term->nn = nn; rset_close (result, result_rsfd); return result; } diff --git a/index/zebraapi.c b/index/zebraapi.c index 2a43bc9..0a35c80 100644 --- a/index/zebraapi.c +++ b/index/zebraapi.c @@ -1,4 +1,4 @@ -/* $Id: zebraapi.c,v 1.95 2003-03-26 00:02:05 adam Exp $ +/* $Id: zebraapi.c,v 1.96 2003-03-26 16:41:48 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003 Index Data Aps @@ -241,6 +241,7 @@ struct zebra_register *zebra_register_open (ZebraService zs, const char *name, zebraRankInstall (reg, rank1_class); zebraRankInstall 
(reg, rankzv_class); + zebraRankInstall (reg, rankliv_class); recordCompression = res_get_def (res, "recordCompression", "none"); if (!strcmp (recordCompression, "none")) @@ -753,6 +754,9 @@ void zebra_search_rpn (ZebraHandle zh, ODR decode, ODR stream, if (zebra_begin_read (zh)) return; + + zebra_livcode_transform(zh, query); + resultSetAddRPN (zh, decode, stream, query, zh->num_basenames, zh->basenames, setname); @@ -1586,7 +1590,7 @@ void zebra_set_resource(ZebraHandle zh, const char *name, const char *value) } const char *zebra_get_resource(ZebraHandle zh, - const char *name, const char *defaultvalue) + const char *name, const char *defaultvalue) { ASSERTZH; zh->errCode=0; diff --git a/index/zsets.c b/index/zsets.c index a794cfe..299a534 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -1,4 +1,4 @@ -/* $Id: zsets.c,v 1.43 2003-03-06 11:58:08 adam Exp $ +/* $Id: zsets.c,v 1.44 2003-03-26 16:41:48 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -765,7 +765,7 @@ ZebraRankClass zebraRankLookup (ZebraHandle zh, const char *name) if (p && !p->init_flag) { if (p->control->create) - p->class_handle = (*p->control->create)(zh->reg); + p->class_handle = (*p->control->create)(zh); p->init_flag = 1; } return p; diff --git a/index/zvrank.c b/index/zvrank.c index 1fffdea..2f9d91f 100644 --- a/index/zvrank.c +++ b/index/zvrank.c @@ -1,4 +1,4 @@ -/* $Id: zvrank.c,v 1.2 2003-03-03 10:31:46 adam Exp $ +/* $Id: zvrank.c,v 1.3 2003-03-26 16:41:48 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003 Index Data Aps @@ -659,7 +659,7 @@ struct rank_set_info { * zv_create: Creates/Initialises this rank handler. This routine is * called exactly once. The routine returns the class_handle. 
*/ -static void *zv_create (struct zebra_register *reg) { +static void *zv_create (ZebraHandle zh) { struct rank_class_info *ci = (struct rank_class_info *) xmalloc (sizeof(*ci)); yaz_log(LOG_DEBUG, "zv_create\n"); diff --git a/util/zebramap.c b/util/zebramap.c index b89aae2..c46dbbc 100644 --- a/util/zebramap.c +++ b/util/zebramap.c @@ -1,5 +1,5 @@ -/* $Id: zebramap.c,v 1.29 2002-12-16 22:59:34 adam Exp $ - Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 +/* $Id: zebramap.c,v 1.30 2003-03-26 16:41:48 adam Exp $ + Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003 Index Data Aps This file is part of the Zebra server. @@ -304,40 +304,6 @@ const char **zebra_maps_input (ZebraMaps zms, unsigned reg_id, return zms->temp_map_ptr; } -#if 0 -int zebra_maps_input_tokens (ZebraMaps zms, unsigned reg_id, - const char *input_str, int input_len, - WRBUF wrbuf) -{ - chrmaptab maptab = zebra_charmap_get (zms, reg_id); - int len[4]; - char *str[3]; - int input_i = 0; - int first = 1; - const char **out; - - if (!maptab) - { - wrbuf_write (wrbuf, input_str, input_len); - return -1; - } - str[0] = " "; - len[0] = 1; - str[1] = input_str; - len[1] = input_len; - str[2] = " "; - len[2] = 1; - len[3] = -1; - - out = chr_map_input (maptab, str, len); - while (len[1] > 0) - { - while (out && *out && **out == *CHR_SPACE) - out = chr_map_input (maptab, str, len); - } -} -#endif - const char *zebra_maps_output(ZebraMaps zms, unsigned reg_id, const char **from) { @@ -472,23 +438,27 @@ int zebra_maps_attr (ZebraMaps zms, Z_AttributesPlusTerm *zapt, AttrType relation; AttrType sort_relation; AttrType weight; + AttrType use; int completeness_value; int structure_value; int relation_value; int sort_relation_value; int weight_value; + int use_value; attr_init_APT (&structure, zapt, 4); attr_init_APT (&completeness, zapt, 6); attr_init_APT (&relation, zapt, 2); attr_init_APT (&sort_relation, zapt, 7); attr_init_APT (&weight, zapt, 9); + attr_init_APT (&use, zapt, 1); 
completeness_value = attr_find (&completeness, NULL); structure_value = attr_find (&structure, NULL); relation_value = attr_find (&relation, NULL); sort_relation_value = attr_find (&sort_relation, NULL); weight_value = attr_find (&weight, NULL); + use_value = attr_find(&use, NULL); if (completeness_value == 2 || completeness_value == 3) *complete_flag = 1; @@ -503,7 +473,7 @@ int zebra_maps_attr (ZebraMaps zms, Z_AttributesPlusTerm *zapt, { if (weight_value == -1) weight_value = 34; - sprintf (rank_type, "rank,%d", weight_value); + sprintf (rank_type, "rank,w=%d,u=%d", weight_value, use_value); } if (relation_value == 103) { -- 1.7.10.4