X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Fzvrank.c;h=ecebce32a2fa89788732c83e2939db25d8f434ac;hp=622278ef49eb0c78243ad25cc487b0f491689ef7;hb=73bedd5ba9152c9c107b502fae65723b551aff09;hpb=00c427dace60183f4ba7bacdcdf004ac419527ce diff --git a/index/zvrank.c b/index/zvrank.c index 622278e..ecebce3 100644 --- a/index/zvrank.c +++ b/index/zvrank.c @@ -1,4 +1,4 @@ -/* $Id: zvrank.c,v 1.6 2004-06-08 13:27:48 marc Exp $ +/* $Id: zvrank.c,v 1.11 2004-10-26 15:32:11 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003 Index Data Aps @@ -40,6 +40,8 @@ fernuni-hagen.de> ** "ntc-atn", "atc-atn", etc. */ +#if SKIPTHIS /* FIXME - Disabled while changing the interface to ranking */ + #include /* for log */ #include @@ -61,8 +63,7 @@ static double blog(double x) { /* structures */ -struct rank_class_info { /* now we need this */ - int dummy; +struct rank_class_info { char rscheme[8]; /* name of weighting scheme */ }; @@ -76,6 +77,7 @@ struct rs_info { /* for result set */ char rscheme[8]; /* name of weighting scheme */ /**/ int veclen; + NMEM nmem; void (*d_tf_fct)(void *, void *); /* doc term frequency function */ void (*d_idf_fct)(void *, void *); /* doc idf function */ void (*d_norm_fct)(void *, void *); /* doc normalization function */ @@ -131,7 +133,7 @@ struct ts_info { /* term info */ char *name; int *id; /**/ - int gocc; + zint gocc; int locc; double tf; double idf; @@ -267,7 +269,7 @@ static void idf_none(void *rsi, void *dsi) { static void idf_tfidf(void *rsi, void *dsi) { RS rs=(RS)rsi; DS ds=(DS)dsi; - int num_docs, gocc; + zint num_docs, gocc; int i, veclen; double idf; /* normal tfidf weight */ @@ -278,7 +280,7 @@ static void idf_tfidf(void *rsi, void *dsi) { if (gocc==0) idf=0.0; else - idf=blog(num_docs/gocc); + idf=blog((double) (num_docs/gocc)); ds->terms[i].idf=idf; } return; @@ -287,7 +289,7 @@ static void idf_tfidf(void *rsi, void *dsi) { static void idf_prob(void *rsi, void *dsi) { RS rs=(RS)rsi; DS ds=(DS)dsi; - int num_docs, gocc; + zint num_docs, gocc; int i, veclen; double idf; /* probabilistic formulation */ @@ -298,7 +300,7 @@ static void idf_prob(void *rsi, void *dsi) { if (gocc==0) idf=0.0; else - idf=blog((num_docs-gocc)/gocc); + idf=blog((double) ((num_docs-gocc)/gocc)); ds->terms[i].idf=idf; } return; @@ -326,7 +328,7 @@ static void idf_freq(void *rsi, void *dsi) { static void idf_squared(void *rsi, void *dsi) { RS rs=(RS)rsi; DS ds=(DS)dsi; - int num_docs, gocc; + zint num_docs, gocc; int i, veclen; double idf; /* idf ^ 2 */ @@ -660,9 +662,10 @@ static void *zv_create (ZebraHandle zh) { struct rank_class_info *ci = (struct rank_class_info *) xmalloc (sizeof(*ci)); yaz_log(LOG_DEBUG, "zv_create"); - wscheme=res_get(res, "zvrank.weighting-scheme"); - for (i=0; (i < strlen(wscheme)) && (i < 8); i++) + wscheme=res_get_def(res, "zvrank.weighting-scheme", ""); + for (i=0; wscheme[i] && i < 8; i++) ci->rscheme[i]=wscheme[i]; + ci->rscheme[i] = '\0'; return ci; } @@ -683,16 +686,21 @@ static void zv_destroy (struct zebra_register *reg, void *class_handle) { * each result set. The returned handle is a "set handle" and * will be used in each of the handlers below. */ -static void *zv_begin(struct zebra_register *reg, void *class_handle, RSET rset) +static void *zv_begin(struct zebra_register *reg, void *class_handle, + RSET rset, NMEM nmem) { struct rs_info *rs=(struct rs_info *)xmalloc(sizeof(*rs)); struct rank_class_info *ci=(struct rank_class_info *)class_handle; int i; - int veclen, gocc; + int veclen; + zint gocc; /**/ yaz_log(LOG_DEBUG, "zv_begin"); - veclen=rset->no_rset_terms; /* smaller vector here */ + veclen= 0 ; /* rset->no_rset_terms;*/ /* smaller vector here */ + /* FIXME - Now that we don't have term lists in rsets, what do */ + /* we do here ??? */ zv_init(rs, ci->rscheme); + rs->nmem=nmem; rs->veclen=veclen; prn_rs(rs); @@ -710,7 +718,7 @@ static void *zv_begin(struct zebra_register *reg, void *class_handle, RSET rset) /* yaz_log(LOG_DEBUG, "zv_begin_init"); */ for (i = 0; i < rs->veclen; i++) { - gocc=rset->rset_terms[i]->nn; + gocc= 0; /* rset->rset_terms[i]->nn; */ /* FIXME ??? */ /* yaz_log(LOG_DEBUG, "zv_begin_init i=%d gocc=%d", i, gocc); */ rs->qdoc->terms[i].gocc=gocc; rs->qdoc->terms[i].locc=1; /* assume query has no duplicate terms */ @@ -756,7 +764,7 @@ static void zv_add (void *rsi, int seqno, int i) { * score should be between 0 and 1000. If score cannot be obtained * -1 should be returned. */ -static int zv_calc (void *rsi, int sysno) +static int zv_calc (void *rsi, zint sysno) { int i, veclen; int score=0; @@ -774,11 +782,11 @@ static int zv_calc (void *rsi, int sysno) (*rs->d_norm_fct)(rs, rs->rdoc); dscore=rs->sim_fct(rs->qdoc, rs->rdoc); } - score = dscore * 1000; - yaz_log (LOG_LOG, "sysno=%d score=%d", sysno, score); + score = (int) dscore * 1000; + yaz_log (LOG_LOG, "sysno=" ZINT_FORMAT " score=%d", sysno, score); if (score > 1000) /* should not happen */ score = 1000; - return score; + return (int) score; } /* @@ -811,4 +819,5 @@ static struct rank_control rank_control_vsm = { struct rank_control *rankzv_class = &rank_control_vsm; +#endif /* SKIPTHIS */ /* EOF */