From: Adam Dickmeiss Date: Mon, 6 May 2013 14:00:04 +0000 (+0200) Subject: Clear relevancy info when result set is clear'd X-Git-Tag: v1.6.31~24 X-Git-Url: http://git.indexdata.com/cgi-bin?a=commitdiff_plain;h=710ee248da13fb269e18d87e464ce2da038ea370;p=pazpar2-moved-to-github.git Clear relevancy info when result set is clear'd --- diff --git a/src/relevance.c b/src/relevance.c index b08d217..0551980 100644 --- a/src/relevance.c +++ b/src/relevance.c @@ -215,6 +215,15 @@ static void pull_terms(struct relevance *res, struct ccl_rpn_node *n) break; } } +void relevance_clear(struct relevance *r) +{ + if (r) + { + int i; + for (i = 0; i < r->vec_len; i++) + r->doc_frequency_vec[i] = 0; + } +} struct relevance *relevance_create_ccl(pp2_charset_fact_t pft, struct ccl_rpn_node *query, @@ -224,7 +233,6 @@ struct relevance *relevance_create_ccl(pp2_charset_fact_t pft, { NMEM nmem = nmem_create(); struct relevance *res = nmem_malloc(nmem, sizeof(*res)); - int i; res->nmem = nmem; res->entries = 0; @@ -238,8 +246,6 @@ struct relevance *relevance_create_ccl(pp2_charset_fact_t pft, pull_terms(res, query); res->doc_frequency_vec = nmem_malloc(nmem, res->vec_len * sizeof(int)); - for (i = 0; i < res->vec_len; i++) - res->doc_frequency_vec[i] = 0; // worker array res->term_frequency_vec_tmp = @@ -249,6 +255,7 @@ struct relevance *relevance_create_ccl(pp2_charset_fact_t pft, res->term_pos = nmem_malloc(res->nmem, res->vec_len * sizeof(*res->term_pos)); + relevance_clear(res); return res; } diff --git a/src/relevance.h b/src/relevance.h index 1337b60..5a095eb 100644 --- a/src/relevance.h +++ b/src/relevance.h @@ -32,6 +32,7 @@ struct relevance *relevance_create_ccl(pp2_charset_fact_t pft, struct ccl_rpn_node *query, int rank_cluster, double follow_factor, double lead_decay, int length_divide); +void relevance_clear(struct relevance *r); void relevance_destroy(struct relevance **rp); void relevance_newrec(struct relevance *r, struct record_cluster *cluster); void relevance_countwords(struct relevance *r, struct record_cluster *cluster, diff --git a/src/session.c b/src/session.c index 187ae38..400dd04 100644 --- a/src/session.c +++ b/src/session.c @@ -631,6 +631,8 @@ static void session_clear_set(struct session *se, struct reclist_sortparms *sp) se->total_records = se->total_merged = 0; se->num_termlists = 0; + relevance_clear(se->relevance); + /* reset list of sorted results and clear to relevance search */ se->sorted_results = nmem_malloc(se->nmem, sizeof(*se->sorted_results)); se->sorted_results->name = nmem_strdup(se->nmem, sp->name); diff --git a/test/test_http_89.res b/test/test_http_89.res index 9c542be..ec81a10 100644 --- a/test/test_http_89.res +++ b/test/test_http_89.res @@ -75,7 +75,7 @@ PAZPAR2_NULL_c 4 - 43178 + 41119 field=title content=Computer science &amp; technology :; computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(0))); @@ -90,9 +90,9 @@ field=title content=A plan for community college computer development.; computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(5))); computer: tf[1] += w[1](6) / length(7) (4.314286); relevance = 0; -idf[1] = log(((1 + total(20))/termoccur(19)); -computer: relevance += 100000 * tf[1](4.314286) * idf[1](0.100083) (43178); -score = relevance(43178); +idf[1] = log(((1 + total(10))/termoccur(10)); +computer: relevance += 100000 * tf[1](4.314286) * idf[1](0.095310) (41119); +score = relevance(41119); content: date @@ -176,7 +176,7 @@ score = relevance(43178); PAZPAR2_NULL_c 4 - 34361 + 32723 field=title content=Computer processing of dynamic images from an Ang ...; computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(0))); @@ -188,9 +188,9 @@ field=title-remainder content=a portfolio of thematic computer maps /; computer: w[1] += w(5) / (1+log2(1+lead_decay(0.000000) * length(4))); computer: tf[1] += w[1](5) / length(6) (3.433333); relevance = 0; -idf[1] = log(((1 + total(20))/termoccur(19)); -computer: relevance += 100000 * tf[1](3.433333) * idf[1](0.100083) (34361); -score = relevance(34361); +idf[1] = log(((1 + total(10))/termoccur(10)); +computer: relevance += 100000 * tf[1](3.433333) * idf[1](0.095310) (32723); +score = relevance(32723); content: date c @@ -206,15 +206,15 @@ score = relevance(34361); YYYYYYYYY 1 - 12010 + 11437 field=title content=How to program a computer; computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(4))); computer: tf[1] += w[1](6) / length(5) (1.200000); relevance = 0; -idf[1] = log(((1 + total(20))/termoccur(19)); -computer: relevance += 100000 * tf[1](1.200000) * idf[1](0.100083) (12010); -score = relevance(12010); +idf[1] = log(((1 + total(10))/termoccur(10)); +computer: relevance += 100000 * tf[1](1.200000) * idf[1](0.095310) (11437); +score = relevance(11437); position: z3950.indexdata.com/marc-1 @@ -230,15 +230,15 @@ score = relevance(12010); YYYYYYYYY 1 - 12010 + 11437 field=title content=How to program a computer; computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(4))); computer: tf[1] += w[1](6) / length(5) (1.200000); relevance = 0; -idf[1] = log(((1 + total(20))/termoccur(19)); -computer: relevance += 100000 * tf[1](1.200000) * idf[1](0.100083) (12010); -score = relevance(12010); +idf[1] = log(((1 + total(10))/termoccur(10)); +computer: relevance += 100000 * tf[1](1.200000) * idf[1](0.095310) (11437); +score = relevance(11437); position: z3950.indexdata.com/marc-2 diff --git a/test/test_http_90.res b/test/test_http_90.res index eee953c..837a9ff 100644 --- a/test/test_http_90.res +++ b/test/test_http_90.res @@ -25,7 +25,7 @@ YYYYYYYYY 2 - 24427 + 48160 field=title content=How to program a computer; computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(4))); @@ -34,9 +34,9 @@ field=title content=How to program a computer; computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(4))); computer: tf[1] += w[1](6) / length(5) (2.400000); relevance = 0; -idf[1] = log(((1 + total(30))/termoccur(28)); -computer: relevance += 100000 * tf[1](2.400000) * idf[1](0.101783) (24427); -score = relevance(24427); +idf[1] = log(((1 + total(10))/termoccur(9)); +computer: relevance += 100000 * tf[1](2.400000) * idf[1](0.200671) (48160); +score = relevance(48160); content: title how to program a computer author jack collins medium book @@ -59,15 +59,15 @@ score = relevance(24427); PAZPAR2_NULL_c 1 - 20356 + 40134 field=title content=Computer science &amp; technology :; computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(0))); computer: tf[1] += w[1](6) / length(3) (2.000000); relevance = 0; -idf[1] = log(((1 + total(30))/termoccur(28)); -computer: relevance += 100000 * tf[1](2.000000) * idf[1](0.101783) (20356); -score = relevance(20356); +idf[1] = log(((1 + total(10))/termoccur(9)); +computer: relevance += 100000 * tf[1](2.000000) * idf[1](0.200671) (40134); +score = relevance(40134); content: title computer science technology author medium book @@ -90,15 +90,15 @@ score = relevance(20356); YYYYYYYYY 1 - 20356 + 40134 field=title content=The Computer Bible /; computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(1))); computer: tf[1] += w[1](6) / length(3) (2.000000); relevance = 0; -idf[1] = log(((1 + total(30))/termoccur(28)); -computer: relevance += 100000 * tf[1](2.000000) * idf[1](0.101783) (20356); -score = relevance(20356); +idf[1] = log(((1 + total(10))/termoccur(9)); +computer: relevance += 100000 * tf[1](2.000000) * idf[1](0.200671) (40134); +score = relevance(40134); content: title the computer bible author medium book @@ -126,15 +126,15 @@ score = relevance(20356); PAZPAR2_NULL_c 1 - 8724 + 17200 field=title content=A plan for community college computer development.; computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(5))); computer: tf[1] += w[1](6) / length(7) (0.857143); relevance = 0; -idf[1] = log(((1 + total(30))/termoccur(28)); -computer: relevance += 100000 * tf[1](0.857143) * idf[1](0.101783) (8724); -score = relevance(8724); +idf[1] = log(((1 + total(10))/termoccur(9)); +computer: relevance += 100000 * tf[1](0.857143) * idf[1](0.200671) (17200); +score = relevance(17200); content: title a plan for community college computer development author medium book @@ -161,15 +161,15 @@ score = relevance(8724); PAZPAR2_NULL_c 1 - 8724 + 17200 field=title content=Washington metropolitan area rail computer feasib ...; computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(4))); computer: tf[1] += w[1](6) / length(7) (0.857143); relevance = 0; -idf[1] = log(((1 + total(30))/termoccur(28)); -computer: relevance += 100000 * tf[1](0.857143) * idf[1](0.101783) (8724); -score = relevance(8724); +idf[1] = log(((1 + total(10))/termoccur(9)); +computer: relevance += 100000 * tf[1](0.857143) * idf[1](0.200671) (17200); +score = relevance(17200); content: title washington metropolitan area rail computer feasibility study author englund carl r medium book @@ -199,15 +199,15 @@ score = relevance(8724); PAZPAR2_NULL_c 1 - 8481 + 16722 field=title-remainder content=a portfolio of thematic computer maps /; computer: w[1] += w(5) / (1+log2(1+lead_decay(0.000000) * length(4))); computer: tf[1] += w[1](5) / length(6) (0.833333); relevance = 0; -idf[1] = log(((1 + total(30))/termoccur(28)); -computer: relevance += 100000 * tf[1](0.833333) * idf[1](0.101783) (8481); -score = relevance(8481); +idf[1] = log(((1 + total(10))/termoccur(9)); +computer: relevance += 100000 * tf[1](0.833333) * idf[1](0.200671) (16722); +score = relevance(16722); content: title the puget sound region author mairs john w medium book @@ -242,15 +242,15 @@ score = relevance(8481); PAZPAR2_NULL_c 1 - 6106 + 12040 field=title content=Computer processing of dynamic images from an Ang ...; computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(0))); computer: tf[1] += w[1](6) / length(10) (0.600000); relevance = 0; -idf[1] = log(((1 + total(30))/termoccur(28)); -computer: relevance += 100000 * tf[1](0.600000) * idf[1](0.101783) (6106); -score = relevance(6106); +idf[1] = log(((1 + total(10))/termoccur(9)); +computer: relevance += 100000 * tf[1](0.600000) * idf[1](0.200671) (12040); +score = relevance(12040); content: title computer processing of dynamic images from an anger scintillation camera author medium book @@ -273,15 +273,15 @@ score = relevance(6106); PAZPAR2_NULL_c 1 - 6106 + 12040 field=title content=The use of passwords for controlled access to com ...; computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(8))); computer: tf[1] += w[1](6) / length(10) (0.600000); relevance = 0; -idf[1] = log(((1 + total(30))/termoccur(28)); -computer: relevance += 100000 * tf[1](0.600000) * idf[1](0.101783) (6106); -score = relevance(6106); +idf[1] = log(((1 + total(10))/termoccur(9)); +computer: relevance += 100000 * tf[1](0.600000) * idf[1](0.200671) (12040); +score = relevance(12040); content: title the use of passwords for controlled access to computer resources author wood helen m medium book @@ -309,8 +309,8 @@ score = relevance(6106); 0 relevance = 0; -idf[1] = log(((1 + total(30))/termoccur(28)); -computer: relevance += 100000 * tf[1](0.000000) * idf[1](0.101783) (0); +idf[1] = log(((1 + total(10))/termoccur(9)); +computer: relevance += 100000 * tf[1](0.000000) * idf[1](0.200671) (0); score = relevance(0); content: title reconstruction tomography in diagnostic radiology and nuclear medicine author medium book