From 8b5432d2794ccac566faef7d4ae5b25facdfc906 Mon Sep 17 00:00:00 2001 From: Heikki Levanto Date: Tue, 17 Dec 2013 16:04:48 +0100 Subject: [PATCH] Curve fitting that converges somewhat better Now (in one test), 28 iterations instead of 1000. This is for 2 decimals. I am still not quite happy about the resulting ranks, need to take another look some day. --- src/relevance.c | 80 ++++++++---- test/test_rank_12.res | 338 ++++++++++++++++++++++++------------------------- 2 files changed, 222 insertions(+), 196 deletions(-) diff --git a/src/relevance.c b/src/relevance.c index 9de591f..4282d8c 100644 --- a/src/relevance.c +++ b/src/relevance.c @@ -155,7 +155,6 @@ static void setup_norm_record( struct relevance *rel, struct record_cluster *cl { struct record_metadata *md = record->metadata[norm->scorefield]; rp->score = md->data.fnumber; - assert(rp->score>0); // ### } yaz_log(YLOG_LOG,"Got score for %d/%d : %f ", norm->num, record->position, rp->score ); @@ -187,9 +186,11 @@ static double squaresum( struct norm_record *rp, double a, double b) return sum; } +// For each client, normalize scores static void normalize_scores(struct relevance *rel) { - // For each client, normalize scores + const int maxiterations = 100; + const double enough = 100.0; // sets the number of decimals we are happy with struct norm_client *norm; for ( norm = rel->norm; norm; norm = norm->next ) { @@ -205,56 +206,81 @@ static void normalize_scores(struct relevance *rel) double a,b; // params to optimize double as,bs; // step sizes double chi; + char dir = 'a'; // initial guesses for the parameters if ( range < 1e-6 ) // practically zero range = norm->max; a = 1.0 / range; b = abs(norm->min); - as = a / 3; - bs = b / 3; + as = a / 10; + bs = b / 10; chi = squaresum( norm->records, a,b); - while (it++ < 100) // safeguard against things not converging + while (it++ < maxiterations) // safeguard against things not converging { - // optimize a - double plus = squaresum(norm->records, a+as, b); - double minus= squaresum(norm->records, a-as, b); - if ( plus < chi && plus < minus ) + double aplus = squaresum(norm->records, a+as, b); + double aminus= squaresum(norm->records, a-as, b); + double bplus = squaresum(norm->records, a, b+bs); + double bminus= squaresum(norm->records, a, b-bs); + if ( aplus < chi && aplus < aminus && aplus < bplus && aplus < bminus) { a = a + as; - chi = plus; + chi = aplus; + yaz_log(YLOG_LOG,"Fitting aplus it=%d: a=%f / %f b=%f / %f chi = %f", + it, a, as, b, bs, chi ); } - else if ( minus < chi && minus < plus ) + else if ( aminus < chi && aminus < aplus && aminus < bplus && aminus < bminus) { a = a - as; - chi = minus; + chi = aminus; + yaz_log(YLOG_LOG,"Fitting aminus it=%d: a=%f / %f b=%f / %f chi = %f", + it, a, as, b, bs, chi ); } - else - as = as / 2; - // optimize b - plus = squaresum(norm->records, a, b+bs); - minus= squaresum(norm->records, a, b-bs); - if ( plus < chi && plus < minus ) + else if ( bplus < chi && bplus < aplus && bplus < aminus && bplus < bminus) { b = b + bs; - chi = plus; + chi = bplus; + yaz_log(YLOG_LOG,"Fitting bplus it=%d: a=%f / %f b=%f / %f chi = %f", + it, a, as, b, bs, chi ); } - else if ( minus < chi && minus < plus ) + else if ( bminus < chi && bminus < aplus && bminus < bplus && bminus < aminus) { b = b - bs; - chi = minus; + chi = bminus; + yaz_log(YLOG_LOG,"Fitting bminus it=%d: a=%f / %f b=%f / %f chi = %f", + it, a, as, b, bs, chi ); } else - bs = bs / 2; - yaz_log(YLOG_LOG,"Fitting it=%d: a=%f / %f b=%f / %f chi = %f", - it, a, as, b, bs, chi ); + { + if ( as > bs ) + { + as = as / 2; + yaz_log(YLOG_LOG,"Fitting step a it=%d: a=%f / %f b=%f / %f chi = %f", + it, a, as, b, bs, chi ); + } + else + { + bs = bs / 2; + yaz_log(YLOG_LOG,"Fitting step b it=%d: a=%f / %f b=%f / %f chi = %f", + it, a, as, b, bs, chi ); + } + } norm->a = a; norm->b = b; - if ( abs(as) * 1000.0 < abs(a) && - abs(bs) * 1000.0 < abs(b) ) + if ( fabs(as) * enough < fabs(a) && + fabs(bs) * enough < fabs(b) ) { + yaz_log(YLOG_LOG,"Fitting done: stopping loop at %d" , it ); break; // not changing much any more + + } } + yaz_log(YLOG_LOG,"Fitting done: it=%d: a=%f / %f b=%f / %f chi = %f", + it-1, a, as, b, bs, chi ); + yaz_log(YLOG_LOG," a: %f < %f %d", + fabs(as)*enough, fabs(a), (fabs(as) * enough < fabs(a)) ); + yaz_log(YLOG_LOG," b: %f < %f %d", + fabs(bs)*enough, fabs(b), (fabs(bs) * enough < fabs(b)) ); } - + if ( norm->scorefield != scorefield_none ) { // distribute the normalized scores to the records struct norm_record *nr = norm->records; diff --git a/test/test_rank_12.res b/test/test_rank_12.res index b40a464..ce588ec 100644 --- a/test/test_rank_12.res +++ b/test/test_rank_12.res @@ -6,93 +6,161 @@ 0 19 - Water management problems and challenges in India - an analytical review + District water supply plan 2000 - Dinesh Kumar, M + [1] [No special title] -- [2] Appendixes book - Water management problems and challenges in India - an analytical review + name="LOC Solr Test" checksum="552609001"> + District water supply plan 2000 - Dinesh Kumar, M + [1] [No special title] -- [2] Appendixes book - 2.304635 + 1.928196 1 - 2000 - content: title water management problems and challenges in india author dinesh kumar m medium book + 3526 + content: title district water supply plan medium book - The magic of water - reflection and transparency at the water's edge + Proposition 13 + Safe Drinking Water, Clean Water, Watershed Protection, and Flood Protection Act 2000 - Hochschwender, Ted + "March 2000." book - The magic of water - reflection and transparency at the water's edge + name="LOC Solr Test" checksum="3232963828"> + Proposition 13 + Safe Drinking Water, Clean Water, Watershed Protection, and Flood Protection Act 2000 - Hochschwender, Ted + "March 2000." book - 2.231453 + 1.928196 1 - 1939 - content: title the magic of water author hochschwender ted medium book + 3526 + content: title proposition medium book - Water + 1999 wastewater and drinking water user charge survey 1999 - De Villiers, Marq + "December, 1999." book - Water + name="LOC Solr Test" checksum="1618351359"> + 1999 wastewater and drinking water user charge survey 1999 - De Villiers, Marq + Cover title + "December, 1999." book - 2.186368 + 1.928196 1 - 1902 - content: title water author de villiers marq medium book + 3526 + content: title wastewater and drinking water user charge survey medium book - Water use for public water supply in Michigan, 1998 + Water in press, 1997 + an index of news items on water resources selected from leading news papers + 1998 + With reference to India + book + + Water in press, 1997 + an index of news items on water resources selected from leading news papers + 1998 + Includes index + With reference to India + book + 1.928196 + + 1 + 3526 + content: title water in press medium book + + + Who governs water? + the politics of water resource management + 1999 + Frey, Hans + book + + Who governs water? + the politics of water resource management + 1999 + Frey, Hans + book + 1.928196 + + 1 + 3526 + content: title who governs water author frey hans medium book + + + A Primer on fresh water + questions and answers 2000 - "January 3, 2000." + Issued also in French under title: Notions élémentaires sur l'eau douce : questions et réponses book - Water use for public water supply in Michigan, 1998 + name="LOC Solr Test" checksum="2684093717"> + A Primer on fresh water + questions and answers 2000 - "January 3, 2000." + Issued also in French under title: Notions élémentaires sur l'eau douce : questions et réponses + Includes index book - 2.186368 + 2.016555 1 - 1902 - content: title water use for public water supply in michigan medium book + 2319 + content: title a primer on fresh water medium book - Report to the IUCN on water demand management country study - Namibia - 1999 + Water quality assessment of the State Water Project, 1996-97 + 1999-2000 + "September 1999." book - Report to the IUCN on water demand management country study - Namibia + name="LOC Solr Test" checksum="3749836075"> + Water quality assessment of the State Water Project, 1996-97 1999 + Cover title + "September 1999." book - 2.114981 + 2.016555 + + + Water quality assessment of the State Water Project, 1998-99 + 2000 + Cover title + "July 2000." + book + 2.016555 + + 2 + 2319 + content: title water quality assessment of the state water project medium book + + + Wonderful water + 2001 + Glover, David + book + + Wonderful water + 2001 + Glover, David + book + 2.037029 1 - 1843 - content: title report to the iucn on water demand management country study medium book + 2039 + content: title wonderful water author glover david medium book Evaluation and control of water pollution in Bhavani Basin @@ -111,7 +179,7 @@ 2.061328 1 - 1798 + 1707 content: title evaluation and control of water pollution in bhavani basin medium book @@ -130,7 +198,7 @@ 2.061328 1 - 1798 + 1707 content: title unsia water cluster medium book @@ -147,7 +215,7 @@ 2.061328 1 - 1798 + 1707 content: title water and water supplies author thresh john clough medium book @@ -166,7 +234,7 @@ 2.061328 1 - 1798 + 1707 content: title water author majeed abdul medium book @@ -185,7 +253,7 @@ 2.061328 1 - 1798 + 1707 content: title water law author fisher d e medium book @@ -202,164 +270,96 @@ 2.061328 1 - 1798 + 1707 content: title water technology management medium book - Wonderful water - 2001 - Glover, David - book - - Wonderful water - 2001 - Glover, David - book - 2.037029 - - 1 - 1778 - content: title wonderful water author glover david medium book - - - A Primer on fresh water - questions and answers - 2000 - Issued also in French under title: Notions élémentaires sur l'eau douce : questions et réponses + Report to the IUCN on water demand management country study + Namibia + 1999 book - A Primer on fresh water - questions and answers - 2000 - Issued also in French under title: Notions élémentaires sur l'eau douce : questions et réponses - Includes index + name="LOC Solr Test" checksum="3717838211"> + Report to the IUCN on water demand management country study + Namibia + 1999 book - 2.016555 + 2.114981 1 - 1761 - content: title a primer on fresh water medium book + 974 + content: title report to the iucn on water demand management country study medium book - Water quality assessment of the State Water Project, 1996-97 - 1999-2000 - "September 1999." + Water + 1999 + De Villiers, Marq book - Water quality assessment of the State Water Project, 1996-97 + name="LOC Solr Test" checksum="488613273"> + Water 1999 - Cover title - "September 1999." - book - 2.016555 - - - Water quality assessment of the State Water Project, 1998-99 - 2000 - Cover title - "July 2000." + De Villiers, Marq book - 2.016555 + 2.186368 - 2 - 1761 - content: title water quality assessment of the state water project medium book + 1 + 0 + content: title water author de villiers marq medium book - District water supply plan + Water use for public water supply in Michigan, 1998 2000 - [1] [No special title] -- [2] Appendixes + "January 3, 2000." book - District water supply plan + name="LOC Solr Test" checksum="2103225742"> + Water use for public water supply in Michigan, 1998 2000 - [1] [No special title] -- [2] Appendixes + "January 3, 2000." book - 1.928196 + 2.186368 1 - 1688 - content: title district water supply plan medium book + 0 + content: title water use for public water supply in michigan medium book - Proposition 13 - Safe Drinking Water, Clean Water, Watershed Protection, and Flood Protection Act + The magic of water + reflection and transparency at the water's edge 2000 - "March 2000." + Hochschwender, Ted book - Proposition 13 - Safe Drinking Water, Clean Water, Watershed Protection, and Flood Protection Act + name="LOC Solr Test" checksum="3168968100"> + The magic of water + reflection and transparency at the water's edge 2000 - "March 2000." - book - 1.928196 - - 1 - 1688 - content: title proposition medium book - - - 1999 wastewater and drinking water user charge survey - 1999 - "December, 1999." - book - - 1999 wastewater and drinking water user charge survey - 1999 - Cover title - "December, 1999." - book - 1.928196 - - 1 - 1688 - content: title wastewater and drinking water user charge survey medium book - - - Water in press, 1997 - an index of news items on water resources selected from leading news papers - 1998 - With reference to India - book - - Water in press, 1997 - an index of news items on water resources selected from leading news papers - 1998 - Includes index - With reference to India + Hochschwender, Ted book - 1.928196 + 2.231453 1 - 1688 - content: title water in press medium book + -616 + content: title the magic of water author hochschwender ted medium book - Who governs water? - the politics of water resource management - 1999 - Frey, Hans + Water management problems and challenges in India + an analytical review + 2000 + Dinesh Kumar, M book - Who governs water? - the politics of water resource management - 1999 - Frey, Hans + name="LOC Solr Test" checksum="1554355631"> + Water management problems and challenges in India + an analytical review + 2000 + Dinesh Kumar, M book - 1.928196 + 2.304635 1 - 1688 - content: title who governs water author frey hans medium book + -1616 + content: title water management problems and challenges in india author dinesh kumar m medium book \ No newline at end of file -- 1.7.10.4