From 1b2621985f8d74b4d093f780fb952ee3d032c87d Mon Sep 17 00:00:00 2001 From: Heikki Levanto Date: Wed, 18 Dec 2013 14:17:53 +0100 Subject: [PATCH] normalizing finally seems to converge --- src/relevance.c | 51 ++++--- test/test_rank.urls | 4 + test/test_rank_12.res | 338 ++++++++++++++++++++--------------------- test/test_rank_settings_4.xml | 33 ++++ 4 files changed, 235 insertions(+), 191 deletions(-) create mode 100644 test/test_rank_settings_4.xml diff --git a/src/relevance.c b/src/relevance.c index 4282d8c..4cbf7f2 100644 --- a/src/relevance.c +++ b/src/relevance.c @@ -189,13 +189,15 @@ static double squaresum( struct norm_record *rp, double a, double b) // For each client, normalize scores static void normalize_scores(struct relevance *rel) { - const int maxiterations = 100; - const double enough = 100.0; // sets the number of decimals we are happy with + const int maxiterations = 1000; + const double enough = 1000.0; // sets the number of decimals we are happy with + const double stepchange = 0.5; // reduction of the step size when finding middle + // 0.5 seems to be magical, much better than 0.4 or 0.6 struct norm_client *norm; for ( norm = rel->norm; norm; norm = norm->next ) { - yaz_log(YLOG_LOG,"Normalizing client %d: scorefield=%d count=%d", - norm->num, norm->scorefield, norm->count); + yaz_log(YLOG_LOG,"Normalizing client %d: scorefield=%d count=%d range=%f %f", + norm->num, norm->scorefield, norm->count, norm->min, norm->max); norm->a = 1.0; // default normalizing factors, no change norm->b = 0.0; if ( norm->scorefield != scorefield_none && @@ -206,7 +208,7 @@ static void normalize_scores(struct relevance *rel) double a,b; // params to optimize double as,bs; // step sizes double chi; - char dir = 'a'; + char *branch = "?"; // initial guesses for the parameters if ( range < 1e-6 ) // practically zero range = norm->max; @@ -221,54 +223,59 @@ static void normalize_scores(struct relevance *rel) double aminus= squaresum(norm->records, a-as, b); double 
bplus = squaresum(norm->records, a, b+bs); double bminus= squaresum(norm->records, a, b-bs); + double prevchi = chi; if ( aplus < chi && aplus < aminus && aplus < bplus && aplus < bminus) { a = a + as; chi = aplus; - yaz_log(YLOG_LOG,"Fitting aplus it=%d: a=%f / %f b=%f / %f chi = %f", - it, a, as, b, bs, chi ); + as = as * (1.0 + stepchange); + branch = "aplus "; } else if ( aminus < chi && aminus < aplus && aminus < bplus && aminus < bminus) { a = a - as; chi = aminus; - yaz_log(YLOG_LOG,"Fitting aminus it=%d: a=%f / %f b=%f / %f chi = %f", - it, a, as, b, bs, chi ); + as = as * (1.0 + stepchange); + branch = "aminus"; } else if ( bplus < chi && bplus < aplus && bplus < aminus && bplus < bminus) { b = b + bs; chi = bplus; - yaz_log(YLOG_LOG,"Fitting bplus it=%d: a=%f / %f b=%f / %f chi = %f", - it, a, as, b, bs, chi ); + bs = bs * (1.0 + stepchange); + branch = "bplus "; } else if ( bminus < chi && bminus < aplus && bminus < bplus && bminus < aminus) { b = b - bs; chi = bminus; - yaz_log(YLOG_LOG,"Fitting bminus it=%d: a=%f / %f b=%f / %f chi = %f", - it, a, as, b, bs, chi ); + branch = "bminus"; + bs = bs * (1.0+stepchange); } else - { - if ( as > bs ) + { // a,b is the best so far, adjust one step size + // which one? 
The one that has the greatest effect to chi + // That is, the average of plus and minus is further away from chi + double adif = 0.5 * ( aplus + aminus ) - prevchi; + double bdif = 0.5 * ( bplus + bminus ) - prevchi; + if ( fabs(adif) > fabs(bdif) ) { - as = as / 2; - yaz_log(YLOG_LOG,"Fitting step a it=%d: a=%f / %f b=%f / %f chi = %f", - it, a, as, b, bs, chi ); + as = as * ( 1.0 - stepchange); + branch = "step a"; } else { - bs = bs / 2; - yaz_log(YLOG_LOG,"Fitting step b it=%d: a=%f / %f b=%f / %f chi = %f", - it, a, as, b, bs, chi ); + bs = bs * ( 1.0 - stepchange); + branch = "step b"; } } + yaz_log(YLOG_LOG,"Fitting %s it=%d: a=%f %f b=%f %f chi=%f ap=%f am=%f, bp=%f bm=%f p=%f", + branch, it, a, as, b, bs, chi, + aplus, aminus, bplus, bminus, prevchi ); norm->a = a; norm->b = b; if ( fabs(as) * enough < fabs(a) && fabs(bs) * enough < fabs(b) ) { - yaz_log(YLOG_LOG,"Fitting done: stopping loop at %d" , it ); break; // not changing much any more } diff --git a/test/test_rank.urls b/test/test_rank.urls index 4a8ac44..b08b8a0 100644 --- a/test/test_rank.urls +++ b/test/test_rank.urls @@ -10,3 +10,7 @@ http://localhost:9763/search.pz2?session=1&command=init test_rank_settings_3.xml http://localhost:9763/search.pz2?session=1&command=settings http://localhost:9763/search.pz2?session=1&command=search&query=water&sort=relevance 2 http://localhost:9763/search.pz2?session=1&command=show&sort=relevance +http://localhost:9763/search.pz2?session=1&command=init +test_rank_settings_4.xml http://localhost:9763/search.pz2?session=1&command=settings +http://localhost:9763/search.pz2?session=1&command=search&query=water&sort=relevance +2 http://localhost:9763/search.pz2?session=1&command=show&sort=relevance diff --git a/test/test_rank_12.res b/test/test_rank_12.res index ce588ec..f03b6fc 100644 --- a/test/test_rank_12.res +++ b/test/test_rank_12.res @@ -6,161 +6,93 @@ 0 19 - District water supply plan + Water management problems and challenges in India + an analytical review 
2000 - [1] [No special title] -- [2] Appendixes + Dinesh Kumar, M book - District water supply plan + name="LOC Solr Test" checksum="1554355631"> + Water management problems and challenges in India + an analytical review 2000 - [1] [No special title] -- [2] Appendixes + Dinesh Kumar, M book - 1.928196 + 2.304635 1 - 3526 - content: title district water supply plan medium book + 6645 + content: title water management problems and challenges in india author dinesh kumar m medium book - Proposition 13 - Safe Drinking Water, Clean Water, Watershed Protection, and Flood Protection Act + The magic of water + reflection and transparency at the water's edge 2000 - "March 2000." + Hochschwender, Ted book - Proposition 13 - Safe Drinking Water, Clean Water, Watershed Protection, and Flood Protection Act + name="LOC Solr Test" checksum="3168968100"> + The magic of water + reflection and transparency at the water's edge 2000 - "March 2000." - book - 1.928196 - - 1 - 3526 - content: title proposition medium book - - - 1999 wastewater and drinking water user charge survey - 1999 - "December, 1999." - book - - 1999 wastewater and drinking water user charge survey - 1999 - Cover title - "December, 1999." - book - 1.928196 - - 1 - 3526 - content: title wastewater and drinking water user charge survey medium book - - - Water in press, 1997 - an index of news items on water resources selected from leading news papers - 1998 - With reference to India - book - - Water in press, 1997 - an index of news items on water resources selected from leading news papers - 1998 - Includes index - With reference to India + Hochschwender, Ted book - 1.928196 + 2.231453 1 - 3526 - content: title water in press medium book + 5215 + content: title the magic of water author hochschwender ted medium book - Who governs water? - the politics of water resource management + Water 1999 - Frey, Hans + De Villiers, Marq book - Who governs water? 
- the politics of water resource management + name="LOC Solr Test" checksum="488613273"> + Water 1999 - Frey, Hans + De Villiers, Marq book - 1.928196 + 2.186368 1 - 3526 - content: title who governs water author frey hans medium book + 4334 + content: title water author de villiers marq medium book - A Primer on fresh water - questions and answers + Water use for public water supply in Michigan, 1998 2000 - Issued also in French under title: Notions élémentaires sur l'eau douce : questions et réponses + "January 3, 2000." book - A Primer on fresh water - questions and answers + name="LOC Solr Test" checksum="2103225742"> + Water use for public water supply in Michigan, 1998 2000 - Issued also in French under title: Notions élémentaires sur l'eau douce : questions et réponses - Includes index + "January 3, 2000." book - 2.016555 + 2.186368 1 - 2319 - content: title a primer on fresh water medium book + 4334 + content: title water use for public water supply in michigan medium book - Water quality assessment of the State Water Project, 1996-97 - 1999-2000 - "September 1999." + Report to the IUCN on water demand management country study + Namibia + 1999 book - Water quality assessment of the State Water Project, 1996-97 + name="LOC Solr Test" checksum="3717838211"> + Report to the IUCN on water demand management country study + Namibia 1999 - Cover title - "September 1999." book - 2.016555 - - - Water quality assessment of the State Water Project, 1998-99 - 2000 - Cover title - "July 2000." 
- book - 2.016555 - - 2 - 2319 - content: title water quality assessment of the state water project medium book - - - Wonderful water - 2001 - Glover, David - book - - Wonderful water - 2001 - Glover, David - book - 2.037029 + 2.114981 1 - 2039 - content: title wonderful water author glover david medium book + 2939 + content: title report to the iucn on water demand management country study medium book Evaluation and control of water pollution in Bhavani Basin @@ -179,7 +111,7 @@ 2.061328 1 - 1707 + 1890 content: title evaluation and control of water pollution in bhavani basin medium book @@ -198,7 +130,7 @@ 2.061328 1 - 1707 + 1890 content: title unsia water cluster medium book @@ -215,7 +147,7 @@ 2.061328 1 - 1707 + 1890 content: title water and water supplies author thresh john clough medium book @@ -234,7 +166,7 @@ 2.061328 1 - 1707 + 1890 content: title water author majeed abdul medium book @@ -253,7 +185,7 @@ 2.061328 1 - 1707 + 1890 content: title water law author fisher d e medium book @@ -270,96 +202,164 @@ 2.061328 1 - 1707 + 1890 content: title water technology management medium book - Report to the IUCN on water demand management country study - Namibia - 1999 + Wonderful water + 2001 + Glover, David book - Report to the IUCN on water demand management country study - Namibia - 1999 + name="LOC Solr Test" checksum="2135223606"> + Wonderful water + 2001 + Glover, David book - 2.114981 + 2.037029 1 - 974 - content: title report to the iucn on water demand management country study medium book + 1415 + content: title wonderful water author glover david medium book - Water - 1999 - De Villiers, Marq + A Primer on fresh water + questions and answers + 2000 + Issued also in French under title: Notions élémentaires sur l'eau douce : questions et réponses book - Water - 1999 - De Villiers, Marq + name="LOC Solr Test" checksum="2684093717"> + A Primer on fresh water + questions and answers + 2000 + Issued also in French under title: Notions élémentaires sur l'eau 
douce : questions et réponses + Includes index book - 2.186368 + 2.016555 1 - 0 - content: title water author de villiers marq medium book + 1015 + content: title a primer on fresh water medium book - Water use for public water supply in Michigan, 1998 - 2000 - "January 3, 2000." + Water quality assessment of the State Water Project, 1996-97 + 1999-2000 + "September 1999." book - Water use for public water supply in Michigan, 1998 + name="LOC Solr Test" checksum="3749836075"> + Water quality assessment of the State Water Project, 1996-97 + 1999 + Cover title + "September 1999." + book + 2.016555 + + + Water quality assessment of the State Water Project, 1998-99 2000 - "January 3, 2000." + Cover title + "July 2000." book - 2.186368 + 2.016555 - 1 - 0 - content: title water use for public water supply in michigan medium book + 2 + 1015 + content: title water quality assessment of the state water project medium book - The magic of water - reflection and transparency at the water's edge + District water supply plan 2000 - Hochschwender, Ted + [1] [No special title] -- [2] Appendixes book - The magic of water - reflection and transparency at the water's edge + name="LOC Solr Test" checksum="552609001"> + District water supply plan 2000 - Hochschwender, Ted + [1] [No special title] -- [2] Appendixes book - 2.231453 + 1.928196 1 - -616 - content: title the magic of water author hochschwender ted medium book + -710 + content: title district water supply plan medium book - Water management problems and challenges in India - an analytical review + Proposition 13 + Safe Drinking Water, Clean Water, Watershed Protection, and Flood Protection Act 2000 - Dinesh Kumar, M + "March 2000." book - Water management problems and challenges in India - an analytical review + name="LOC Solr Test" checksum="3232963828"> + Proposition 13 + Safe Drinking Water, Clean Water, Watershed Protection, and Flood Protection Act 2000 - Dinesh Kumar, M + "March 2000." 
book - 2.304635 + 1.928196 1 - -1616 - content: title water management problems and challenges in india author dinesh kumar m medium book + -710 + content: title proposition medium book + + + 1999 wastewater and drinking water user charge survey + 1999 + "December, 1999." + book + + 1999 wastewater and drinking water user charge survey + 1999 + Cover title + "December, 1999." + book + 1.928196 + + 1 + -710 + content: title wastewater and drinking water user charge survey medium book + + + Water in press, 1997 + an index of news items on water resources selected from leading news papers + 1998 + With reference to India + book + + Water in press, 1997 + an index of news items on water resources selected from leading news papers + 1998 + Includes index + With reference to India + book + 1.928196 + + 1 + -710 + content: title water in press medium book + + + Who governs water? + the politics of water resource management + 1999 + Frey, Hans + book + + Who governs water? + the politics of water resource management + 1999 + Frey, Hans + book + 1.928196 + + 1 + -710 + content: title who governs water author frey hans medium book \ No newline at end of file diff --git a/test/test_rank_settings_4.xml b/test/test_rank_settings_4.xml new file mode 100644 index 0000000..edd80df --- /dev/null +++ b/test/test_rank_settings_4.xml @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -- 1.7.10.4