projects
/
pazpar2-moved-to-github.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Fitting: better initial guess
[pazpar2-moved-to-github.git]
/
src
/
relevance.c
diff --git
a/src/relevance.c
b/src/relevance.c
index
4282d8c
..
02dc971
100644
(file)
--- a/
src/relevance.c
+++ b/
src/relevance.c
@@
-165,8
+165,8
@@
static void setup_norm_record( struct relevance *rel, struct record_cluster *cl
} else {
if ( rp->score > norm->max )
norm->max = rp->score;
} else {
if ( rp->score > norm->max )
norm->max = rp->score;
- if ( rp->score < norm->min && abs(rp->score) < 1e-6 )
- norm->min = rp->score; // skip zeroes
+ if ( rp->score < norm->min )
+ norm->min = rp->score;
}
}
}
}
}
}
@@
-189,13
+189,16
@@
static double squaresum( struct norm_record *rp, double a, double b)
// For each client, normalize scores
static void normalize_scores(struct relevance *rel)
{
// For each client, normalize scores
static void normalize_scores(struct relevance *rel)
{
- const int maxiterations = 100;
+ const int maxiterations = 1000;
const double enough = 100.0; // sets the number of decimals we are happy with
const double enough = 100.0; // sets the number of decimals we are happy with
+ const double stepchange = 0.5; // reduction of the step size when finding middle
+ // 0.5 seems to be magical, much better than 0.4 or 0.6
struct norm_client *norm;
for ( norm = rel->norm; norm; norm = norm->next )
{
struct norm_client *norm;
for ( norm = rel->norm; norm; norm = norm->next )
{
- yaz_log(YLOG_LOG,"Normalizing client %d: scorefield=%d count=%d",
- norm->num, norm->scorefield, norm->count);
+ yaz_log(YLOG_LOG,"Normalizing client %d: scorefield=%d count=%d range=%f %f = %f",
+ norm->num, norm->scorefield, norm->count, norm->min,
+ norm->max, norm->max-norm->min);
norm->a = 1.0; // default normalizing factors, no change
norm->b = 0.0;
if ( norm->scorefield != scorefield_none &&
norm->a = 1.0; // default normalizing factors, no change
norm->b = 0.0;
if ( norm->scorefield != scorefield_none &&
@@
-206,79
+209,93
@@
static void normalize_scores(struct relevance *rel)
double a,b; // params to optimize
double as,bs; // step sizes
double chi;
double a,b; // params to optimize
double as,bs; // step sizes
double chi;
- char dir = 'a';
+ char *branch = "?";
// initial guesses for the parameters
// initial guesses for the parameters
+ // Rmax = a * rmax + b # want to be 1.0
+ // Rmin = a * rmin + b # want to be 0.0
+ // Rmax - Rmin = a ( rmax - rmin ) # subtracting equations
+ // 1.0 - 0.0 = a ( rmax - rmin )
+ // a = 1 / range
+ // Rmin = a * rmin + b
+ // b = Rmin - a * rmin
+ // = 0.0 - 1/range * rmin
+ // = - rmin / range
+
if ( range < 1e-6 ) // practically zero
range = norm->max;
a = 1.0 / range;
if ( range < 1e-6 ) // practically zero
range = norm->max;
a = 1.0 / range;
- b = abs(norm->min);
+ b = -1.0 * norm->min / range;
+ // b = fabs(norm->min) / range;
as = a / 10;
as = a / 10;
- bs = b / 10;
+ bs = fabs(b) / 10;
chi = squaresum( norm->records, a,b);
chi = squaresum( norm->records, a,b);
+ yaz_log(YLOG_LOG,"Initial done: it=%d: a=%f / %f b=%f / %f chi = %f",
+ 0, a, as, b, bs, chi );
while (it++ < maxiterations) // safeguard against things not converging
{
double aplus = squaresum(norm->records, a+as, b);
double aminus= squaresum(norm->records, a-as, b);
double bplus = squaresum(norm->records, a, b+bs);
double bminus= squaresum(norm->records, a, b-bs);
while (it++ < maxiterations) // safeguard against things not converging
{
double aplus = squaresum(norm->records, a+as, b);
double aminus= squaresum(norm->records, a-as, b);
double bplus = squaresum(norm->records, a, b+bs);
double bminus= squaresum(norm->records, a, b-bs);
+ double prevchi = chi;
if ( aplus < chi && aplus < aminus && aplus < bplus && aplus < bminus)
{
a = a + as;
chi = aplus;
if ( aplus < chi && aplus < aminus && aplus < bplus && aplus < bminus)
{
a = a + as;
chi = aplus;
- yaz_log(YLOG_LOG,"Fitting aplus it=%d: a=%f / %f b=%f / %f chi = %f",
- it, a, as, b, bs, chi );
+ as = as * (1.0 + stepchange);
+ branch = "aplus ";
}
else if ( aminus < chi && aminus < aplus && aminus < bplus && aminus < bminus)
{
a = a - as;
chi = aminus;
}
else if ( aminus < chi && aminus < aplus && aminus < bplus && aminus < bminus)
{
a = a - as;
chi = aminus;
- yaz_log(YLOG_LOG,"Fitting aminus it=%d: a=%f / %f b=%f / %f chi = %f",
- it, a, as, b, bs, chi );
+ as = as * (1.0 + stepchange);
+ branch = "aminus";
}
else if ( bplus < chi && bplus < aplus && bplus < aminus && bplus < bminus)
{
b = b + bs;
chi = bplus;
}
else if ( bplus < chi && bplus < aplus && bplus < aminus && bplus < bminus)
{
b = b + bs;
chi = bplus;
- yaz_log(YLOG_LOG,"Fitting bplus it=%d: a=%f / %f b=%f / %f chi = %f",
- it, a, as, b, bs, chi );
+ bs = bs * (1.0 + stepchange);
+ branch = "bplus ";
}
else if ( bminus < chi && bminus < aplus && bminus < bplus && bminus < aminus)
{
b = b - bs;
chi = bminus;
}
else if ( bminus < chi && bminus < aplus && bminus < bplus && bminus < aminus)
{
b = b - bs;
chi = bminus;
- yaz_log(YLOG_LOG,"Fitting bminus it=%d: a=%f / %f b=%f / %f chi = %f",
- it, a, as, b, bs, chi );
+ branch = "bminus";
+ bs = bs * (1.0+stepchange);
}
else
}
else
- {
- if ( as > bs )
+ { // a,b is the best so far, adjust one step size
+ // which one? The one that has the greatest effect on chi,
+ // that is, the one whose average of plus and minus is further away from chi
+ double adif = 0.5 * ( aplus + aminus ) - prevchi;
+ double bdif = 0.5 * ( bplus + bminus ) - prevchi;
+ if ( fabs(adif) > fabs(bdif) )
{
{
- as = as / 2;
- yaz_log(YLOG_LOG,"Fitting step a it=%d: a=%f / %f b=%f / %f chi = %f",
- it, a, as, b, bs, chi );
+ as = as * ( 1.0 - stepchange);
+ branch = "step a";
}
else
{
}
else
{
- bs = bs / 2;
- yaz_log(YLOG_LOG,"Fitting step b it=%d: a=%f / %f b=%f / %f chi = %f",
- it, a, as, b, bs, chi );
+ bs = bs * ( 1.0 - stepchange);
+ branch = "step b";
}
}
}
}
+ yaz_log(YLOG_LOG,"Fitting %s it=%d: a=%g %g b=%g %g chi=%g ap=%g am=%g, bp=%g bm=%g p=%g",
+ branch, it, a, as, b, bs, chi,
+ aplus, aminus, bplus, bminus, prevchi );
norm->a = a;
norm->b = b;
if ( fabs(as) * enough < fabs(a) &&
fabs(bs) * enough < fabs(b) ) {
norm->a = a;
norm->b = b;
if ( fabs(as) * enough < fabs(a) &&
fabs(bs) * enough < fabs(b) ) {
- yaz_log(YLOG_LOG,"Fitting done: stopping loop at %d" , it );
break; // not changing much any more
}
}
break; // not changing much any more
}
}
- yaz_log(YLOG_LOG,"Fitting done: it=%d: a=%f / %f b=%f / %f chi = %f",
+ yaz_log(YLOG_LOG,"Fitting done: it=%d: a=%g / %g b=%g / %g chi = %g",
it-1, a, as, b, bs, chi );
it-1, a, as, b, bs, chi );
- yaz_log(YLOG_LOG," a: %f < %f %d",
- fabs(as)*enough, fabs(a), (fabs(as) * enough < fabs(a)) );
- yaz_log(YLOG_LOG," b: %f < %f %d",
- fabs(bs)*enough, fabs(b), (fabs(bs) * enough < fabs(b)) );
}
if ( norm->scorefield != scorefield_none )
}
if ( norm->scorefield != scorefield_none )