From fb19bc317370b4f4b508088b034b4c2d19257042 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Fri, 13 Dec 2013 13:57:52 +0100 Subject: [PATCH] New metadata type, float, for metadata score PAZ-908 --- src/http_command.c | 3 +++ src/pazpar2_config.c | 42 +++++++++++++++++++++----------- src/pazpar2_config.h | 18 ++++++-------- src/reclists.c | 65 +++++++++++++++++++++++++++++++------------------- src/reclists.h | 2 +- src/record.c | 3 +++ src/record.h | 1 + src/session.c | 28 +++++++++++++--------- 8 files changed, 101 insertions(+), 61 deletions(-) diff --git a/src/http_command.c b/src/http_command.c index 1273b8b..1de0354 100644 --- a/src/http_command.c +++ b/src/http_command.c @@ -925,6 +925,9 @@ static void write_metadata(WRBUF w, struct conf_service *service, if (md->data.number.min != md->data.number.max) wrbuf_printf(w, "-%d", md->data.number.max); break; + case Metadata_type_float: + wrbuf_printf(w, "%f", md->data.fnumber); + break; default: wrbuf_puts(w, "[can't represent]"); break; diff --git a/src/pazpar2_config.c b/src/pazpar2_config.c index 8068b13..2d747ca 100644 --- a/src/pazpar2_config.c +++ b/src/pazpar2_config.c @@ -207,7 +207,7 @@ static struct conf_sortkey *conf_service_add_sortkey( struct conf_service *service, int field_id, const char *name, - enum conf_sortkey_type type) + enum conf_metadata_type type) { struct conf_sortkey *sk = 0; NMEM nmem = service->nmem; @@ -392,6 +392,8 @@ static int parse_metadata(struct conf_service *service, xmlNode *n, type = Metadata_type_year; else if (!strcmp((const char *) xml_type, "date")) type = Metadata_type_date; + else if (!strcmp((const char *) xml_type, "float")) + type = Metadata_type_float; else { yaz_log(YLOG_FATAL, @@ -433,7 +435,7 @@ static int parse_metadata(struct conf_service *service, xmlNode *n, else { yaz_log(YLOG_FATAL, - "Unknown value for medadata/setting: %s", xml_setting); + "Unknown value for metadata/setting: %s", xml_setting); return -1; } } @@ -441,17 +443,29 @@ static int parse_metadata(struct conf_service *service, xmlNode *n, // add a sortkey if so specified if (xml_sortkey && strcmp((const char *) xml_sortkey, "no")) { - enum conf_sortkey_type sk_type; + enum conf_metadata_type sk_type = type; if (merge == Metadata_merge_no) { yaz_log(YLOG_FATAL, "Can't specify sortkey on a non-merged field"); return -1; } + if (!strcmp((const char *) xml_sortkey, "yes")) + ; if (!strcmp((const char *) xml_sortkey, "numeric")) - sk_type = Metadata_sortkey_numeric; + ; else if (!strcmp((const char *) xml_sortkey, "skiparticle")) - sk_type = Metadata_sortkey_skiparticle; + { + if (sk_type == Metadata_type_generic) + sk_type = Metadata_type_skiparticle; + else + { + yaz_log(YLOG_FATAL, + "skiparticle only supported for type=generic: %s", + xml_type); + return -1; + } + } else { yaz_log(YLOG_FATAL, @@ -1014,21 +1028,18 @@ static void info_service_metadata(struct conf_service *service, WRBUF w) if (md->sortkey_offset > 0) { wrbuf_puts(w, " sortkey=\""); switch (service->sortkeys[md->sortkey_offset].type) { - case Metadata_sortkey_relevance: + case Metadata_type_relevance: wrbuf_puts(w, "relevance"); break; - case Metadata_sortkey_numeric: - wrbuf_puts(w, "numeric"); - break; - case Metadata_sortkey_skiparticle: + case Metadata_type_skiparticle: wrbuf_puts(w, "skiparticle"); break; - case Metadata_sortkey_string: - wrbuf_puts(w, "string"); - break; - case Metadata_sortkey_position: + case Metadata_type_position: wrbuf_puts(w, "position"); break; + default: + wrbuf_puts(w, "yes"); + break; } wrbuf_puts(w, "\""); } @@ -1042,6 +1053,9 @@ static void info_service_metadata(struct conf_service *service, WRBUF w) case Metadata_type_date: wrbuf_puts(w, " type=\"date\""); break; + case Metadata_type_float: + wrbuf_puts(w, " type=\"float\""); + break; } switch (md->merge) { diff --git a/src/pazpar2_config.h b/src/pazpar2_config.h index ee195b9..6469f41 100644 --- a/src/pazpar2_config.h +++ b/src/pazpar2_config.h @@ -32,8 +32,12 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA enum conf_metadata_type { Metadata_type_generic, // Generic text field - Metadata_type_year, // A number - Metadata_type_date // A number + Metadata_type_year, // year YYYY - YYYY + Metadata_type_date, // date YYYYMMDD - YYYYMMDD + Metadata_type_float, // float number + Metadata_type_skiparticle, + Metadata_type_relevance, + Metadata_type_position, }; enum conf_metadata_merge { @@ -45,14 +49,6 @@ enum conf_metadata_merge { Metadata_merge_first // All from first target }; -enum conf_sortkey_type { - Metadata_sortkey_relevance, - Metadata_sortkey_numeric, // Standard numerical sorting - Metadata_sortkey_skiparticle, // Skip leading article when sorting - Metadata_sortkey_string, // Flat string - Metadata_sortkey_position // Position -}; - // This controls the ability to insert 'static' values from settings into retrieval recs enum conf_setting_type { Metadata_setting_no, @@ -94,7 +90,7 @@ struct conf_metadata struct conf_sortkey { char *name; - enum conf_sortkey_type type; + enum conf_metadata_type type; }; struct conf_server; diff --git a/src/reclists.c b/src/reclists.c index 5822dbe..1b43169 100644 --- a/src/reclists.c +++ b/src/reclists.c @@ -84,7 +84,7 @@ struct reclist_sortparms *reclist_parse_sortparms(NMEM nmem, const char *parms, int increasing = 0; int i; int offset = 0; - enum conf_sortkey_type type = Metadata_sortkey_string; + enum conf_metadata_type type = Metadata_type_generic; struct reclist_sortparms *new; if (!(cpp = strchr(parms, ','))) @@ -107,21 +107,21 @@ struct reclist_sortparms *reclist_parse_sortparms(NMEM nmem, const char *parms, if (pp[2]) { if (pp[2] == 'p') - type = Metadata_sortkey_position; + type = Metadata_type_position; else yaz_log(YLOG_FATAL, "Bad sortkey modifier: %s", parm); } *pp = '\0'; } - if (type != Metadata_sortkey_position) + if (type != Metadata_type_position) { if (!strcmp(parm, "relevance")) { - type = Metadata_sortkey_relevance; + type = Metadata_type_relevance; } else if (!strcmp(parm, "position")) { - type = Metadata_sortkey_position; + type = Metadata_type_position; } else { @@ -131,8 +131,6 @@ struct reclist_sortparms *reclist_parse_sortparms(NMEM nmem, const char *parms, if (!strcmp(sk->name, parm)) { type = sk->type; - if (type == Metadata_sortkey_skiparticle) - type = Metadata_sortkey_string; break; } } @@ -173,35 +171,38 @@ static int reclist_cmp(const void *p1, const void *p2) const char *s1, *s2; switch (s->type) { - case Metadata_sortkey_relevance: - res = r2->relevance_score - r1->relevance_score; + case Metadata_type_relevance: + res = r1->relevance_score - r2->relevance_score; break; - case Metadata_sortkey_string: + case Metadata_type_generic: + case Metadata_type_skiparticle: s1 = ut1 ? ut1->text.sort : ""; s2 = ut2 ? ut2->text.sort : ""; - res = strcmp(s2, s1); - if (res) - { - if (s->increasing) - res *= -1; - } + res = strcmp(s1, s2); break; - case Metadata_sortkey_numeric: + case Metadata_type_year: + case Metadata_type_date: if (ut1 && ut2) { if (s->increasing) res = ut1->number.min - ut2->number.min; else - res = ut2->number.max - ut1->number.max; + res = ut1->number.max - ut2->number.max; } else if (ut1 && !ut2) - res = -1; + { + res = -1; /* without date/year: last! */ + continue; + } else if (!ut1 && ut2) - res = 1; + { + res = 1; /* without date/year: last! */ + continue; + } else res = 0; break; - case Metadata_sortkey_position: + case Metadata_type_position: if (r1->records && r2->records) { int pos1 = 0, pos2 = 0; @@ -213,13 +214,29 @@ static int reclist_cmp(const void *p1, const void *p2) if (pos2 == 0 || rec->position < pos2) pos2 = rec->position; res = pos1 - pos2; + continue; } break; - default: - yaz_log(YLOG_WARN, "Bad sort type: %d", s->type); - res = 0; + case Metadata_type_float: + if (ut1 && ut2) + { + if (ut1->fnumber == ut2->fnumber) + res = 0; + else if (ut1->fnumber > ut2->fnumber) + res = 1; + else + res = -1; + } + else if (ut1) + res = 1; + else if (ut2) + res = -1; + else + res = 0; break; } + if (res && !s->increasing) + res *= -1; } if (res == 0) res = strcmp(r1->recid, r2->recid); diff --git a/src/reclists.h b/src/reclists.h index 7d5e4ab..e10ffdc 100644 --- a/src/reclists.h +++ b/src/reclists.h @@ -30,7 +30,7 @@ struct reclist; struct reclist_sortparms { int offset; - enum conf_sortkey_type type; + enum conf_metadata_type type; int increasing; char *name; struct reclist_sortparms *next; diff --git a/src/record.c b/src/record.c index 53b816a..1b9d1e3 100644 --- a/src/record.c +++ b/src/record.c @@ -123,6 +123,9 @@ int record_compare(struct record *r1, struct record *r2, m1->data.number.max != m2->data.number.max) return 0; break; + case Metadata_type_float: + if (m1->data.fnumber != m2->data.fnumber) + return 0; } m1 = m1->next; m2 = m2->next; diff --git a/src/record.h b/src/record.h index b06ad14..ada04d2 100644 --- a/src/record.h +++ b/src/record.h @@ -34,6 +34,7 @@ union data_types { int min; int max; } number; + double fnumber; }; diff --git a/src/session.c b/src/session.c index 9867d8c..a3862c1 100644 --- a/src/session.c +++ b/src/session.c @@ -1314,7 +1314,7 @@ struct record_cluster **show_range_start(struct session *se, if (se->relevance) { for (spp = sp; spp; spp = spp->next) - if (spp->type == Metadata_sortkey_relevance) + if (spp->type == Metadata_type_relevance) { relevance_prepare_read(se->relevance, se->reclist); break; @@ -1455,17 +1455,17 @@ static struct record_metadata *record_metadata_init( } *attrp = 0; - if (type == Metadata_type_generic) + switch (type) { - char *p = nmem_strdup(nmem, value); - - p = normalize7bit_generic(p, " ,/.:(["); - - rec_md->data.text.disp = p; + case Metadata_type_generic: + case Metadata_type_skiparticle: + rec_md->data.text.disp = + normalize7bit_generic(nmem_strdup(nmem, value), " ,/.:(["); rec_md->data.text.sort = 0; rec_md->data.text.snippet = 0; - } - else if (type == Metadata_type_year || type == Metadata_type_date) + break; + case Metadata_type_year: + case Metadata_type_date: { int first, last; int longdate = 0; @@ -1478,8 +1478,14 @@ static struct record_metadata *record_metadata_init( rec_md->data.number.min = first; rec_md->data.number.max = last; } - else + break; + case Metadata_type_float: + rec_md->data.fnumber = atof(value); + break; + case Metadata_type_relevance: + case Metadata_type_position: return 0; + } return rec_md; } @@ -2214,7 +2220,7 @@ static int ingest_to_cluster(struct client *cl, { const char *sort_str = 0; int skip_article = - ser_sk->type == Metadata_sortkey_skiparticle; + ser_sk->type == Metadata_type_skiparticle; if (!cluster->sortkeys[sk_field_id]) cluster->sortkeys[sk_field_id] = -- 1.7.10.4