From 1a3f3cfb2f6aeb743716924d9081d6dce981b49f Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Tue, 19 Jun 2012 11:18:01 +0200 Subject: [PATCH] per-field weight is part of "rank" attribute content For example, rank="2 au 3" gives a rank factor of 3 for terms in au=terms and a rank factor of 2 otherwise. --- doc/common | 2 +- src/pazpar2_config.c | 29 +++++++++-------------------- src/pazpar2_config.h | 5 +---- src/relevance.c | 24 +++++++++++++++--------- src/relevance.h | 4 ++-- src/session.c | 21 ++++++++------------- test/test_http.cfg | 2 +- 7 files changed, 37 insertions(+), 50 deletions(-) diff --git a/doc/common b/doc/common index c6e4db9..92b4d40 160000 --- a/doc/common +++ b/doc/common @@ -1 +1 @@ -Subproject commit c6e4db90ce6779ee42fafd38e776176d38c2844b +Subproject commit 92b4d4032574351cda7ab8762889dea6699bff94 diff --git a/src/pazpar2_config.c b/src/pazpar2_config.c index d992a33..b2064f9 100644 --- a/src/pazpar2_config.c +++ b/src/pazpar2_config.c @@ -72,12 +72,11 @@ static void conf_metadata_assign(NMEM nmem, enum conf_setting_type setting, int brief, int termlist, - int rank, + const char *rank, int sortkey_offset, enum conf_metadata_mergekey mt, const char *facetrule, - const char *limitmap, - const char *frank) + const char *limitmap) { assert(nmem && metadata && name); @@ -94,12 +93,11 @@ static void conf_metadata_assign(NMEM nmem, metadata->setting = setting; metadata->brief = brief; metadata->termlist = termlist; - metadata->rank = rank; + metadata->rank = nmem_strdup_null(nmem, rank); metadata->sortkey_offset = sortkey_offset; metadata->mergekey = mt; metadata->facetrule = nmem_strdup_null(nmem, facetrule); metadata->limitmap = nmem_strdup_null(nmem, limitmap); - metadata->frank = nmem_strdup_null(nmem, frank); } @@ -175,12 +173,11 @@ static struct conf_metadata* conf_service_add_metadata( enum conf_setting_type setting, int brief, int termlist, - int rank, + const char *rank, int sortkey_offset, enum conf_metadata_mergekey mt, const char *facetrule, - const char *limitmap, - const char *frank + const char *limitmap ) { struct conf_metadata * md = 0; @@ -192,7 +189,7 @@ static struct conf_metadata* conf_service_add_metadata( md = service->metadata + field_id; conf_metadata_assign(service->nmem, md, name, type, merge, setting, brief, termlist, rank, sortkey_offset, - mt, facetrule, limitmap, frank); + mt, facetrule, limitmap); return md; } @@ -284,7 +281,6 @@ static int parse_metadata(struct conf_service *service, xmlNode *n, enum conf_metadata_mergekey mergekey_type = Metadata_mergekey_no; int brief = 0; int termlist = 0; - int rank = 0; int sortkey_offset = 0; xmlChar *xml_name = 0; xmlChar *xml_brief = 0; @@ -297,7 +293,6 @@ static int parse_metadata(struct conf_service *service, xmlNode *n, xmlChar *xml_mergekey = 0; xmlChar *xml_limitmap = 0; xmlChar *xml_icu_chain = 0; - xmlChar *xml_frank = 0; struct _xmlAttr *attr; for (attr = n->properties; attr; attr = attr->next) @@ -335,9 +330,6 @@ static int parse_metadata(struct conf_service *service, xmlNode *n, else if (!xmlStrcmp(attr->name, BAD_CAST "limitmap") && attr->children && attr->children->type == XML_TEXT_NODE) xml_limitmap = attr->children->content; - else if (!xmlStrcmp(attr->name, BAD_CAST "frank") && - attr->children && attr->children->type == XML_TEXT_NODE) - xml_frank = attr->children->content; else { yaz_log(YLOG_FATAL, "Unknown metadata attribute '%s'", attr->name); @@ -373,9 +365,6 @@ static int parse_metadata(struct conf_service *service, xmlNode *n, } } - if (xml_rank) - rank = atoi((const char *) xml_rank); - if (xml_type) { if (!strcmp((const char *) xml_type, "generic")) @@ -477,11 +466,11 @@ static int parse_metadata(struct conf_service *service, xmlNode *n, conf_service_add_metadata(service, *md_node, (const char *) xml_name, type, merge, setting, - brief, termlist, rank, sortkey_offset, + brief, termlist, + (const char *) xml_rank, sortkey_offset, mergekey_type, (const char *) xml_icu_chain, - (const char *) xml_limitmap, - (const char *) xml_frank); + (const char *) xml_limitmap); (*md_node)++; return 0; } diff --git a/src/pazpar2_config.h b/src/pazpar2_config.h index 18dfde6..193adcf 100644 --- a/src/pazpar2_config.h +++ b/src/pazpar2_config.h @@ -73,9 +73,7 @@ struct conf_metadata char *name; // The field name. Output by normalization stylesheet int brief; // Is this element to be returned in the brief format? int termlist;// Is this field to be treated as a termlist for browsing? - int rank; // Rank factor. 0 means don't use this field for ranking, - // 1 is default - // values >1 give additional significance to a field + const char *rank; int sortkey_offset; // -1 if it's not a sortkey, otherwise index // into service/record_cluster->sortkey array enum conf_metadata_type type; @@ -85,7 +83,6 @@ struct conf_metadata char *facetrule; char *limitmap; // Should be expanded into service-wide default e.g. pz:limitmap:=value setting - char *frank; }; diff --git a/src/relevance.c b/src/relevance.c index 7f1943a..bc9cb1a 100644 --- a/src/relevance.c +++ b/src/relevance.c @@ -45,18 +45,23 @@ struct word_entry { }; static int word_entry_match(struct word_entry *entries, const char *norm_str, - const char *frank, int *local_mult) + const char *rank, int *mult) { for (; entries; entries = entries->next) { if (!strcmp(norm_str, entries->norm_str)) { const char *cp = 0; - if (frank && (cp = strchr(frank, ' '))) + int no_read = 0; + sscanf(rank, "%d%n", mult, &no_read); + rank += no_read; + while (*rank == ' ') + rank++; + if (no_read > 0 && (cp = strchr(rank, ' '))) { - if ((cp - frank) == strlen(entries->ccl_field) && - memcmp(entries->ccl_field, frank, cp - frank) == 0) - *local_mult = atoi(cp + 1); + if ((cp - rank) == strlen(entries->ccl_field) && + memcmp(entries->ccl_field, rank, cp - rank) == 0) + *mult = atoi(cp + 1); } return entries->termno; } @@ -65,8 +70,8 @@ static int word_entry_match(struct word_entry *entries, const char *norm_str, } void relevance_countwords(struct relevance *r, struct record_cluster *cluster, - const char *words, int multiplier, const char *name, - const char *frank) + const char *words, const char *rank, + const char *name) { int *mult = cluster->term_frequency_vec_tmp; const char *norm_str; @@ -75,10 +80,11 @@ void relevance_countwords(struct relevance *r, struct record_cluster *cluster, for (i = 1; i < r->vec_len; i++) mult[i] = 0; + assert(rank); while ((norm_str = pp2_charset_token_next(r->prt))) { - int local_mult = multiplier; - int res = word_entry_match(r->entries, norm_str, frank, &local_mult); + int local_mult = 0; + int res = word_entry_match(r->entries, norm_str, rank, &local_mult); if (res) { assert(res < r->vec_len); diff --git a/src/relevance.h b/src/relevance.h index b22a7a0..35949c0 100644 --- a/src/relevance.h +++ b/src/relevance.h @@ -33,8 +33,8 @@ struct relevance *relevance_create_ccl(pp2_charset_fact_t pft, void relevance_destroy(struct relevance **rp); void relevance_newrec(struct relevance *r, struct record_cluster *cluster); void relevance_countwords(struct relevance *r, struct record_cluster *cluster, - const char *words, int multiplier, const char *name, - const char *frank); + const char *words, const char *multiplier, + const char *name); void relevance_donerecord(struct relevance *r, struct record_cluster *cluster); void relevance_prepare_read(struct relevance *rel, struct reclist *rec); diff --git a/src/session.c b/src/session.c index 4049360..18ba514 100644 --- a/src/session.c +++ b/src/session.c @@ -1812,8 +1812,8 @@ static int ingest_to_cluster(struct client *cl, struct record_metadata *rec_md = 0; int md_field_id = -1; int sk_field_id = -1; - int rank = 0; - xmlChar *rank_str = 0; + const char *rank; + xmlChar *xml_rank; type = xmlGetProp(n, (xmlChar *) "type"); value = xmlNodeListGetString(xdoc, n->children, 1); @@ -1828,15 +1828,6 @@ static int ingest_to_cluster(struct client *cl, ser_md = &service->metadata[md_field_id]; - rank_str = xmlGetProp(n, (xmlChar *) "rank"); - if (rank_str) - { - rank = atoi((const char *) rank_str); - xmlFree(rank_str); - } - else - rank = ser_md->rank; - if (ser_md->sortkey_offset >= 0) { sk_field_id = ser_md->sortkey_offset; @@ -1849,6 +1840,9 @@ static int ingest_to_cluster(struct client *cl, if (!rec_md) continue; + xml_rank = xmlGetProp(n, (xmlChar *) "rank"); + rank = xml_rank ? (const char *) xml_rank : ser_md->rank; + wheretoput = &cluster->metadata[md_field_id]; // and polulate with data: @@ -1937,8 +1931,7 @@ static int ingest_to_cluster(struct client *cl, if (rank) { relevance_countwords(se->relevance, cluster, - (char *) value, rank, ser_md->name, - ser_md->frank); + (char *) value, rank, ser_md->name); } // construct facets ... unless the client already has reported them @@ -1961,6 +1954,8 @@ static int ingest_to_cluster(struct client *cl, } // cleaning up + if (xml_rank) + xmlFree(xml_rank); xmlFree(type); xmlFree(value); type = value = 0; diff --git a/test/test_http.cfg b/test/test_http.cfg index 8904f3b..53a0ff0 100644 --- a/test/test_http.cfg +++ b/test/test_http.cfg @@ -14,7 +14,7 @@ + rank="2 au 3" mergekey="optional" /> -- 1.7.10.4