From ebf44db2f0c270b3ae1c25a5dc627f8e2a932da7 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Tue, 17 May 2011 15:40:49 +0200 Subject: [PATCH] Per-field native facets, bug 4195 Honor new setting pz:facetmap:field. There is no facet parameter for search as specified in the bug report and that is also not needed. --- doc/pazpar2_conf.xml | 35 ++++++------ etc/settings/opencontent-solr.xml | 38 +++++++++++++ src/client.c | 111 +++++++++++++++++++------------------ src/session.c | 3 - src/settings.c | 4 +- src/settings.h | 15 +++-- test/test_facets.urls | 6 +- test/test_facets_11.res | 15 +++++ test/test_facets_14.res | 15 +++++ test/test_facets_17.res | 15 +++++ test/test_facets_18.res | 15 +++++ test/test_facets_24.res | 30 ++++++---- 12 files changed, 203 insertions(+), 99 deletions(-) create mode 100644 etc/settings/opencontent-solr.xml diff --git a/doc/pazpar2_conf.xml b/doc/pazpar2_conf.xml index 1f7501e..9e75b48 100644 --- a/doc/pazpar2_conf.xml +++ b/doc/pazpar2_conf.xml @@ -1076,42 +1076,39 @@ - pz:termlist_term_count - - - Specifies that the target should return up to n terms for each facets (where termlist="yes"). This implies - that the target can return facets on the search command. Requesting facets on targets that doesn't, - will return unpredictable or error result. - - - - - - pz:termlist_term_sort + pz:preferred - Specifies how the terms should be sorted. (Not yet implemented) + Specifies that a target is preferred, e.g. possible local, faster target. Using block=pref on show command + will wait for all these targets to return records before releasing the block. If no target is preferred, + the block=pref will identical to block=1, which release when one target has returned records. - pz:preferred + pz:block_timeout - Specifies that a target is preferred, e.g. possible local, faster target. Using block=pref on show command - will wait for all these targets to return records before releasing the block. If no target is preferred, - the block=pref will identical to block=1, which release when one target has returned records. + (Not yet implemented). Specifies the time for which a block should be released anyway. - pz:block_timeout + pz:facetmap:name - (Not yet implemented). Specifies the time for which a block should be released anyway. + Specifies that for field name, the target + supports (native) facets. The value is the name of the + field on the target. + + + At this point only SOLR targets have been tested with this + facility. + + diff --git a/etc/settings/opencontent-solr.xml b/etc/settings/opencontent-solr.xml new file mode 100644 index 0000000..0e3d3af --- /dev/null +++ b/etc/settings/opencontent-solr.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/client.c b/src/client.c index 0eee31c..dd3b58c 100644 --- a/src/client.c +++ b/src/client.c @@ -423,27 +423,46 @@ static int nativesyntax_to_type(struct session_database *sdb, char *type, * TODO Consider thread safety!!! * */ -int client_report_facets(struct client *cl, ZOOM_resultset rs) { +void client_report_facets(struct client *cl, ZOOM_resultset rs) +{ int facet_idx; + struct session_database *sdb = client_get_database(cl); ZOOM_facet_field *facets = ZOOM_resultset_facets(rs); int facet_num; struct session *se = client_get_session(cl); + struct setting *s; facet_num = ZOOM_resultset_facets_size(rs); yaz_log(YLOG_DEBUG, "client_report_facets: %d", facet_num); - for (facet_idx = 0; facet_idx < facet_num; facet_idx++) { - const char *name = ZOOM_facet_field_name(facets[facet_idx]); - size_t term_idx; - size_t term_num = ZOOM_facet_field_term_count(facets[facet_idx]); - for (term_idx = 0; term_idx < term_num; term_idx++ ) { - int freq; - const char *term = ZOOM_facet_field_get_term(facets[facet_idx], term_idx, &freq); - if (term) - add_facet(se, name, term, freq); + for (s = sdb->settings[PZ_FACETMAP]; s; s = s->next) + { + const char *p = strchr(s->name + 3, ':'); + if (p && p[1] && s->value && s->value[0]) + { + p++; /* p now holds logical facet name */ + for (facet_idx = 0; facet_idx < facet_num; facet_idx++) + { + const char *native_name = + ZOOM_facet_field_name(facets[facet_idx]); + if (native_name && !strcmp(s->value, native_name)) + { + size_t term_idx; + size_t term_num = + ZOOM_facet_field_term_count(facets[facet_idx]); + for (term_idx = 0; term_idx < term_num; term_idx++ ) + { + int freq; + const char *term = + ZOOM_facet_field_get_term(facets[facet_idx], + term_idx, &freq); + if (term) + add_facet(se, p, term, freq); + } + break; + } + } } } - - return 0; } static void ingest_raw_record(struct client *cl, ZOOM_record rec) @@ -606,63 +625,49 @@ void client_record_response(struct client *cl) } } -static int client_set_facets_request(struct client *cl, ZOOM_connection link) +static void client_set_facets_request(struct client *cl, ZOOM_connection link) { struct session_database *sdb = client_get_database(cl); - const char *opt_facet_term_sort = session_setting_oneval(sdb, PZ_TERMLIST_TERM_SORT); - const char *opt_facet_term_count = session_setting_oneval(sdb, PZ_TERMLIST_TERM_COUNT); - /* Future record filtering on target */ - /* const char *opt_facet_record_filter = session_setting_oneval(sdb, PZ_RECORDFILTER); */ + WRBUF w = wrbuf_alloc(); + + struct setting *s; - /* Disable when no count is set */ - /* TODO Verify: Do we need to reset the ZOOM facets if a ZOOM Connection is being reused??? */ - if (opt_facet_term_count && *opt_facet_term_count) + for (s = sdb->settings[PZ_FACETMAP]; s; s = s->next) { - int index = 0; - struct session *session = client_get_session(cl); - struct conf_service *service = session->service; - int num = service->num_metadata; - WRBUF wrbuf = wrbuf_alloc(); - yaz_log(YLOG_DEBUG, "Facet settings, sort: %s count: %s", - opt_facet_term_sort, opt_facet_term_count); - for (index = 0; index < num; index++) + const char *p = strchr(s->name + 3, ':'); + if (!p) { - struct conf_metadata *conf_meta = &service->metadata[index]; - if (conf_meta->termlist) - { - if (wrbuf_len(wrbuf)) - wrbuf_puts(wrbuf, ", "); - wrbuf_printf(wrbuf, "@attr 1=%s", conf_meta->name); - - if (opt_facet_term_sort && *opt_facet_term_sort) - wrbuf_printf(wrbuf, " @attr 2=%s", opt_facet_term_sort); - wrbuf_printf(wrbuf, " @attr 3=%s", opt_facet_term_count); - } + yaz_log(YLOG_WARN, "Malformed facetmap name: %s", s->name); } - if (wrbuf_len(wrbuf)) + else if (s->value && s->value[0]) { - yaz_log(YLOG_LOG, "Setting ZOOM facets option: %s", wrbuf_cstr(wrbuf)); - ZOOM_connection_option_set(link, "facets", wrbuf_cstr(wrbuf)); - return 1; + wrbuf_puts(w, "@attr 1="); + yaz_encode_pqf_term(w, s->value, strlen(s->value)); + if (s->next) + wrbuf_puts(w, ","); } } - return 0; + yaz_log(YLOG_LOG, "using facets str: %s", wrbuf_cstr(w)); + ZOOM_connection_option_set(link, "facets", + wrbuf_len(w) ? wrbuf_cstr(w) : 0); + wrbuf_destroy(w); } -int client_has_facet(struct client *cl, const char *name) { - ZOOM_facet_field facet_field; - if (!cl || !cl->resultset || !name) { - return 0; - } - facet_field = ZOOM_resultset_get_facet_field(cl->resultset, name); - if (facet_field) { - return 1; +int client_has_facet(struct client *cl, const char *name) +{ + struct session_database *sdb = client_get_database(cl); + struct setting *s; + + for (s = sdb->settings[PZ_FACETMAP]; s; s = s->next) + { + const char *p = strchr(s->name + 3, ':'); + if (p && !strcmp(name, p + 1)) + return 1; } return 0; } - void client_start_search(struct client *cl) { struct session_database *sdb = client_get_database(cl); diff --git a/src/session.c b/src/session.c index 75fbb17..0453377 100644 --- a/src/session.c +++ b/src/session.c @@ -590,8 +590,6 @@ int session_is_preferred_clients_ready(struct session *s) return res == 0; } - - enum pazpar2_error_code search(struct session *se, const char *query, const char *startrecs, const char *maxrecs, @@ -1503,7 +1501,6 @@ static int ingest_to_cluster(struct client *cl, // construct facets ... unless the client already has reported them if (ser_md->termlist && !client_has_facet(cl, (char *) type)) { - if (ser_md->type == Metadata_type_year) { char year[64]; diff --git a/src/settings.c b/src/settings.c index 691e627..b8c1d0a 100644 --- a/src/settings.c +++ b/src/settings.c @@ -71,14 +71,12 @@ static char *hard_settings[] = { "pz:negotiation_charset", "pz:max_connections", "pz:reuse_connections", /* PZ_REUSE_CONNECTION */ - "pz:termlist_term_sort", /* PZ_TERMLIST_TERM_SORT */ - "pz:termlist_term_count", /* PZ_TERMLIST_TERM_COUNT */ "pz:termlist_term_factor", /* PZ_TERMLIST_TERM_FACTOR*/ "pz:preferred", /* PZ_PREFERRED */ "pz:extra_args", /* PZ_EXTRA_ARGS */ "pz:query_syntax", /* PZ_QUERY_SYNTAX */ "pz:option_recordfilter", /* PZ_OPTION_RECORDFILTER */ - + "pz:facetmap:", /* PZ_FACETMAP */ 0 }; diff --git a/src/settings.h b/src/settings.h index 86eb31d..5862b9e 100644 --- a/src/settings.h +++ b/src/settings.h @@ -44,14 +44,13 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #define PZ_NEGOTIATION_CHARSET 21 #define PZ_MAX_CONNECTIONS 22 #define PZ_REUSE_CONNECTIONS 23 -#define PZ_TERMLIST_TERM_SORT 24 -#define PZ_TERMLIST_TERM_COUNT 25 -#define PZ_TERMLIST_TERM_FACTOR 26 -#define PZ_PREFERRED 27 -#define PZ_EXTRA_ARGS 28 -#define PZ_QUERY_SYNTAX 29 -#define PZ_OPTION_RECORDFILTER 30 -#define PZ_MAX_EOF 31 +#define PZ_TERMLIST_TERM_FACTOR 24 +#define PZ_PREFERRED 25 +#define PZ_EXTRA_ARGS 26 +#define PZ_QUERY_SYNTAX 27 +#define PZ_OPTION_RECORDFILTER 28 +#define PZ_FACETMAP 29 +#define PZ_MAX_EOF 30 struct setting { diff --git a/test/test_facets.urls b/test/test_facets.urls index eea7f7b..af9d74a 100644 --- a/test/test_facets.urls +++ b/test/test_facets.urls @@ -1,10 +1,10 @@ http://localhost:9763/search.pz2?command=init&clear=1 -http://localhost:9763/search.pz2?session=1&command=settings&pz:name%5Blocalhost:9999%2Fdb1%5D=db1&pz:requestsyntax%5Blocalhost:9999%2Fdb1%5D=usmarc&pz:nativesyntax%5Blocalhost:9999%2Fdb1%5D=txml&pz:xslt%5Blocalhost:9999%2Fdb1%5D=tmarc.xsl&pz:termlist_term_count%5Blocalhost:9999%2Fdb1%5D=0 +http://localhost:9763/search.pz2?session=1&command=settings&pz:name%5Blocalhost:9999%2Fdb1%5D=db1&pz:requestsyntax%5Blocalhost:9999%2Fdb1%5D=usmarc&pz:nativesyntax%5Blocalhost:9999%2Fdb1%5D=txml&pz:xslt%5Blocalhost:9999%2Fdb1%5D=tmarc.xsl http://localhost:9763/search.pz2?session=1&command=search&query=7+and+computer http://localhost:9763/search.pz2?session=1&command=show&block=1 http://localhost:9763/search.pz2?session=1&command=bytarget 4 http://localhost:9763/search.pz2?session=1&command=termlist&name=xtargets%2Cauthor%2Csubject%2Cdate%2Cmedium -http://localhost:9763/search.pz2?session=1&command=settings&pz:name%5Blocalhost:9999%2Fdb1%5D=db1&pz:requestsyntax%5Blocalhost:9999%2Fdb1%5D=usmarc&pz:nativesyntax%5Blocalhost:9999%2Fdb1%5D=txml&pz:xslt%5Blocalhost:9999%2Fdb1%5D=tmarc.xsl&pz:termlist_term_count%5Blocalhost:9999%2Fdb1%5D=5&pz:maxrecs%5Blocalhost:9999%2Fdb1%5D=2 +http://localhost:9763/search.pz2?session=1&command=settings&pz:name%5Blocalhost:9999%2Fdb1%5D=db1&pz:requestsyntax%5Blocalhost:9999%2Fdb1%5D=usmarc&pz:nativesyntax%5Blocalhost:9999%2Fdb1%5D=txml&pz:xslt%5Blocalhost:9999%2Fdb1%5D=tmarc.xsl&pz:maxrecs%5Blocalhost:9999%2Fdb1%5D=2&pz:facetmap:author%5Blocalhost:9999%2Fdb1%5D=author&pz:facetmap:subject%5Blocalhost:9999%2Fdb1%5D=subject&pz:facetmap:date%5Blocalhost:9999%2Fdb1%5D=date http://localhost:9763/search.pz2?session=1&command=search&query=4+and+computer http://localhost:9763/search.pz2?session=1&command=show&block=1 http://localhost:9763/search.pz2?session=1&command=bytarget @@ -18,7 +18,7 @@ http://localhost:9763/search.pz2?session=1&command=termlist&name=xtargets%2Cauth 10 http://localhost:9763/search.pz2?session=1&command=termlist&name=xtargets%2Cauthor%2Csubject%2Cdate%2Cmedium http://localhost:9763/search.pz2?session=1&command=bytarget http://localhost:9763/search.pz2?command=init -http://localhost:9763/search.pz2?session=2&command=settings&pz:name%5Blocalhost:9999%2Fdb1%5D=db1&pz:requestsyntax%5Blocalhost:9999%2Fdb1%5D=usmarc&pz:nativesyntax%5Blocalhost:9999%2Fdb1%5D=txml&pz:xslt%5Blocalhost:9999%2Fdb1%5D=tmarc.xsl&pz:apdulog%5Blocalhost:9999%2Fdb1%5D=1 +http://localhost:9763/search.pz2?session=2&command=settings&pz:name%5Blocalhost:9999%2Fdb1%5D=db1&pz:requestsyntax%5Blocalhost:9999%2Fdb1%5D=usmarc&pz:nativesyntax%5Blocalhost:9999%2Fdb1%5D=txml&pz:xslt%5Blocalhost:9999%2Fdb1%5D=tmarc.xsl&pz:apdulog%5Blocalhost:9999%2Fdb1%5D=1&pz:facetmap:date%5Blocalhost:9999%2Fdb1%5D=date http://localhost:9763/search.pz2?session=2&command=search&query=9+and+computer 10 http://localhost:9763/search.pz2?session=2&command=show&block=1 http://localhost:9763/search.pz2?session=2&command=termlist&name=xtargets%2Cauthor%2Csubject%2Cdate%2Cmedium diff --git a/test/test_facets_11.res b/test/test_facets_11.res index 64048ad..a63c538 100644 --- a/test/test_facets_11.res +++ b/test/test_facets_11.res @@ -15,6 +15,11 @@ author280 author370 author460 +author550 +author640 +author730 +author820 +author910 subject0100 @@ -22,6 +27,11 @@ subject280 subject370 subject460 +subject550 +subject640 +subject730 +subject820 +subject910 date0100 @@ -29,6 +39,11 @@ date280 date370 date460 +date550 +date640 +date730 +date820 +date910 diff --git a/test/test_facets_14.res b/test/test_facets_14.res index 0cf8475..812c1c9 100644 --- a/test/test_facets_14.res +++ b/test/test_facets_14.res @@ -15,6 +15,11 @@ author280 author370 author460 +author550 +author640 +author730 +author820 +author910 subject0100 @@ -22,6 +27,11 @@ subject280 subject370 subject460 +subject550 +subject640 +subject730 +subject820 +subject910 date0100 @@ -29,6 +39,11 @@ date280 date370 date460 +date550 +date640 +date730 +date820 +date910 diff --git a/test/test_facets_17.res b/test/test_facets_17.res index e988091..abb5f68 100644 --- a/test/test_facets_17.res +++ b/test/test_facets_17.res @@ -15,6 +15,11 @@ author280 author370 author460 +author550 +author640 +author730 +author820 +author910 subject0100 @@ -22,6 +27,11 @@ subject280 subject370 subject460 +subject550 +subject640 +subject730 +subject820 +subject910 date0100 @@ -29,6 +39,11 @@ date280 date370 date460 +date550 +date640 +date730 +date820 +date910 diff --git a/test/test_facets_18.res b/test/test_facets_18.res index e988091..abb5f68 100644 --- a/test/test_facets_18.res +++ b/test/test_facets_18.res @@ -15,6 +15,11 @@ author280 author370 author460 +author550 +author640 +author730 +author820 +author910 subject0100 @@ -22,6 +27,11 @@ subject280 subject370 subject460 +subject550 +subject640 +subject730 +subject820 +subject910 date0100 @@ -29,6 +39,11 @@ date280 date370 date460 +date550 +date640 +date730 +date820 +date910 diff --git a/test/test_facets_24.res b/test/test_facets_24.res index a8f2b85..a872155 100644 --- a/test/test_facets_24.res +++ b/test/test_facets_24.res @@ -10,18 +10,23 @@ -author0100 -author190 -author280 -author370 -author460 +Jack Collins2 +Mairs, John W1 +Wood, Helen M1 +Englund, Carl R1 -subject0100 -subject190 -subject280 -subject370 -subject460 +Radioisotope scanning1 +Scintillation cameras1 +Imaging systems in medicine1 +Bible. O.T1 +Bible1 +Cartography1 +Puget Sound region (Wash.)1 +Tomography1 +Optical pattern recognition1 +Computers1 +Railroads1 date0100 @@ -29,6 +34,11 @@ date280 date370 date460 +date550 +date640 +date730 +date820 +date910 -- 1.7.10.4