From 942a772d08b2e0eb14ad66549daa08bbb0c1fe94 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Tue, 8 Nov 2011 12:07:05 +0100 Subject: [PATCH] Conf pz:limitmap may perform local filtering If pz:limitmap's leading string is 'local:', Pazpar2 will perform local filtering for the search commands' limit parameter. Add tests for local filtering on subject and date. --- doc/pazpar2_conf.xml | 13 ++++--- src/client.c | 33 +++++++++++++++++ src/client.h | 6 ++- src/facet_limit.c | 22 +++++++++++ src/facet_limit.h | 2 + src/session.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++-- test/test_http.urls | 18 +++++---- test/test_http_73.res | 2 + test/test_http_74.res | 28 ++++++++++++++ test/test_http_75.res | 2 + test/test_http_76.res | 77 +++++++++++++++++++++++++++++++++++++++ 11 files changed, 282 insertions(+), 18 deletions(-) create mode 100644 test/test_http_73.res create mode 100644 test/test_http_74.res create mode 100644 test/test_http_75.res create mode 100644 test/test_http_76.res diff --git a/doc/pazpar2_conf.xml b/doc/pazpar2_conf.xml index dcc1fdf..f41070c 100644 --- a/doc/pazpar2_conf.xml +++ b/doc/pazpar2_conf.xml @@ -1242,20 +1242,23 @@ Specifies attributes for limiting a search to a field - using - the limit parameter for search. In some cases the mapping of + the limit parameter for search. It can be used to filter locally + or remotely (search in a target). In some cases the mapping of a field to a value is identical to an existing cclmap field; in other cases the field must be specified in a different way - for example to match a complete field (rather than parts of a subfield). - The value of limitmap may have one of two forms: referral to - an exisiting CCL field or a raw PQF string. Leading string - determines type; either ccl: for CCL field or - rpn: for PQF/RPN. + The value of limitmap may have one of three forms: referral to + an existing CCL field, a raw PQF string or a local limit. Leading string + determines type; either ccl: for CCL field, + rpn: for PQF/RPN, or local: + for filtering in Pazpar2. The limitmap facility is supported for Pazpar2 version 1.6.0. + Local filtering is supported in Pazpar2 1.6.6. diff --git a/src/client.c b/src/client.c index 0ae3641..9e0f903 100644 --- a/src/client.c +++ b/src/client.c @@ -121,6 +121,7 @@ struct client { YAZ_MUTEX mutex; int ref_count; char *id; + facet_limits_t facet_limits; }; struct suggestions { @@ -827,6 +828,7 @@ struct client *client_create(const char *id) pazpar2_mutex_create(&cl->mutex, "client"); cl->preferred = 0; cl->ref_count = 1; + cl->facet_limits = 0; assert(id); cl->id = xstrdup(id); client_use(1); @@ -865,6 +867,7 @@ int client_destroy(struct client *c) c->cqlquery = 0; xfree(c->id); assert(!c->connection); + facet_limits_destroy(c->facet_limits); if (c->resultset) { @@ -988,6 +991,33 @@ static char *make_solrquery(struct client *cl) return r; } +const char *client_get_facet_limit_local(struct client *cl, + struct session_database *sdb, + int *l, + NMEM nmem, int *num, char ***values) +{ + const char *name = 0; + const char *value = 0; + for (; (name = facet_limits_get(cl->facet_limits, *l, &value)); (*l)++) + { + struct setting *s = 0; + + for (s = sdb->settings[PZ_LIMITMAP]; s; s = s->next) + { + const char *p = strchr(s->name + 3, ':'); + if (p && !strcmp(p + 1, name) && s->value && + !strncmp(s->value, "local:", 6)) + { + nmem_strsplit_escape2(nmem, "|", value, values, + num, 1, '\\', 1); + (*l)++; + return name; + } + } + } + return 0; +} + static int apply_limit(struct session_database *sdb, facet_limits_t facet_limits, WRBUF w_pqf, WRBUF w_ccl) @@ -1112,6 +1142,9 @@ int client_parse_query(struct client *cl, const char *query, if (apply_limit(sdb, facet_limits, w_pqf, w_ccl)) return -2; + facet_limits_destroy(cl->facet_limits); + cl->facet_limits = facet_limits_dup(facet_limits); + yaz_log(YLOG_LOG, "CCL query: %s", wrbuf_cstr(w_ccl)); cn = ccl_find_str(ccl_map, wrbuf_cstr(w_ccl), &cerror, &cpos); ccl_qual_rm(&ccl_map); diff --git a/src/client.h b/src/client.h index 97e8b68..0b1d413 100644 --- a/src/client.h +++ b/src/client.h @@ -103,8 +103,10 @@ void client_unlock(struct client *c); int client_has_facet(struct client *cl, const char *name); void client_check_preferred_watch(struct client *cl); void client_reingest(struct client *cl); - - +const char *client_get_facet_limit_local(struct client *cl, + struct session_database *sdb, + int *l, + NMEM nmem, int *num, char ***values); #endif /* diff --git a/src/facet_limit.c b/src/facet_limit.c index 44bcff0..92d7eaf 100644 --- a/src/facet_limit.c +++ b/src/facet_limit.c @@ -39,6 +39,28 @@ struct facet_limits { char **darray; }; +facet_limits_t facet_limits_dup(facet_limits_t fl) +{ + int i; + NMEM nmem = nmem_create(); + facet_limits_t fn = nmem_malloc(nmem, sizeof(*fn)); + fn->nmem = nmem; + fn->num = fl->num; + fn->darray = 0; + if (fl->num) + { + fn->darray = nmem_malloc(nmem, fn->num * sizeof(*fn->darray)); + for (i = 0; i < fn->num; i++) + { + const char *src = fl->darray[i]; + size_t sz = strlen(src) + 2 + strlen(src + strlen(src) + 1); + fn->darray[i] = nmem_malloc(nmem, sz); + memcpy(fn->darray[i], src, sz); + } + } + return fn; +} + facet_limits_t facet_limits_create(const char *param) { int i; diff --git a/src/facet_limit.h b/src/facet_limit.h index 60b8e9a..60ce8ce 100644 --- a/src/facet_limit.h +++ b/src/facet_limit.h @@ -32,6 +32,8 @@ const char *facet_limits_get(facet_limits_t fl, int idx, const char **value); void facet_limits_destroy(facet_limits_t fl); +facet_limits_t facet_limits_dup(facet_limits_t fl); + #endif /* diff --git a/src/session.c b/src/session.c index ac41b36..5d34aa4 100644 --- a/src/session.c +++ b/src/session.c @@ -165,13 +165,14 @@ static void session_leave(struct session *s) yaz_mutex_leave(s->session_mutex); } -void add_facet(struct session *s, const char *type, const char *value, int count) +static void session_normalize_facet(struct session *s, const char *type, + const char *value, + WRBUF display_wrbuf, + WRBUF facet_wrbuf) { struct conf_service *service = s->service; pp2_charset_token_t prt; const char *facet_component; - WRBUF facet_wrbuf = wrbuf_alloc(); - WRBUF display_wrbuf = wrbuf_alloc(); int i; const char *icu_chain_id = 0; @@ -208,6 +209,14 @@ void add_facet(struct session *s, const char *type, const char *value, int count } } pp2_charset_token_destroy(prt); +} + +void add_facet(struct session *s, const char *type, const char *value, int count) +{ + WRBUF facet_wrbuf = wrbuf_alloc(); + WRBUF display_wrbuf = wrbuf_alloc(); + + session_normalize_facet(s, type, value, display_wrbuf, facet_wrbuf); if (wrbuf_len(facet_wrbuf)) { @@ -1564,13 +1573,83 @@ int ingest_record(struct client *cl, const char *rec, } session_enter(se); if (client_get_session(cl) == se) - ingest_to_cluster(cl, xdoc, root, record_no, mergekey_norm); + ret = ingest_to_cluster(cl, xdoc, root, record_no, mergekey_norm); session_leave(se); xmlFreeDoc(xdoc); return ret; } +static int check_limit_local(struct client *cl, + struct record *record) +{ + int skip_record = 0; + struct session *se = client_get_session(cl); + struct conf_service *service = se->service; + NMEM nmem_tmp = nmem_create(); + struct session_database *sdb = client_get_database(cl); + int l = 0; + while (!skip_record) + { + struct conf_metadata *ser_md = 0; + struct record_metadata *rec_md = 0; + int md_field_id; + char **values = 0; + int i, num_v = 0; + + const char *name = + client_get_facet_limit_local(cl, sdb, &l, nmem_tmp, &num_v, + &values); + if (!name) + break; + + md_field_id = conf_service_metadata_field_id(service, name); + if (md_field_id < 0) + { + skip_record = 1; + break; + } + ser_md = &service->metadata[md_field_id]; + rec_md = record->metadata[md_field_id]; + for (i = 0; i < num_v; ) + { + if (rec_md) + { + if (ser_md->type == Metadata_type_year + || ser_md->type == Metadata_type_date) + { + int y = atoi(values[i]); + if (y >= rec_md->data.number.min + && y <= rec_md->data.number.max) + break; + } + else + { + yaz_log(YLOG_LOG, "cmp: '%s' '%s'", + rec_md->data.text.disp, values[i]); + if (!strcmp(rec_md->data.text.disp, values[i])) + { + break; + } + } + rec_md = rec_md->next; + } + else + { + rec_md = record->metadata[md_field_id]; + i++; + } + } + if (i == num_v) + { + skip_record = 1; + break; + } + } + nmem_destroy(nmem_tmp); + return skip_record; +} + static int ingest_to_cluster(struct client *cl, xmlDoc *xdoc, xmlNode *root, @@ -1644,6 +1723,16 @@ static int ingest_to_cluster(struct client *cl, } } + if (check_limit_local(cl, record)) + { + session_log(se, YLOG_LOG, "Facet filtered out record no %d from %s", + record_no, sdb->database->id); + if (type) + xmlFree(type); + if (value) + xmlFree(value); + return -2; + } cluster = reclist_insert(se->reclist, service, record, mergekey_norm, &se->total_merged); if (!cluster) diff --git a/test/test_http.urls b/test/test_http.urls index ef1dbf0..de45220 100644 --- a/test/test_http.urls +++ b/test/test_http.urls @@ -55,18 +55,22 @@ http://localhost:9763/search.pz2?session=8&command=search&query=xyzzyz 2 http://localhost:9763/search.pz2?session=8&command=show&block=1 http://localhost:9763/search.pz2?session=8&command=search&query=a+and 1 http://localhost:9763/search.pz2?session=8&command=show&block=1 -http://localhost:9763/search.pz2?command=init&pz:limitmap:author%5Bz3950.indexdata.com%2Fmarc%5D=ccl:author_phrase -1 http://localhost:9763/search.pz2?session=9&command=search&query=greece&limit=author%3dadam\,+james%7Cother_author +http://localhost:9763/search.pz2?command=init&pz:limitmap:author%5Bz3950.indexdata.com%2Fmarc%5D=ccl:author_phrase&pz:limitmap:subject%5Bz3950.indexdata.com%2fmarc%5D=local:&pz:limitmap:date%5Bz3950.indexdata.com%2fmarc%5D=local: +1 http://localhost:9763/search.pz2?session=9&command=search&query=greece&limit=author%3Dadam\,+james%7Cother_author 1 http://localhost:9763/search.pz2?session=9&command=show&block=1 -http://localhost:9763/search.pz2?session=9&command=settings&pz:limitmap:author%5Bz3950.indexdata.com%2Fmarc%5D=rpn:%40attr+1%3d1003+%40attr+6%3d3 -1 http://localhost:9763/search.pz2?session=9&command=search&query=greece&limit=author%3dadam\,+james%7Cother_author +http://localhost:9763/search.pz2?session=9&command=settings&pz:limitmap:author%5Bz3950.indexdata.com%2Fmarc%5D=rpn:%40attr+1%3D1003+%40attr+6%3D3 +1 http://localhost:9763/search.pz2?session=9&command=search&query=greece&limit=author%3Dadam\,+james%7Cother_author 1 http://localhost:9763/search.pz2?session=9&command=show&block=1 -http://localhost:9763/search.pz2?session=9&command=search&query=greece&limit=author%3dadam\,+james%7Cother_author&filter=pz%3Aid%3Dz3950.indexdata.com%2Fmarc +http://localhost:9763/search.pz2?session=9&command=search&query=greece&limit=author%3Dadam\,+james%7Cother_author&filter=pz%3Aid%3Dz3950.indexdata.com%2Fmarc http://localhost:9763/search.pz2?session=9&command=bytarget http://localhost:9763/search.pz2?session=9&command=show -http://localhost:9763/search.pz2?session=9&command=search&query=greece&limit=author%3dadam\,+james%7Cother_author&filter=pz%3Aid%3Dnone +http://localhost:9763/search.pz2?session=9&command=search&query=greece&limit=author%3Dadam\,+james%7Cother_author&filter=pz%3Aid%3Dnone http://localhost:9763/search.pz2?session=9&command=bytarget http://localhost:9763/search.pz2?session=9&command=show -http://localhost:9763/search.pz2?session=9&command=search&query=greece&limit=author%3dadam\,+james%7Cother_author +http://localhost:9763/search.pz2?session=9&command=search&query=greece&limit=author%3Dadam\,+james%7Cother_author http://localhost:9763/search.pz2?session=9&command=bytarget http://localhost:9763/search.pz2?session=9&command=show +http://localhost:9763/search.pz2?session=9&command=search&query=computer&limit=subject%3DRailroads +http://localhost:9763/search.pz2?session=9&command=show&block=1 +http://localhost:9763/search.pz2?session=9&command=search&query=computer&limit=date%3D1977 +http://localhost:9763/search.pz2?session=9&command=show&block=1 diff --git a/test/test_http_73.res b/test/test_http_73.res new file mode 100644 index 0000000..ab63fe6 --- /dev/null +++ b/test/test_http_73.res @@ -0,0 +1,2 @@ + +OK \ No newline at end of file diff --git a/test/test_http_74.res b/test/test_http_74.res new file mode 100644 index 0000000..64e8673 --- /dev/null +++ b/test/test_http_74.res @@ -0,0 +1,28 @@ + +OK +0 +1 +10 +0 +1 + + +Washington metropolitan area rail computer feasibility study; +final report +1971 +Englund, Carl R +Railroads +"Contract DOT-UT-10003." +Washington metropolitan area rail computer feasibility study; +final report +1971 +Englund, Carl R +Railroads +"Contract DOT-UT-10003." +XXXXXXXXXX +test-usersetting-2 data: + YYYYYYYYY +85714 +content: title washington metropolitan area rail computer feasibility study author englund carl r medium book + + \ No newline at end of file diff --git a/test/test_http_75.res b/test/test_http_75.res new file mode 100644 index 0000000..ab63fe6 --- /dev/null +++ b/test/test_http_75.res @@ -0,0 +1,2 @@ + +OK \ No newline at end of file diff --git a/test/test_http_76.res b/test/test_http_76.res new file mode 100644 index 0000000..ba0f848 --- /dev/null +++ b/test/test_http_76.res @@ -0,0 +1,77 @@ + +OK +0 +4 +10 +0 +4 + + +Computer science & technology +proceedings of a workshop held at the National Bureau of Standards, Gaithersburg, MD, June 3-4, 1976 +1977 +Optical pattern recognition +Computer science & technology +proceedings of a workshop held at the National Bureau of Standards, Gaithersburg, MD, June 3-4, 1976 +1977 +Optical pattern recognition +XXXXXXXXXX +test-usersetting-2 data: + YYYYYYYYY +57536 +content: title computer science technology author medium book + + + +The Computer Bible +1973-1980 +Bible. O.T +Bible +Vols. 2, 8: Missoula, Mont. : Published by Scholars Press for Biblical Research Associates +The Computer Bible +1973-1980 +Bible. O.T +Bible +Hebrew and Greek; introductions in English +Vols. 2, 8: Missoula, Mont. : Published by Scholars Press for Biblical Research Associates +XXXXXXXXXX +test-usersetting-2 data: + YYYYYYYYY +57536 +content: title the computer bible author medium book + + + +The use of passwords for controlled access to computer resources +1977 +Wood, Helen M +Computers +The use of passwords for controlled access to computer resources +1977 +Wood, Helen M +Computers +XXXXXXXXXX +test-usersetting-2 data: + YYYYYYYYY +17260 +content: title the use of passwords for controlled access to computer resources author wood helen m medium book + + + +Reconstruction tomography in diagnostic radiology and nuclear medicine +proceedings of the workshop +1977 +Tomography +Includes bibliographical references and index +Reconstruction tomography in diagnostic radiology and nuclear medicine +proceedings of the workshop +1977 +Tomography +Includes bibliographical references and index +XXXXXXXXXX +test-usersetting-2 data: + YYYYYYYYY +0 +content: title reconstruction tomography in diagnostic radiology and nuclear medicine author medium book + + \ No newline at end of file -- 1.7.10.4