From e07a5d5dfcf5be4cc9eb3ab91a60866ff0aee09c Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Wed, 12 Oct 2011 11:30:08 +0200 Subject: [PATCH] Initial search may perform (relevance) sort The increasing flag is now taken into consideration. The pz:sortmap does not include sort spec flags anymore: just strategy:field. More test results added. It is clear that the block flag is not honored for show as it should be: we would like block=1 to wait until the first record comes in - after (re)search with different sort criteria. --- src/client.c | 43 ++++++++++--------- src/client.h | 4 +- src/http_command.c | 6 +-- src/session.c | 113 ++++++++++++++++++++++++-------------------------- src/session.h | 14 ++++--- test/test_url.urls | 8 ++-- test/test_url_10.res | 71 +++++++++++++++++++++++++++++++ test/test_url_9.res | 53 +++++++++++++++++++++++ 8 files changed, 221 insertions(+), 91 deletions(-) create mode 100644 test/test_url_10.res create mode 100644 test/test_url_9.res diff --git a/src/client.c b/src/client.c index 05f6541..39190c0 100644 --- a/src/client.c +++ b/src/client.c @@ -659,8 +659,8 @@ int client_has_facet(struct client *cl, const char *name) return 0; } -void client_start_search(struct client *cl, const char *sort_strategy, - const char *sort_spec) +void client_start_search(struct client *cl, const char *sort_strategy_and_spec, + int increasing) { struct session_database *sdb = client_get_database(cl); struct connection *co = client_get_connection(cl); @@ -676,6 +676,7 @@ void client_start_search(struct client *cl, const char *sort_strategy, const char *opt_preferred = session_setting_oneval(sdb, PZ_PREFERRED); const char *extra_args = session_setting_oneval(sdb, PZ_EXTRA_ARGS); char maxrecs_str[24], startrecs_str[24]; + ZOOM_query q; assert(link); @@ -727,38 +728,42 @@ void client_start_search(struct client *cl, const char *sort_strategy, /* facets definition is in PQF */ client_set_facets_request(cl, link); + q = ZOOM_query_create(); if (cl->cqlquery) { - 
ZOOM_query q = ZOOM_query_create(); yaz_log(YLOG_LOG, "Search %s CQL: %s", client_get_id(cl), cl->cqlquery); ZOOM_query_cql(q, cl->cqlquery); if (*opt_sort) ZOOM_query_sortby(q, opt_sort); - if (sort_strategy && sort_spec) - { - yaz_log(YLOG_LOG, "applying %s %s", sort_strategy, sort_spec); - ZOOM_query_sortby2(q, sort_strategy, sort_spec); - } - rs = ZOOM_connection_search(link, q); - ZOOM_query_destroy(q); } else { - ZOOM_query q = ZOOM_query_create(); - yaz_log(YLOG_LOG, "Search %s PQF: %s", client_get_id(cl), cl->pquery); - + ZOOM_query_prefix(q, cl->pquery); - - if (sort_strategy && sort_spec) + } + if (sort_strategy_and_spec && + strlen(sort_strategy_and_spec) < 40 /* spec below */) + { + char spec[50], *p; + strcpy(spec, sort_strategy_and_spec); + p = strchr(spec, ':'); + if (p) { - yaz_log(YLOG_LOG, "applying %s %s", sort_strategy, sort_spec); - ZOOM_query_sortby2(q, sort_strategy, sort_spec); + *p++ = '\0'; /* cut the string in two */ + while (*p == ' ') + p++; + if (increasing) + strcat(p, " <"); + else + strcat(p, " >"); + yaz_log(YLOG_LOG, "applying %s %s", spec, p); + ZOOM_query_sortby2(q, spec, p); } - rs = ZOOM_connection_search(link, q); - ZOOM_query_destroy(q); } + rs = ZOOM_connection_search(link, q); + ZOOM_query_destroy(q); ZOOM_resultset_destroy(cl->resultset); cl->resultset = rs; connection_continue(co); diff --git a/src/client.h b/src/client.h index d7da767..2fb8ada 100644 --- a/src/client.h +++ b/src/client.h @@ -76,8 +76,8 @@ int client_prep_connection(struct client *cl, int operation_timeout, int session_timeout, iochan_man_t iochan, const struct timeval *abstime); -void client_start_search(struct client *cl, const char *sort_strategy, - const char *sort_spec); +void client_start_search(struct client *cl, const char *sort_strategy_and_spec, + int increasing); void client_set_session(struct client *cl, struct session *se); int client_is_active(struct client *cl); int client_is_active_preferred(struct client *cl); diff --git 
a/src/http_command.c b/src/http_command.c index 9874653..a4d5106 100644 --- a/src/http_command.c +++ b/src/http_command.c @@ -969,7 +969,7 @@ static void cmd_show(struct http_channel *c) release_session(c, s); return; } - search_sort(s->psession, sp->name, sp->increasing); + session_sort(s->psession, sp->name, sp->increasing); if (block) { @@ -1039,8 +1039,8 @@ static void cmd_search(struct http_channel *c) release_session(c, s); return; } - code = search(s->psession, query, startrecs, maxrecs, filter, limit, - &addinfo); + code = session_search(s->psession, query, startrecs, maxrecs, filter, limit, + &addinfo, "relevance", 0); if (code) { error(rs, code, addinfo); diff --git a/src/session.c b/src/session.c index c9628e4..0e5427e 100644 --- a/src/session.c +++ b/src/session.c @@ -597,7 +597,32 @@ int session_is_preferred_clients_ready(struct session *s) return res == 0; } -void search_sort(struct session *se, const char *field, int increasing) +static const char *get_strategy_plus_sort(struct client *l, const char *field) +{ + struct session_database *sdb = client_get_database(l); + struct setting *s; + + const char *strategy_plus_sort = 0; + + for (s = sdb->settings[PZ_SORTMAP]; s; s = s->next) + { + char *p = strchr(s->name + 3, ':'); + if (!p) + { + yaz_log(YLOG_WARN, "Malformed sortmap name: %s", s->name); + continue; + } + p++; + if (!strcmp(p, field)) + { + strategy_plus_sort = s->value; + break; + } + } + return strategy_plus_sort; +} + +void session_sort(struct session *se, const char *field, int increasing) { struct session_sorted_results *sr; struct client_list *l; @@ -628,26 +653,7 @@ void search_sort(struct session *se, const char *field, int increasing) for (l = se->clients; l; l = l->next) { struct client *cl = l->client; - struct session_database *sdb = client_get_database(cl); - struct setting *s; - const char *strategy_plus_sort = 0; - - for (s = sdb->settings[PZ_SORTMAP]; s; s = s->next) - { - char *p = strchr(s->name + 3, ':'); - if (!p) - { - 
yaz_log(YLOG_WARN, "Malformed sortmap name: %s", s->name); - continue; - } - p++; - if (!strcmp(p, field)) - { - strategy_plus_sort = s->value; - break; - } - } - + const char *strategy_plus_sort = get_strategy_plus_sort(cl, field); if (strategy_plus_sort) { struct timeval tval; @@ -655,30 +661,20 @@ void search_sort(struct session *se, const char *field, int increasing) se->service->z3950_session_timeout, se->service->server->iochan_man, &tval)) - { - char **array; - int num; - nmem_strsplit(se->nmem, ":", strategy_plus_sort, &array, &num); - - if (num == 2) - { - const char *sort_spec = array[1]; - while (*sort_spec == ' ') - sort_spec++; - client_start_search(cl, array[0], sort_spec); - } - } + client_start_search(cl, strategy_plus_sort, increasing); } } session_leave(se); } -enum pazpar2_error_code search(struct session *se, - const char *query, - const char *startrecs, const char *maxrecs, - const char *filter, - const char *limit, - const char **addinfo) +enum pazpar2_error_code session_search(struct session *se, + const char *query, + const char *startrecs, + const char *maxrecs, + const char *filter, + const char *limit, + const char **addinfo, + const char *sort_field, int increasing) { int live_channels = 0; int no_working = 0; @@ -703,8 +699,8 @@ enum pazpar2_error_code search(struct session *se, /* reset list of sorted results and clear to relevance search */ se->sorted_results = nmem_malloc(se->nmem, sizeof(*se->sorted_results)); - se->sorted_results->field = nmem_strdup(se->nmem, "relevance"); - se->sorted_results->increasing = 0; + se->sorted_results->field = nmem_strdup(se->nmem, sort_field); + se->sorted_results->increasing = increasing; se->sorted_results->next = 0; live_channels = select_targets(se, filter); @@ -729,6 +725,7 @@ enum pazpar2_error_code search(struct session *se, for (l = se->clients; l; l = l->next) { struct client *cl = l->client; + const char *strategy_plus_sort = get_strategy_plus_sort(cl, sort_field); if (maxrecs) 
client_set_maxrecs(cl, atoi(maxrecs)); @@ -745,7 +742,7 @@ enum pazpar2_error_code search(struct session *se, se->service->z3950_session_timeout, se->service->server->iochan_man, &tval)) - client_start_search(cl, 0, 0); + client_start_search(cl, strategy_plus_sort, increasing); } } facet_limits_destroy(facet_limits); @@ -1535,6 +1532,9 @@ static int ingest_to_cluster(struct client *cl, xmlChar *value = 0; struct session *se = client_get_session(cl); struct conf_service *service = se->service; + int term_factor = 1; + struct record_cluster *cluster; + struct session_database *sdb = client_get_database(cl); struct record *record = record_create(se->nmem, service->num_metadata, service->num_sortkeys, cl, @@ -1594,25 +1594,22 @@ static int ingest_to_cluster(struct client *cl, } } - struct record_cluster *cluster = reclist_insert(se->reclist, - service, - record, - mergekey_norm, - &se->total_merged); + cluster = reclist_insert(se->reclist, service, record, + mergekey_norm, &se->total_merged); if (!cluster) return -1; - struct session_database *sdb = client_get_database(cl); - int term_factor = 1; - const char *use_term_factor_str = - session_setting_oneval(sdb, PZ_TERMLIST_TERM_FACTOR); - if (use_term_factor_str && use_term_factor_str[0] == '1') { - int maxrecs = client_get_maxrecs(cl); - int hits = (int) client_get_hits(cl); - term_factor = MAX(hits, maxrecs) / MAX(1, maxrecs); - assert(term_factor >= 1); - yaz_log(YLOG_DEBUG, "Using term factor: %d (%d / %d)", term_factor, MAX(hits, maxrecs), MAX(1, maxrecs)); + const char *use_term_factor_str = + session_setting_oneval(sdb, PZ_TERMLIST_TERM_FACTOR); + if (use_term_factor_str && use_term_factor_str[0] == '1') + { + int maxrecs = client_get_maxrecs(cl); + int hits = (int) client_get_hits(cl); + term_factor = MAX(hits, maxrecs) / MAX(1, maxrecs); + assert(term_factor >= 1); + yaz_log(YLOG_DEBUG, "Using term factor: %d (%d / %d)", term_factor, MAX(hits, maxrecs), MAX(1, maxrecs)); + } } if 
(global_parameters.dump_records) diff --git a/src/session.h b/src/session.h index b839ef2..38e3a38 100644 --- a/src/session.h +++ b/src/session.h @@ -147,12 +147,14 @@ void destroy_session(struct session *s); void session_init_databases(struct session *s); void statistics(struct session *s, struct statistics *stat); -void search_sort(struct session *se, const char *field, int increasing); - -enum pazpar2_error_code search(struct session *s, const char *query, - const char *startrecs, const char *maxrecs, - const char *filter, const char *limit, - const char **addinfo); +void session_sort(struct session *se, const char *field, int increasing); + +enum pazpar2_error_code session_search(struct session *s, const char *query, + const char *startrecs, + const char *maxrecs, + const char *filter, const char *limit, + const char **addinfo, + const char *sort_field, int increasing); struct record_cluster **show_range_start(struct session *s, struct reclist_sortparms *sp, int start, diff --git a/test/test_url.urls b/test/test_url.urls index b497907..d93248f 100644 --- a/test/test_url.urls +++ b/test/test_url.urls @@ -1,8 +1,10 @@ http://localhost:9763/search.pz2?command=init&clear=1&pz:elements%5Bmy%5D=F&pz:requestsyntax%5Bmy%5D=usmarc&pz:nativesyntax%5Bmy%5D=iso2709&pz:xslt%5Bmy%5D=marc21_test.xsl&pz:name%5Bmy%5D=marcserver&pz:url%5Bmy%5D=z3950.indexdata.com%2Fmarc http://localhost:9763/search.pz2?session=1&command=search&query=computer 2 http://localhost:9763/search.pz2?session=1&command=show&block=1 -http://localhost:9763/search.pz2?session=1&command=settings&pz:url%5Bmy%5D=z3950.indexdata.com%2Fgils&pz:sortmap:title%5Bmy%5D=type7:title+%3C -http://localhost:9763/search.pz2?session=1&command=search&query=computer +http://localhost:9763/search.pz2?session=1&command=settings&pz:url%5Bmy%5D=z3950.indexdata.com%2Fgils&pz:sortmap:title%5Bmy%5D=type7:title +http://localhost:9763/search.pz2?session=1&command=search&query=the&maxrecs=3 2 
http://localhost:9763/search.pz2?session=1&command=show&block=1 2 http://localhost:9763/search.pz2?session=1&command=show&block=1&sort=title:1 -1 http://localhost:9763/search.pz2?session=1&command=show&block=1&sort=title:0 +2 http://localhost:9763/search.pz2?session=1&command=show&block=1&sort=title:1 +2 http://localhost:9763/search.pz2?session=1&command=show&block=1&sort=title:0 +2 http://localhost:9763/search.pz2?session=1&command=show&block=1&sort=title:0 diff --git a/test/test_url_10.res b/test/test_url_10.res new file mode 100644 index 0000000..c6c0bf1 --- /dev/null +++ b/test/test_url_10.res @@ -0,0 +1,71 @@ + +OK +0 +7 +31 +0 +7 + + +WATER WELL DATA +This database contains the following information on water wells in Nevada: driller's name, owner's name, location, formations encountered, lithologic descriptions, water level, and water quality +WATER WELL DATA +This database contains the following information on water wells in Nevada: driller's name, owner's name, location, formations encountered, lithologic descriptions, water level, and water quality +1930-PRESENT +title water well data author medium book + + + +UTAH GEOLOGIC MAP BIBLIOGRAPHY +This collection consists of theses, dissertations, and other unpublished maps as well as published maps of the geology of Utah. Some maps of the collection are xeroxed from limited collections. Cross-sections are included in set. Data file consists of map bibliography +UTAH GEOLOGIC MAP BIBLIOGRAPHY +This collection consists of theses, dissertations, and other unpublished maps as well as published maps of the geology of Utah. Some maps of the collection are xeroxed from limited collections. Cross-sections are included in set. 
Data file consists of map bibliography +-PRESENT +title utah geologic map bibliography author medium book + + + +UTAH GEOLOGICAL AND MINERAL SURVEY PUBLICATIONS +Publications of the Utah Geological and Mineral Survey include reports of investigation, special studies, bulletins, open-file reports, geologic map of Utah, publications of geological societies, geologic and oil and mineral maps, coal monographs, circulars, water resource bulletins, and reprints of articles +UTAH GEOLOGICAL AND MINERAL SURVEY PUBLICATIONS +Publications of the Utah Geological and Mineral Survey include reports of investigation, special studies, bulletins, open-file reports, geologic map of Utah, publications of geological societies, geologic and oil and mineral maps, coal monographs, circulars, water resource bulletins, and reprints of articles +-PRESENT +title utah geological and mineral survey publications author medium book + + + +UTAH EARTHQUAKE EPICENTERS +Five files of epicenter data arranged by date comprise this data set. These files are searchable by magnitude and longitude/latitude. Hardcopy of listing and plot of requested area available. Epicenter location and date, magnitude, and focal depth available +UTAH EARTHQUAKE EPICENTERS +Five files of epicenter data arranged by date comprise this data set. These files are searchable by magnitude and longitude/latitude. Hardcopy of listing and plot of requested area available. Epicenter location and date, magnitude, and focal depth available +-PRESENT +title utah earthquake epicenters author medium book + + + +BIBLIOGRAPHY OF MAINE GEOLOGY +This data base is a computer based bibliography of marine geology. It allows searching by topic and geographic location, similar to GEOREF. It is currently under development to replace the printed Bibliography of Marine Geology +BIBLIOGRAPHY OF MAINE GEOLOGY +This data base is a computer based bibliography of marine geology. It allows searching by topic and geographic location, similar to GEOREF. 
It is currently under development to replace the printed Bibliography of Marine Geology +1692-PRESENT +title bibliography of maine geology author medium book + + + +AUTOMATED FLOOD WARNING NETWORK +The new system will collect rainfall, temperature, soil moisture, wind speed and direction, humidity, and streamflow (above certain values) +AUTOMATED FLOOD WARNING NETWORK +The new system will collect rainfall, temperature, soil moisture, wind speed and direction, humidity, and streamflow (above certain values) +1982-PRESENT +title automated flood warning network author medium book + + + +APPLIED GEOLOGY FILE +Reports and memorandums completed by the Site Investigation Section comprise this data set. Subjects include geotechnical appraisal of public facility sites before and during construction and evaluations of hazardous waste problems +APPLIED GEOLOGY FILE +Reports and memorandums completed by the Site Investigation Section comprise this data set. Subjects include geotechnical appraisal of public facility sites before and during construction and evaluations of hazardous waste problems +1970-PRESENT +title applied geology file author medium book + + \ No newline at end of file diff --git a/test/test_url_9.res b/test/test_url_9.res new file mode 100644 index 0000000..770d050 --- /dev/null +++ b/test/test_url_9.res @@ -0,0 +1,53 @@ + +OK +0 +5 +31 +0 +5 + + +UTAH GEOLOGICAL AND MINERAL SURVEY PUBLICATIONS +Publications of the Utah Geological and Mineral Survey include reports of investigation, special studies, bulletins, open-file reports, geologic map of Utah, publications of geological societies, geologic and oil and mineral maps, coal monographs, circulars, water resource bulletins, and reprints of articles +UTAH GEOLOGICAL AND MINERAL SURVEY PUBLICATIONS +Publications of the Utah Geological and Mineral Survey include reports of investigation, special studies, bulletins, open-file reports, geologic map of Utah, publications of geological societies, geologic and oil 
and mineral maps, coal monographs, circulars, water resource bulletins, and reprints of articles +-PRESENT +title utah geological and mineral survey publications author medium book + + + +UTAH EARTHQUAKE EPICENTERS +Five files of epicenter data arranged by date comprise this data set. These files are searchable by magnitude and longitude/latitude. Hardcopy of listing and plot of requested area available. Epicenter location and date, magnitude, and focal depth available +UTAH EARTHQUAKE EPICENTERS +Five files of epicenter data arranged by date comprise this data set. These files are searchable by magnitude and longitude/latitude. Hardcopy of listing and plot of requested area available. Epicenter location and date, magnitude, and focal depth available +-PRESENT +title utah earthquake epicenters author medium book + + + +BIBLIOGRAPHY OF MAINE GEOLOGY +This data base is a computer based bibliography of marine geology. It allows searching by topic and geographic location, similar to GEOREF. It is currently under development to replace the printed Bibliography of Marine Geology +BIBLIOGRAPHY OF MAINE GEOLOGY +This data base is a computer based bibliography of marine geology. It allows searching by topic and geographic location, similar to GEOREF. It is currently under development to replace the printed Bibliography of Marine Geology +1692-PRESENT +title bibliography of maine geology author medium book + + + +AUTOMATED FLOOD WARNING NETWORK +The new system will collect rainfall, temperature, soil moisture, wind speed and direction, humidity, and streamflow (above certain values) +AUTOMATED FLOOD WARNING NETWORK +The new system will collect rainfall, temperature, soil moisture, wind speed and direction, humidity, and streamflow (above certain values) +1982-PRESENT +title automated flood warning network author medium book + + + +APPLIED GEOLOGY FILE +Reports and memorandums completed by the Site Investigation Section comprise this data set. 
Subjects include geotechnical appraisal of public facility sites before and during construction and evaluations of hazardous waste problems +APPLIED GEOLOGY FILE +Reports and memorandums completed by the Site Investigation Section comprise this data set. Subjects include geotechnical appraisal of public facility sites before and during construction and evaluations of hazardous waste problems +1970-PRESENT +title applied geology file author medium book + + \ No newline at end of file -- 1.7.10.4