Initial search may perform (relevance) sort
author Adam Dickmeiss <adam@indexdata.dk>
Wed, 12 Oct 2011 09:30:08 +0000 (11:30 +0200)
committer Adam Dickmeiss <adam@indexdata.dk>
Wed, 12 Oct 2011 09:30:08 +0000 (11:30 +0200)
The increasing flag is now taken into consideration. The pz:sortmap
setting no longer includes sort spec flags: just strategy:field.
More test results added. It is clear that the block flag is not
honored for show as it should be: block=1 ought to wait until the
first record comes in after a (re)search with different sort criteria.

src/client.c
src/client.h
src/http_command.c
src/session.c
src/session.h
test/test_url.urls
test/test_url_10.res [new file with mode: 0644]
test/test_url_9.res [new file with mode: 0644]

index 05f6541..39190c0 100644 (file)
@@ -659,8 +659,8 @@ int client_has_facet(struct client *cl, const char *name)
     return 0;
 }
 
-void client_start_search(struct client *cl, const char *sort_strategy,
-                         const char *sort_spec)
+void client_start_search(struct client *cl, const char *sort_strategy_and_spec,
+                         int increasing)
 {
     struct session_database *sdb = client_get_database(cl);
     struct connection *co = client_get_connection(cl);
@@ -676,6 +676,7 @@ void client_start_search(struct client *cl, const char *sort_strategy,
     const char *opt_preferred   = session_setting_oneval(sdb, PZ_PREFERRED);
     const char *extra_args      = session_setting_oneval(sdb, PZ_EXTRA_ARGS);
     char maxrecs_str[24], startrecs_str[24];
+    ZOOM_query q;
 
     assert(link);
 
@@ -727,38 +728,42 @@ void client_start_search(struct client *cl, const char *sort_strategy,
     /* facets definition is in PQF */
     client_set_facets_request(cl, link);
 
+    q = ZOOM_query_create();
     if (cl->cqlquery)
     {
-        ZOOM_query q = ZOOM_query_create();
         yaz_log(YLOG_LOG, "Search %s CQL: %s", client_get_id(cl),
                 cl->cqlquery);
         ZOOM_query_cql(q, cl->cqlquery);
         if (*opt_sort)
             ZOOM_query_sortby(q, opt_sort);
-        if (sort_strategy && sort_spec)
-        {
-            yaz_log(YLOG_LOG, "applying %s %s", sort_strategy, sort_spec);
-            ZOOM_query_sortby2(q, sort_strategy, sort_spec);
-        }
-        rs = ZOOM_connection_search(link, q);
-        ZOOM_query_destroy(q);
     }
     else
     {
-        ZOOM_query q = ZOOM_query_create();
-
         yaz_log(YLOG_LOG, "Search %s PQF: %s", client_get_id(cl), cl->pquery);
-
+        
         ZOOM_query_prefix(q, cl->pquery);
-
-        if (sort_strategy && sort_spec)
+    }
+    if (sort_strategy_and_spec &&
+        strlen(sort_strategy_and_spec) < 40 /* spec below */)
+    {
+        char spec[50], *p;
+        strcpy(spec, sort_strategy_and_spec);
+        p = strchr(spec, ':');
+        if (p)
         {
-            yaz_log(YLOG_LOG, "applying %s %s", sort_strategy, sort_spec);
-            ZOOM_query_sortby2(q, sort_strategy, sort_spec);
+            *p++ = '\0'; /* cut the string in two */
+            while (*p == ' ')
+                p++;
+            if (increasing)
+                strcat(p, " <");
+            else
+                strcat(p, " >");
+            yaz_log(YLOG_LOG, "applying %s %s", spec, p);
+            ZOOM_query_sortby2(q, spec, p);
         }
-        rs = ZOOM_connection_search(link, q);
-        ZOOM_query_destroy(q);
     }
+    rs = ZOOM_connection_search(link, q);
+    ZOOM_query_destroy(q);
     ZOOM_resultset_destroy(cl->resultset);
     cl->resultset = rs;
     connection_continue(co);
index d7da767..2fb8ada 100644 (file)
@@ -76,8 +76,8 @@ int client_prep_connection(struct client *cl,
                            int operation_timeout, int session_timeout,
                            iochan_man_t iochan,
                            const struct timeval *abstime);
-void client_start_search(struct client *cl, const char *sort_strategy,
-    const char *sort_spec);
+void client_start_search(struct client *cl, const char *sort_strategy_and_spec,
+                         int increasing);
 void client_set_session(struct client *cl, struct session *se);
 int client_is_active(struct client *cl);
 int client_is_active_preferred(struct client *cl);
index 9874653..a4d5106 100644 (file)
@@ -969,7 +969,7 @@ static void cmd_show(struct http_channel *c)
         release_session(c, s);
         return;
     }
-    search_sort(s->psession, sp->name, sp->increasing);
+    session_sort(s->psession, sp->name, sp->increasing);
 
     if (block)
     {
@@ -1039,8 +1039,8 @@ static void cmd_search(struct http_channel *c)
         release_session(c, s);
         return;
     }
-    code = search(s->psession, query, startrecs, maxrecs, filter, limit,
-                  &addinfo);
+    code = session_search(s->psession, query, startrecs, maxrecs, filter, limit,
+                          &addinfo, "relevance", 0);
     if (code)
     {
         error(rs, code, addinfo);
index c9628e4..0e5427e 100644 (file)
@@ -597,7 +597,32 @@ int session_is_preferred_clients_ready(struct session *s)
     return res == 0;
 }
 
-void search_sort(struct session *se, const char *field, int increasing)
+static const char *get_strategy_plus_sort(struct client *l, const char *field)
+{
+    struct session_database *sdb = client_get_database(l);
+    struct setting *s;
+
+    const char *strategy_plus_sort = 0;
+    
+    for (s = sdb->settings[PZ_SORTMAP]; s; s = s->next)
+    {
+        char *p = strchr(s->name + 3, ':');
+        if (!p)
+        {
+            yaz_log(YLOG_WARN, "Malformed sortmap name: %s", s->name);
+            continue;
+        }
+        p++;
+        if (!strcmp(p, field))
+        {
+            strategy_plus_sort = s->value;
+            break;
+        }
+    }
+    return strategy_plus_sort;
+}
+
+void session_sort(struct session *se, const char *field, int increasing)
 {
     struct session_sorted_results *sr;
     struct client_list *l;
@@ -628,26 +653,7 @@ void search_sort(struct session *se, const char *field, int increasing)
     for (l = se->clients; l; l = l->next)
     {
         struct client *cl = l->client;
-        struct session_database *sdb = client_get_database(cl);
-        struct setting *s;
-        const char *strategy_plus_sort = 0;
-        
-        for (s = sdb->settings[PZ_SORTMAP]; s; s = s->next)
-        {
-            char *p = strchr(s->name + 3, ':');
-            if (!p)
-            {
-                yaz_log(YLOG_WARN, "Malformed sortmap name: %s", s->name);
-                continue;
-            }
-            p++;
-            if (!strcmp(p, field))
-            {
-                strategy_plus_sort = s->value;
-                break;
-            }
-        }
-        
+        const char *strategy_plus_sort = get_strategy_plus_sort(cl, field);
         if (strategy_plus_sort)
         {
             struct timeval tval;
@@ -655,30 +661,20 @@ void search_sort(struct session *se, const char *field, int increasing)
                                        se->service->z3950_session_timeout,
                                        se->service->server->iochan_man,
                                        &tval))
-            {
-                char **array;
-                int num;
-                nmem_strsplit(se->nmem, ":", strategy_plus_sort, &array, &num);
-                
-                if (num == 2)
-                {
-                    const char *sort_spec = array[1];
-                    while (*sort_spec == ' ')
-                        sort_spec++;
-                    client_start_search(cl, array[0], sort_spec);
-                }
-            }
+                client_start_search(cl, strategy_plus_sort, increasing);
         }
     }
     session_leave(se);
 }
 
-enum pazpar2_error_code search(struct session *se,
-                               const char *query,
-                               const char *startrecs, const char *maxrecs,
-                               const char *filter,
-                               const char *limit,
-                               const char **addinfo)
+enum pazpar2_error_code session_search(struct session *se,
+                                       const char *query,
+                                       const char *startrecs,
+                                       const char *maxrecs,
+                                       const char *filter,
+                                       const char *limit,
+                                       const char **addinfo,
+                                       const char *sort_field, int increasing)
 {
     int live_channels = 0;
     int no_working = 0;
@@ -703,8 +699,8 @@ enum pazpar2_error_code search(struct session *se,
 
     /* reset list of sorted results and clear to relevance search */
     se->sorted_results = nmem_malloc(se->nmem, sizeof(*se->sorted_results));
-    se->sorted_results->field = nmem_strdup(se->nmem, "relevance");
-    se->sorted_results->increasing = 0;
+    se->sorted_results->field = nmem_strdup(se->nmem, sort_field);
+    se->sorted_results->increasing = increasing;
     se->sorted_results->next = 0;
     
     live_channels = select_targets(se, filter);
@@ -729,6 +725,7 @@ enum pazpar2_error_code search(struct session *se,
     for (l = se->clients; l; l = l->next)
     {
         struct client *cl = l->client;
+        const char *strategy_plus_sort = get_strategy_plus_sort(cl, sort_field);
 
         if (maxrecs)
             client_set_maxrecs(cl, atoi(maxrecs));
@@ -745,7 +742,7 @@ enum pazpar2_error_code search(struct session *se,
                                        se->service->z3950_session_timeout,
                                        se->service->server->iochan_man,
                                        &tval))
-                client_start_search(cl, 0, 0);
+                client_start_search(cl, strategy_plus_sort, increasing);
         }
     }
     facet_limits_destroy(facet_limits);
@@ -1535,6 +1532,9 @@ static int ingest_to_cluster(struct client *cl,
     xmlChar *value = 0;
     struct session *se = client_get_session(cl);
     struct conf_service *service = se->service;
+    int term_factor = 1;
+    struct record_cluster *cluster;
+    struct session_database *sdb = client_get_database(cl);
     struct record *record = record_create(se->nmem, 
                                           service->num_metadata,
                                           service->num_sortkeys, cl,
@@ -1594,25 +1594,22 @@ static int ingest_to_cluster(struct client *cl,
         }
     }
 
-    struct record_cluster *cluster = reclist_insert(se->reclist,
-                                                    service, 
-                                                    record,
-                                                    mergekey_norm,
-                                                    &se->total_merged);
+    cluster = reclist_insert(se->reclist, service, record,
+                             mergekey_norm, &se->total_merged);
     if (!cluster)
         return -1;
 
-    struct session_database *sdb = client_get_database(cl);
-    int term_factor = 1;
-    const char *use_term_factor_str =
-        session_setting_oneval(sdb, PZ_TERMLIST_TERM_FACTOR);
-    if (use_term_factor_str && use_term_factor_str[0] == '1')
     {
-        int maxrecs = client_get_maxrecs(cl);
-        int hits = (int) client_get_hits(cl);
-        term_factor = MAX(hits, maxrecs) /  MAX(1, maxrecs);
-        assert(term_factor >= 1);
-        yaz_log(YLOG_DEBUG, "Using term factor: %d (%d / %d)", term_factor, MAX(hits, maxrecs), MAX(1, maxrecs));
+        const char *use_term_factor_str =
+            session_setting_oneval(sdb, PZ_TERMLIST_TERM_FACTOR);
+        if (use_term_factor_str && use_term_factor_str[0] == '1')
+        {
+            int maxrecs = client_get_maxrecs(cl);
+            int hits = (int) client_get_hits(cl);
+            term_factor = MAX(hits, maxrecs) /  MAX(1, maxrecs);
+            assert(term_factor >= 1);
+            yaz_log(YLOG_DEBUG, "Using term factor: %d (%d / %d)", term_factor, MAX(hits, maxrecs), MAX(1, maxrecs));
+        }
     }
 
     if (global_parameters.dump_records)
index b839ef2..38e3a38 100644 (file)
@@ -147,12 +147,14 @@ void destroy_session(struct session *s);
 void session_init_databases(struct session *s);
 void statistics(struct session *s, struct statistics *stat);
 
-void search_sort(struct session *se, const char *field, int increasing);
-
-enum pazpar2_error_code search(struct session *s, const char *query,
-                               const char *startrecs, const char *maxrecs,
-                               const char *filter, const char *limit,
-                               const char **addinfo);
+void session_sort(struct session *se, const char *field, int increasing);
+
+enum pazpar2_error_code session_search(struct session *s, const char *query,
+                                       const char *startrecs,
+                                       const char *maxrecs,
+                                       const char *filter, const char *limit,
+                                       const char **addinfo,
+                                       const char *sort_field, int increasing);
 struct record_cluster **show_range_start(struct session *s,
                                          struct reclist_sortparms *sp,
                                          int start,
index b497907..d93248f 100644 (file)
@@ -1,8 +1,10 @@
 http://localhost:9763/search.pz2?command=init&clear=1&pz:elements%5Bmy%5D=F&pz:requestsyntax%5Bmy%5D=usmarc&pz:nativesyntax%5Bmy%5D=iso2709&pz:xslt%5Bmy%5D=marc21_test.xsl&pz:name%5Bmy%5D=marcserver&pz:url%5Bmy%5D=z3950.indexdata.com%2Fmarc
 http://localhost:9763/search.pz2?session=1&command=search&query=computer
 2 http://localhost:9763/search.pz2?session=1&command=show&block=1
-http://localhost:9763/search.pz2?session=1&command=settings&pz:url%5Bmy%5D=z3950.indexdata.com%2Fgils&pz:sortmap:title%5Bmy%5D=type7:title+%3C
-http://localhost:9763/search.pz2?session=1&command=search&query=computer
+http://localhost:9763/search.pz2?session=1&command=settings&pz:url%5Bmy%5D=z3950.indexdata.com%2Fgils&pz:sortmap:title%5Bmy%5D=type7:title
+http://localhost:9763/search.pz2?session=1&command=search&query=the&maxrecs=3
 2 http://localhost:9763/search.pz2?session=1&command=show&block=1
 2 http://localhost:9763/search.pz2?session=1&command=show&block=1&sort=title:1
-1 http://localhost:9763/search.pz2?session=1&command=show&block=1&sort=title:0
+2 http://localhost:9763/search.pz2?session=1&command=show&block=1&sort=title:1
+2 http://localhost:9763/search.pz2?session=1&command=show&block=1&sort=title:0
+2 http://localhost:9763/search.pz2?session=1&command=show&block=1&sort=title:0
diff --git a/test/test_url_10.res b/test/test_url_10.res
new file mode 100644 (file)
index 0000000..c6c0bf1
--- /dev/null
@@ -0,0 +1,71 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<show><status>OK</status>
+<activeclients>0</activeclients>
+<merged>7</merged>
+<total>31</total>
+<start>0</start>
+<num>7</num>
+<hit>
+
+<md-title>WATER WELL DATA</md-title>
+<md-description>This database contains the following information on water wells in Nevada: driller&apos;s name, owner&apos;s name, location, formations encountered, lithologic descriptions, water level, and water quality</md-description><location id="my" name="marcserver">
+<md-title>WATER WELL DATA</md-title>
+<md-description tag="520">This database contains the following information on water wells in Nevada: driller&apos;s name, owner&apos;s name, location, formations encountered, lithologic descriptions, water level, and water quality</md-description>
+<md-description tag="513">1930-PRESENT</md-description></location>
+<recid>title water well data author medium book</recid>
+</hit>
+<hit>
+
+<md-title>UTAH GEOLOGIC MAP BIBLIOGRAPHY</md-title>
+<md-description>This collection consists of theses, dissertations, and other unpublished maps as well as published maps of the geology of Utah.  Some maps of the collection are xeroxed from limited collections.  Cross-sections are included in set.  Data file consists of map bibliography</md-description><location id="my" name="marcserver">
+<md-title>UTAH GEOLOGIC MAP BIBLIOGRAPHY</md-title>
+<md-description tag="520">This collection consists of theses, dissertations, and other unpublished maps as well as published maps of the geology of Utah.  Some maps of the collection are xeroxed from limited collections.  Cross-sections are included in set.  Data file consists of map bibliography</md-description>
+<md-description tag="513">-PRESENT</md-description></location>
+<recid>title utah geologic map bibliography author medium book</recid>
+</hit>
+<hit>
+
+<md-title>UTAH GEOLOGICAL AND MINERAL SURVEY PUBLICATIONS</md-title>
+<md-description>Publications of the Utah Geological and Mineral Survey include reports of investigation, special studies, bulletins, open-file reports, geologic map of Utah, publications of geological societies, geologic and oil and mineral maps, coal monographs, circulars, water resource bulletins, and reprints of articles</md-description><location id="my" name="marcserver">
+<md-title>UTAH GEOLOGICAL AND MINERAL SURVEY PUBLICATIONS</md-title>
+<md-description tag="520">Publications of the Utah Geological and Mineral Survey include reports of investigation, special studies, bulletins, open-file reports, geologic map of Utah, publications of geological societies, geologic and oil and mineral maps, coal monographs, circulars, water resource bulletins, and reprints of articles</md-description>
+<md-description tag="513">-PRESENT</md-description></location>
+<recid>title utah geological and mineral survey publications author medium book</recid>
+</hit>
+<hit>
+
+<md-title>UTAH EARTHQUAKE EPICENTERS</md-title>
+<md-description>Five files of epicenter data arranged by date comprise this data set.  These files are searchable by magnitude and longitude/latitude.  Hardcopy of listing and plot of requested area available.  Epicenter location and date, magnitude, and focal depth available</md-description><location id="my" name="marcserver">
+<md-title>UTAH EARTHQUAKE EPICENTERS</md-title>
+<md-description tag="520">Five files of epicenter data arranged by date comprise this data set.  These files are searchable by magnitude and longitude/latitude.  Hardcopy of listing and plot of requested area available.  Epicenter location and date, magnitude, and focal depth available</md-description>
+<md-description tag="513">-PRESENT</md-description></location>
+<recid>title utah earthquake epicenters author medium book</recid>
+</hit>
+<hit>
+
+<md-title>BIBLIOGRAPHY OF MAINE GEOLOGY</md-title>
+<md-description>This data base is a computer based bibliography of marine geology.  It allows searching by topic and geographic location, similar to GEOREF.  It is currently under development to replace the printed Bibliography of Marine Geology</md-description><location id="my" name="marcserver">
+<md-title>BIBLIOGRAPHY OF MAINE GEOLOGY</md-title>
+<md-description tag="520">This data base is a computer based bibliography of marine geology.  It allows searching by topic and geographic location, similar to GEOREF.  It is currently under development to replace the printed Bibliography of Marine Geology</md-description>
+<md-description tag="513">1692-PRESENT</md-description></location>
+<recid>title bibliography of maine geology author medium book</recid>
+</hit>
+<hit>
+
+<md-title>AUTOMATED FLOOD WARNING NETWORK</md-title>
+<md-description>The new system will collect rainfall, temperature, soil moisture, wind speed and direction, humidity, and streamflow (above certain values)</md-description><location id="my" name="marcserver">
+<md-title>AUTOMATED FLOOD WARNING NETWORK</md-title>
+<md-description tag="520">The new system will collect rainfall, temperature, soil moisture, wind speed and direction, humidity, and streamflow (above certain values)</md-description>
+<md-description tag="513">1982-PRESENT</md-description></location>
+<recid>title automated flood warning network author medium book</recid>
+</hit>
+<hit>
+
+<md-title>APPLIED GEOLOGY FILE</md-title>
+<md-description>Reports and memorandums completed by the Site Investigation Section comprise this data set.  Subjects include geotechnical appraisal of public facility sites before and during construction and evaluations of hazardous waste problems</md-description><location id="my" name="marcserver">
+<md-title>APPLIED GEOLOGY FILE</md-title>
+<md-description tag="520">Reports and memorandums completed by the Site Investigation Section comprise this data set.  Subjects include geotechnical appraisal of public facility sites before and during construction and evaluations of hazardous waste problems</md-description>
+<md-description tag="513">1970-PRESENT</md-description></location>
+<recid>title applied geology file author medium book</recid>
+</hit>
+</show>
\ No newline at end of file
diff --git a/test/test_url_9.res b/test/test_url_9.res
new file mode 100644 (file)
index 0000000..770d050
--- /dev/null
@@ -0,0 +1,53 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<show><status>OK</status>
+<activeclients>0</activeclients>
+<merged>5</merged>
+<total>31</total>
+<start>0</start>
+<num>5</num>
+<hit>
+
+<md-title>UTAH GEOLOGICAL AND MINERAL SURVEY PUBLICATIONS</md-title>
+<md-description>Publications of the Utah Geological and Mineral Survey include reports of investigation, special studies, bulletins, open-file reports, geologic map of Utah, publications of geological societies, geologic and oil and mineral maps, coal monographs, circulars, water resource bulletins, and reprints of articles</md-description><location id="my" name="marcserver">
+<md-title>UTAH GEOLOGICAL AND MINERAL SURVEY PUBLICATIONS</md-title>
+<md-description tag="520">Publications of the Utah Geological and Mineral Survey include reports of investigation, special studies, bulletins, open-file reports, geologic map of Utah, publications of geological societies, geologic and oil and mineral maps, coal monographs, circulars, water resource bulletins, and reprints of articles</md-description>
+<md-description tag="513">-PRESENT</md-description></location>
+<recid>title utah geological and mineral survey publications author medium book</recid>
+</hit>
+<hit>
+
+<md-title>UTAH EARTHQUAKE EPICENTERS</md-title>
+<md-description>Five files of epicenter data arranged by date comprise this data set.  These files are searchable by magnitude and longitude/latitude.  Hardcopy of listing and plot of requested area available.  Epicenter location and date, magnitude, and focal depth available</md-description><location id="my" name="marcserver">
+<md-title>UTAH EARTHQUAKE EPICENTERS</md-title>
+<md-description tag="520">Five files of epicenter data arranged by date comprise this data set.  These files are searchable by magnitude and longitude/latitude.  Hardcopy of listing and plot of requested area available.  Epicenter location and date, magnitude, and focal depth available</md-description>
+<md-description tag="513">-PRESENT</md-description></location>
+<recid>title utah earthquake epicenters author medium book</recid>
+</hit>
+<hit>
+
+<md-title>BIBLIOGRAPHY OF MAINE GEOLOGY</md-title>
+<md-description>This data base is a computer based bibliography of marine geology.  It allows searching by topic and geographic location, similar to GEOREF.  It is currently under development to replace the printed Bibliography of Marine Geology</md-description><location id="my" name="marcserver">
+<md-title>BIBLIOGRAPHY OF MAINE GEOLOGY</md-title>
+<md-description tag="520">This data base is a computer based bibliography of marine geology.  It allows searching by topic and geographic location, similar to GEOREF.  It is currently under development to replace the printed Bibliography of Marine Geology</md-description>
+<md-description tag="513">1692-PRESENT</md-description></location>
+<recid>title bibliography of maine geology author medium book</recid>
+</hit>
+<hit>
+
+<md-title>AUTOMATED FLOOD WARNING NETWORK</md-title>
+<md-description>The new system will collect rainfall, temperature, soil moisture, wind speed and direction, humidity, and streamflow (above certain values)</md-description><location id="my" name="marcserver">
+<md-title>AUTOMATED FLOOD WARNING NETWORK</md-title>
+<md-description tag="520">The new system will collect rainfall, temperature, soil moisture, wind speed and direction, humidity, and streamflow (above certain values)</md-description>
+<md-description tag="513">1982-PRESENT</md-description></location>
+<recid>title automated flood warning network author medium book</recid>
+</hit>
+<hit>
+
+<md-title>APPLIED GEOLOGY FILE</md-title>
+<md-description>Reports and memorandums completed by the Site Investigation Section comprise this data set.  Subjects include geotechnical appraisal of public facility sites before and during construction and evaluations of hazardous waste problems</md-description><location id="my" name="marcserver">
+<md-title>APPLIED GEOLOGY FILE</md-title>
+<md-description tag="520">Reports and memorandums completed by the Site Investigation Section comprise this data set.  Subjects include geotechnical appraisal of public facility sites before and during construction and evaluations of hazardous waste problems</md-description>
+<md-description tag="513">1970-PRESENT</md-description></location>
+<recid>title applied geology file author medium book</recid>
+</hit>
+</show>
\ No newline at end of file