Minor refactor of session_sort_unlocked
[pazpar2-moved-to-github.git] / src / session.c
index d4e4a3c..c6b1f21 100644 (file)
@@ -57,7 +57,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include <yaz/oid_db.h>
 #include <yaz/snprintf.h>
 
-#define USE_TIMING 0
+#define USE_TIMING 1
 #if USE_TIMING
 #include <yaz/timing.h>
 #endif
@@ -181,7 +181,8 @@ static void session_normalize_facet(struct session *s, const char *type,
     prt = pp2_charset_token_create(service->charsets, icu_chain_id);
     if (!prt)
     {
-        yaz_log(YLOG_FATAL, "Unknown ICU chain '%s' for facet of type '%s'",
+        session_log(s, YLOG_FATAL,
+                    "Unknown ICU chain '%s' for facet of type '%s'",
                 icu_chain_id, type);
         wrbuf_destroy(facet_wrbuf);
         wrbuf_destroy(display_wrbuf);
@@ -623,7 +624,6 @@ int session_is_preferred_clients_ready(struct session *s)
 static void session_clear_set(struct session *se, struct reclist_sortparms *sp)
 {
     reclist_destroy(se->reclist);
-    se->reclist = 0;
     if (nmem_total(se->nmem))
         session_log(se, YLOG_DEBUG, "NMEN operation usage %zd",
                     nmem_total(se->nmem));
@@ -631,6 +631,8 @@ static void session_clear_set(struct session *se, struct reclist_sortparms *sp)
     se->total_records = se->total_merged = 0;
     se->num_termlists = 0;
 
+    relevance_clear(se->relevance);
+
     /* reset list of sorted results and clear to relevance search */
     se->sorted_results = nmem_malloc(se->nmem, sizeof(*se->sorted_results));
     se->sorted_results->name = nmem_strdup(se->nmem, sp->name);
@@ -644,30 +646,44 @@ static void session_clear_set(struct session *se, struct reclist_sortparms *sp)
     se->reclist = reclist_create(se->nmem);
 }
 
-static void session_sort_unlocked(struct session *se, struct reclist_sortparms *sp)
+static void session_sort_unlocked(struct session *se,
+                                  struct reclist_sortparms *sp,
+                                  const char *mergekey)
 {
-    struct reclist_sortparms *sr;
     struct client_list *l;
     const char *field = sp->name;
     int increasing = sp->increasing;
     int type  = sp->type;
     int clients_research = 0;
 
-    yaz_log(YLOG_LOG, "session_sort field=%s increasing=%d type=%d", field, increasing, type);
-    /* see if we already have sorted for this criteria */
-    for (sr = se->sorted_results; sr; sr = sr->next)
+    session_log(se, YLOG_DEBUG, "session_sort field=%s increasing=%d type=%d",
+                field, increasing, type);
+    /* se->mergekey is 0 when no or an empty key is stored, so compare it as "" */
+    if (mergekey && strcmp(se->mergekey ? se->mergekey : "", mergekey))
     {
-        if (!reclist_sortparms_cmp(sr,sp))
-            break;
+        /* a changed mergekey forces re-search/re-ingest whatever the sort */
+        assert(mergekey);
+        xfree(se->mergekey);
+        se->mergekey = *mergekey ? xstrdup(mergekey) : 0;
+        clients_research = 1;
+        session_log(se, YLOG_DEBUG, "search_sort: new mergekey = %s",
+                    mergekey);
     }
-    if (sr)
+    if (clients_research == 0)
     {
-        session_log(se, YLOG_DEBUG, "search_sort: field=%s increasing=%d type=%d already fetched",
-                    field, increasing, type);
-        return;
+        struct reclist_sortparms *sr;
+        for (sr = se->sorted_results; sr; sr = sr->next)
+            if (!reclist_sortparms_cmp(sr, sp))
+                break;
+        if (sr)
+        {
+            session_log(se, YLOG_DEBUG, "search_sort: field=%s increasing=%d type=%d already fetched",
+                        field, increasing, type);
+            return;
+        }
     }
     session_log(se, YLOG_DEBUG, "search_sort: field=%s increasing=%d type=%d must fetch",
-                    field, increasing, type);
+                field, increasing, type);
 
     // We need to reset reclist on every sort that changes the records, not just for position
     // So if just one client requires new searching, we need to clear set.
@@ -680,13 +696,17 @@ static void session_sort_unlocked(struct session *se, struct reclist_sortparms *
         client_parse_init(cl, 1);
         clients_research += client_parse_sort(cl, sp);
     }
-    if (clients_research) {
-        yaz_log(YLOG_DEBUG, "Reset results due to %d clients researching", clients_research);
+    if (clients_research)
+    {
+        session_log(se, YLOG_DEBUG,
+                    "Reset results due to %d clients researching",
+                    clients_research);
         session_clear_set(se, sp);
     }
-    else {
+    else
+    {
         // A new sorting based on same record set
-        sr = nmem_malloc(se->nmem, sizeof(*sr));
+        struct reclist_sortparms *sr = nmem_malloc(se->nmem, sizeof(*sr));
         sr->name = nmem_strdup(se->nmem, field);
         sr->increasing = increasing;
         sr->type = type;
@@ -706,16 +726,19 @@ static void session_sort_unlocked(struct session *se, struct reclist_sortparms *
             client_start_search(cl);
         }
         else {
-            yaz_log(YLOG_DEBUG, "Client %s: No re-start/ingest in show. Wrong client state: %d",
+            session_log(se, YLOG_DEBUG,
+                        "Client %s: No re-start/ingest in show. Wrong client state: %d",
                         client_get_id(cl), client_get_state(cl));
         }
 
     }
 }
 
-void session_sort(struct session *se, struct reclist_sortparms *sp) {
+void session_sort(struct session *se, struct reclist_sortparms *sp,
+                  const char *mergekey)
+{   /* NOTE(review): enter/leave are disabled below; presumably callers hold the session lock - confirm */
     //session_enter(se, "session_sort");
-    session_sort_unlocked(se, sp);
+    session_sort_unlocked(se, sp, mergekey);
     //session_leave(se, "session_sort");
 }
 
@@ -727,7 +750,8 @@ enum pazpar2_error_code session_search(struct session *se,
                                        const char *filter,
                                        const char *limit,
                                        const char **addinfo,
-                                       struct reclist_sortparms *sp)
+                                       struct reclist_sortparms *sp,
+                                       const char *mergekey)
 {
     int live_channels = 0;
     int no_working = 0;
@@ -748,6 +772,12 @@ enum pazpar2_error_code session_search(struct session *se,
     session_enter(se, "session_search");
     se->settings_modified = 0;
 
+    if (mergekey)
+    {
+        xfree(se->mergekey);
+        se->mergekey = *mergekey ? xstrdup(mergekey) : 0;
+    }
+
     session_clear_set(se, sp);
     relevance_destroy(&se->relevance);
 
@@ -921,6 +951,7 @@ void session_destroy(struct session *se)
     normalize_cache_destroy(se->normalize_cache);
     relevance_destroy(&se->relevance);
     reclist_destroy(se->reclist);
+    xfree(se->mergekey);
     if (nmem_total(se->nmem))
         session_log(se, YLOG_DEBUG, "NMEN operation usage %zd", nmem_total(se->nmem));
     if (nmem_total(se->session_nmem))
@@ -960,7 +991,7 @@ struct session *new_session(NMEM nmem, struct conf_service *service,
     session->number_of_warnings_unknown_elements = 0;
     session->number_of_warnings_unknown_metadata = 0;
     session->num_termlists = 0;
-    session->reclist = 0;
+    session->reclist = reclist_create(nmem);
     session->clients_active = 0;
     session->clients_cached = 0;
     session->settings_modified = 0;
@@ -969,6 +1000,7 @@ struct session *new_session(NMEM nmem, struct conf_service *service,
     session->databases = 0;
     session->sorted_results = 0;
     session->facet_limits = 0;
+    session->mergekey = 0;
 
     for (i = 0; i <= SESSION_WATCH_MAX; i++)
     {
@@ -1202,22 +1234,19 @@ struct record_cluster *show_single_start(struct session *se, const char *id,
     session_enter(se, "show_single_start");
     *prev_r = 0;
     *next_r = 0;
-    if (se->reclist)
-    {
-        reclist_limit(se->reclist, se);
+    reclist_limit(se->reclist, se);
 
-        reclist_enter(se->reclist);
-        while ((r = reclist_read_record(se->reclist)))
+    reclist_enter(se->reclist);
+    while ((r = reclist_read_record(se->reclist)))
+    {
+        if (!strcmp(r->recid, id))
         {
-            if (!strcmp(r->recid, id))
-            {
-                *next_r = reclist_read_record(se->reclist);
-                break;
-            }
-            *prev_r = r;
+            *next_r = reclist_read_record(se->reclist);
+            break;
         }
-        reclist_leave(se->reclist);
+        *prev_r = r;
     }
+    reclist_leave(se->reclist);
     if (!r)
         session_leave(se, "show_single_start");
     return r;
@@ -1229,10 +1258,44 @@ void show_single_stop(struct session *se, struct record_cluster *rec)
 }
 
 
+/* Calls client_fetch_more() on every idle active client of the session.
+   Returns 1 if at least one call returned non-zero, 0 otherwise. */
+int session_fetch_more(struct session *se)
+{
+    int any_more = 0;
+    struct client_list *entry;
+
+    for (entry = se->clients_active; entry; entry = entry->next)
+    {
+        struct client *c = entry->client;
+
+        /* only idle clients are asked; others are just logged */
+        if (client_get_state(c) != Client_Idle)
+        {
+            session_log(se, YLOG_LOG, "%s: no fetch due to state=%s",
+                        client_get_id(c), client_get_state_str(c));
+            continue;
+        }
+        if (!client_fetch_more(c))
+        {
+            session_log(se, YLOG_LOG, "%s: no more to fetch",
+                        client_get_id(c));
+            continue;
+        }
+        /* no early exit: every remaining client still gets its turn */
+        session_log(se, YLOG_LOG, "%s: more to fetch",
+                    client_get_id(c));
+        any_more = 1;
+    }
+    return any_more;
+}
+
 struct record_cluster **show_range_start(struct session *se,
                                          struct reclist_sortparms *sp,
                                          int start, int *num, int *total,
-                                         Odr_int *sumhits, Odr_int *approx_hits)
+                                         Odr_int *sumhits, Odr_int *approx_hits,
+                                         void (*show_records_ready)(void *data),
+                                         struct http_channel *chan)
 {
     struct record_cluster **recs = 0;
     struct reclist_sortparms *spp;
@@ -1264,16 +1327,25 @@ struct record_cluster **show_range_start(struct session *se,
     reclist_enter(se->reclist);
     *total = reclist_get_num_records(se->reclist);
 
+    for (l = se->clients_active; l; l = l->next)
+        client_update_show_stat(l->client, 0);
+
     for (i = 0; i < start; i++)
-        if (!reclist_read_record(se->reclist))
+    {
+        struct record_cluster *r = reclist_read_record(se->reclist);
+        if (!r)
         {
             *num = 0;
             break;
         }
-
-    if (*num > 0)
-        recs =
-            nmem_malloc(se->nmem, *num * sizeof(struct record_cluster *));
+        else
+        {
+            struct record *rec = r->records;
+            for (;rec; rec = rec->next)
+                client_update_show_stat(rec->client, 1);
+        }
+    }
+    recs = nmem_malloc(se->nmem, (*num > 0 ? *num : 1) * sizeof(*recs));
     for (i = 0; i < *num; i++)
     {
         struct record_cluster *r = reclist_read_record(se->reclist);
@@ -1282,16 +1354,41 @@ struct record_cluster **show_range_start(struct session *se,
             *num = i;
             break;
         }
-        recs[i] = r;
+        else
+        {
+            struct record *rec = r->records;
+            for (;rec; rec = rec->next)
+                client_update_show_stat(rec->client, 1);
+            recs[i] = r;
+        }
     }
     reclist_leave(se->reclist);
 #if USE_TIMING
     yaz_timing_stop(t);
-    yaz_log(YLOG_LOG, "show %6.5f %3.2f %3.2f",
+    session_log(se, YLOG_LOG, "show %6.5f %3.2f %3.2f",
             yaz_timing_get_real(t), yaz_timing_get_user(t),
             yaz_timing_get_sys(t));
     yaz_timing_destroy(&t);
 #endif
+
+    if (!session_fetch_more(se))
+        session_log(se, YLOG_LOG, "can not fetch more");
+    else
+    {
+        show_range_stop(se, recs);
+        session_log(se, YLOG_LOG, "fetching more in progress");
+        if (session_set_watch(se, SESSION_WATCH_SHOW,
+                              show_records_ready, chan, chan))
+        {
+            session_log(se, YLOG_WARN, "Ignoring show block");
+            session_enter(se, "show_range_start");
+        }
+        else
+        {
+            session_log(se, YLOG_LOG, "session watch OK");
+            return 0;
+        }
+    }
     return recs;
 }
 
@@ -1340,10 +1437,11 @@ static struct record_metadata *record_metadata_init(
     {
         if (attr->children && attr->children->content)
         {
-            if (strcmp((const char *) attr->name, "type"))
-            {  /* skip the "type" attribute.. Its value is already part of
-                  the element in output (md-%s) and so repeating it here
-                  is redundant */
+            if (strcmp((const char *) attr->name, "type")
+                && strcmp((const char *) attr->name, "empty"))
+            {  /* skip the "type" and "empty" attributes.
+                  The "type" value is already part of the element in output
+                  (md-%s), so repeating it here is redundant */
                 *attrp = nmem_malloc(nmem, sizeof(**attrp));
                 (*attrp)->name =
                     nmem_strdup(nmem, (const char *) attr->name);
@@ -1400,7 +1498,7 @@ static int get_mergekey_from_doc(xmlDoc *doc, xmlNode *root, const char *name,
             else if (!strcmp(name, (const char *) type))
             {
                 xmlChar *value = xmlNodeListGetString(doc, n->children, 1);
-                if (value)
+                if (value && *value)
                 {
                     const char *norm_str;
                     pp2_charset_token_t prt =
@@ -1419,10 +1517,11 @@ static int get_mergekey_from_doc(xmlDoc *doc, xmlNode *root, const char *name,
                             wrbuf_puts(norm_wr, norm_str);
                         }
                     }
-                    xmlFree(value);
                     pp2_charset_token_destroy(prt);
                     no_found++;
                 }
+                if (value)
+                    xmlFree(value);
             }
             xmlFree(type);
         }
@@ -1431,15 +1530,25 @@ static int get_mergekey_from_doc(xmlDoc *doc, xmlNode *root, const char *name,
 }
 
 static const char *get_mergekey(xmlDoc *doc, struct client *cl, int record_no,
-                                struct conf_service *service, NMEM nmem)
+                                struct conf_service *service, NMEM nmem,
+                                const char *session_mergekey)
 {
     char *mergekey_norm = 0;
     xmlNode *root = xmlDocGetRootElement(doc);
     WRBUF norm_wr = wrbuf_alloc();
+    xmlChar *mergekey;
 
-    /* consider mergekey from XSL first */
-    xmlChar *mergekey = xmlGetProp(root, (xmlChar *) "mergekey");
-    if (mergekey)
+    if (session_mergekey)
+    {
+        int i, num = 0;
+        char **values = 0;
+        nmem_strsplit_escape2(nmem, ",", session_mergekey, &values,
+                              &num, 1, '\\', 1);
+
+        for (i = 0; i < num; i++)
+            get_mergekey_from_doc(doc, root, values[i], service, norm_wr);
+    }
+    else if ((mergekey = xmlGetProp(root, (xmlChar *) "mergekey")))
     {
         const char *norm_str;
         pp2_charset_token_t prt =
@@ -1597,7 +1706,8 @@ int ingest_record(struct client *cl, const char *rec,
         return -2;
     }
 
-    mergekey_norm = get_mergekey(xdoc, cl, record_no, service, nmem);
+    mergekey_norm = get_mergekey(xdoc, cl, record_no, service, nmem,
+        se->mergekey);
     if (!mergekey_norm)
     {
         session_log(se, YLOG_WARN, "Got no mergekey");
@@ -1605,7 +1715,7 @@ int ingest_record(struct client *cl, const char *rec,
         return -1;
     }
     session_enter(se, "ingest_record");
-    if (client_get_session(cl) == se)
+    if (client_get_session(cl) == se && se->relevance)
         ret = ingest_to_cluster(cl, xdoc, root, record_no, mergekey_norm);
     session_leave(se, "ingest_record");
 
@@ -1795,10 +1905,17 @@ static int ingest_to_cluster(struct client *cl,
 
             type = xmlGetProp(n, (xmlChar *) "type");
             value = xmlNodeListGetString(xdoc, n->children, 1);
-
-            if (!type || !value || !*value)
+            if (!type)
                 continue;
-
+            if (!value || !*value)
+            {
+                xmlChar *empty = xmlGetProp(n, (xmlChar *) "empty");
+                if (!empty)
+                    continue;
+                if (value)
+                    xmlFree(value);
+                value = empty;
+            }
             md_field_id
                 = conf_service_metadata_field_id(service, (const char *) type);
             if (md_field_id < 0)
@@ -1854,7 +1971,8 @@ static int ingest_to_cluster(struct client *cl,
             int hits = (int) client_get_hits(cl);
             term_factor = MAX(hits, maxrecs) /  MAX(1, maxrecs);
             assert(term_factor >= 1);
-            yaz_log(YLOG_DEBUG, "Using term factor: %d (%d / %d)", term_factor, MAX(hits, maxrecs), MAX(1, maxrecs));
+            session_log(se, YLOG_DEBUG, "Using term factor: %d (%d / %d)",
+                        term_factor, MAX(hits, maxrecs), MAX(1, maxrecs));
         }
     }
 
@@ -2075,7 +2193,7 @@ void session_log(struct session *s, int level, const char *fmt, ...)
     va_start(ap, fmt);
 
     yaz_vsnprintf(buf, sizeof(buf)-30, fmt, ap);
-    yaz_log(level, "Session %u: %s", s->session_id, buf);
+    yaz_log(level, "Session %u: %s", s ? s->session_id : 0, buf);
 
     va_end(ap);
 }