Work on fetch more
[pazpar2-moved-to-github.git] / src / session.c
index 62f47eb..148f462 100644 (file)
@@ -1,5 +1,5 @@
 /* This file is part of Pazpar2.
-   Copyright (C) 2006-2012 Index Data
+   Copyright (C) 2006-2013 Index Data
 
 Pazpar2 is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free
@@ -256,8 +256,7 @@ static xmlDoc *record_to_xml(struct session *se,
 
     if (!rdoc)
     {
-        session_log(se, YLOG_FATAL, "Non-wellformed XML received from %s",
-                    db->id);
+        session_log(se, YLOG_WARN, "Non-wellformed XML");
         return 0;
     }
 
@@ -350,8 +349,7 @@ static xmlDoc *normalize_record(struct session *se,
 
         if (normalize_record_transform(sdb->map, &rdoc, (const char **)parms))
         {
-            session_log(se, YLOG_WARN, "Normalize failed from %s",
-                        sdb->database->id);
+            session_log(se, YLOG_WARN, "Normalize failed");
         }
         else
         {
@@ -694,8 +692,10 @@ static void session_sort_unlocked(struct session *se, struct reclist_sortparms *
         sr->type = type;
         sr->next = se->sorted_results;
         se->sorted_results = sr;
+        session_log(se, YLOG_DEBUG, "No research/ingesting done");
+        return ;
     }
-    // yaz_log(YLOG_DEBUG, "Restarting search or re-ingesting for clients due to change in sort order");
+    session_log(se, YLOG_DEBUG, "Re- search/ingesting for clients due to change in sort order");
 
     for (l = se->clients_active; l; l = l->next)
     {
@@ -706,7 +706,7 @@ static void session_sort_unlocked(struct session *se, struct reclist_sortparms *
             client_start_search(cl);
         }
         else {
-            yaz_log(YLOG_DEBUG, "Client %s: Not re-start/ingest in show. Wrong client state: %d",
+            yaz_log(YLOG_DEBUG, "Client %s: No re-start/ingest in show. Wrong client state: %d",
                         client_get_id(cl), client_get_state(cl));
         }
 
@@ -734,7 +734,6 @@ enum pazpar2_error_code session_search(struct session *se,
     int no_failed_query = 0;
     int no_failed_limit = 0;
     struct client_list *l, *l0;
-    facet_limits_t facet_limits;
     int same_sort_order = 0;
 
     session_log(se, YLOG_DEBUG, "Search");
@@ -764,8 +763,9 @@ enum pazpar2_error_code session_search(struct session *se,
         return PAZPAR2_NO_TARGETS;
     }
 
-    facet_limits = facet_limits_create(limit);
-    if (!facet_limits)
+    facet_limits_destroy(se->facet_limits);
+    se->facet_limits = facet_limits_create(limit);
+    if (!se->facet_limits)
     {
         *addinfo = "limit";
         session_leave(se, "session_search");
@@ -784,7 +784,7 @@ enum pazpar2_error_code session_search(struct session *se,
         if (prepare_map(se, client_get_database(cl)) < 0)
             continue;
 
-        parse_ret = client_parse_query(cl, query, facet_limits, se->service->ccl_bibset);
+        parse_ret = client_parse_query(cl, query, se->facet_limits);
         if (parse_ret == -1)
             no_failed_query++;
         else if (parse_ret == -2)
@@ -799,7 +799,6 @@ enum pazpar2_error_code session_search(struct session *se,
             no_working++;
         }
     }
-    facet_limits_destroy(facet_limits);
     session_reset_active_clients(se, l0);
 
     if (no_working == 0)
@@ -931,6 +930,7 @@ void session_destroy(struct session *se)
         session_log(se, YLOG_DEBUG, "NMEN operation usage %zd", nmem_total(se->nmem));
     if (nmem_total(se->session_nmem))
         session_log(se, YLOG_DEBUG, "NMEN session usage %zd", nmem_total(se->session_nmem));
+    facet_limits_destroy(se->facet_limits);
     nmem_destroy(se->nmem);
     service_destroy(se->service);
     yaz_mutex_destroy(&se->session_mutex);
@@ -973,6 +973,7 @@ struct session *new_session(NMEM nmem, struct conf_service *service,
     session->nmem = nmem_create();
     session->databases = 0;
     session->sorted_results = 0;
+    session->facet_limits = 0;
 
     for (i = 0; i <= SESSION_WATCH_MAX; i++)
     {
@@ -1014,7 +1015,8 @@ static struct hitsbytarget *hitsbytarget_nb(struct session *se,
         res[*count].records = client_get_num_records(cl);
         res[*count].filtered = client_get_num_records_filtered(cl);
         res[*count].diagnostic =
-            client_get_diagnostic(cl, &res[*count].addinfo);
+            client_get_diagnostic(cl, &res[*count].message,
+                                  &res[*count].addinfo);
         res[*count].state = client_get_state_str(cl);
         res[*count].connected  = client_get_connection(cl) ? 1 : 0;
         session_settings_dump(se, client_get_database(cl), w);
@@ -1207,6 +1209,8 @@ struct record_cluster *show_single_start(struct session *se, const char *id,
     *next_r = 0;
     if (se->reclist)
     {
+        reclist_limit(se->reclist, se);
+
         reclist_enter(se->reclist);
         while ((r = reclist_read_record(se->reclist)))
         {
@@ -1229,67 +1233,95 @@ void show_single_stop(struct session *se, struct record_cluster *rec)
     session_leave(se, "show_single_stop");
 }
 
+/* Calls client_fetch_more() on every active client in state Client_Idle; returns 1 if any such call returned non-zero, else 0. */
+int session_fetch_more(struct session *se)
+{ /* NOTE(review): no session_enter()/session_leave() pair in this function - presumably the caller serializes access; confirm */
+    struct client_list *l;
+    int ret = 0; /* set to 1 as soon as one client reports more to fetch */
+
+    for (l = se->clients_active; l; l = l->next)
+    {
+        struct client *cl = l->client;
+        if (client_get_state(cl) == Client_Idle)
+        {
+            if (client_fetch_more(cl))
+            {
+                session_log(se, YLOG_LOG, "%s: more to fetch",
+                            client_get_id(cl));
+                ret = 1;
+            }
+            else
+            {
+                session_log(se, YLOG_LOG, "%s: no more to fetch",
+                            client_get_id(cl));
+            }
+        }
+        else /* client is not idle: only log its state, client_fetch_more() is not called */
+        {
+            session_log(se, YLOG_LOG, "%s: no fetch due to state=%s",
+                        client_get_id(cl), client_get_state_str(cl));
+        }
+
+    }
+    return ret;
+}
+
 struct record_cluster **show_range_start(struct session *se,
                                          struct reclist_sortparms *sp,
-                                         int start, int *num, int *total, Odr_int *sumhits, Odr_int *approx_hits)
+                                         int start, int *num, int *total,
+                                         Odr_int *sumhits, Odr_int *approx_hits)
 {
-    struct record_cluster **recs;
+    struct record_cluster **recs = 0;
     struct reclist_sortparms *spp;
+    struct client_list *l;
     int i;
 #if USE_TIMING
     yaz_timing_t t = yaz_timing_create();
 #endif
     session_enter(se, "show_range_start");
-    recs = nmem_malloc(se->nmem, *num * sizeof(struct record_cluster *));
-    if (!se->relevance)
+    *sumhits = 0;
+    *approx_hits = 0;
+    *total = 0;
+    reclist_limit(se->reclist, se);
+    if (se->relevance)
     {
-        *num = 0;
-        *total = 0;
-        *sumhits = 0;
-        *approx_hits = 0;
-        recs = 0;
-    }
-    else
-    {
-        struct client_list *l;
-
         for (spp = sp; spp; spp = spp->next)
             if (spp->type == Metadata_sortkey_relevance)
             {
                 relevance_prepare_read(se->relevance, se->reclist);
                 break;
             }
-        reclist_sort(se->reclist, sp);
-
-        reclist_enter(se->reclist);
-        *total = reclist_get_num_records(se->reclist);
-
-        *sumhits = 0;
-        *approx_hits = 0;
         for (l = se->clients_active; l; l = l->next) {
             *sumhits += client_get_hits(l->client);
             *approx_hits += client_get_approximation(l->client);
         }
-        for (i = 0; i < start; i++)
-            if (!reclist_read_record(se->reclist))
-            {
-                *num = 0;
-                recs = 0;
-                break;
-            }
+    }
+    reclist_sort(se->reclist, sp);
 
-        for (i = 0; i < *num; i++)
+    reclist_enter(se->reclist);
+    *total = reclist_get_num_records(se->reclist);
+
+    for (i = 0; i < start; i++)
+        if (!reclist_read_record(se->reclist))
         {
-            struct record_cluster *r = reclist_read_record(se->reclist);
-            if (!r)
-            {
-                *num = i;
-                break;
-            }
-            recs[i] = r;
+            *num = 0;
+            break;
         }
-        reclist_leave(se->reclist);
+
+    if (*num > 0)
+        recs =
+            nmem_malloc(se->nmem, *num * sizeof(struct record_cluster *));
+    for (i = 0; i < *num; i++)
+    {
+        struct record_cluster *r = reclist_read_record(se->reclist);
+        if (!r)
+        {
+            *num = i;
+            break;
+        }
+        recs[i] = r;
     }
+    reclist_leave(se->reclist);
 #if USE_TIMING
     yaz_timing_stop(t);
     yaz_log(YLOG_LOG, "show %6.5f %3.2f %3.2f",
@@ -1618,6 +1650,92 @@ int ingest_record(struct client *cl, const char *rec,
     return ret;
 }
 
+// Returns 1 if any of values[0..num_v-1] matches an entry in the rec_md0 list, 0 otherwise (also 0 when rec_md0 is NULL or num_v is 0).
+// Callers pass ser_md = &service->metadata[md_field_id] and rec_md0 = <record or cluster>->metadata[md_field_id] for the same field id.
+static int match_metadata_local(struct conf_metadata *ser_md,
+                                struct record_metadata *rec_md0,
+                                char **values, int num_v)
+{
+    int i;
+    struct record_metadata *rec_md = rec_md0;
+    for (i = 0; i < num_v; ) // no increment here: i advances only after the whole list was tried for values[i]
+    {
+        if (rec_md)
+        {
+            if (ser_md->type == Metadata_type_year
+                || ser_md->type == Metadata_type_date)
+            {
+                int y = atoi(values[i]); // year/date fields: numeric test against the entry's [min, max] range
+                if (y >= rec_md->data.number.min
+                    && y <= rec_md->data.number.max)
+                    break;
+            }
+            else
+            {
+                yaz_log(YLOG_DEBUG, "cmp: '%s' '%s'", rec_md->data.text.disp, values[i]);
+                if (!strcmp(rec_md->data.text.disp, values[i])) // other types: exact match on the display text
+                {
+                    // Value equals, should not be filtered.
+                    break;
+                }
+            }
+            rec_md = rec_md->next;
+        }
+        else
+        {
+            rec_md = rec_md0; // list exhausted for values[i]: rewind and try the next value
+            i++;
+        }
+    }
+    return i < num_v ? 1 : 0; // i < num_v only when a break (match) ended the loop early
+}
+/* Returns 1 if rec passes every session facet limit whose metadata entry has limitcluster set, 0 otherwise (also 0 when the limitcluster field id is not found). */
+int session_check_cluster_limit(struct session *se, struct record_cluster *rec)
+{
+    int i;
+    struct conf_service *service = se->service;
+    int ret = 1; /* assume the cluster passes until a limit fails */
+    const char *name;
+    const char *value;
+    NMEM nmem_tmp = nmem_create(); /* scratch pool for the split values; destroyed before returning */
+
+    for (i = 0; (name = facet_limits_get(se->facet_limits, i, &value)); i++)
+    {
+        int j;
+        for (j = 0; j < service->num_metadata; j++)
+        {
+            struct conf_metadata *md = service->metadata + j;
+            if (!strcmp(md->name, name) && md->limitcluster) /* only limits naming a metadata entry with limitcluster set are checked */
+            {
+                char **values = 0;
+                int num = 0;
+                int md_field_id =
+                    conf_service_metadata_field_id(service,
+                                                   md->limitcluster);
+
+                if (md_field_id < 0)
+                {
+                    ret = 0;
+                    break; /* leaves the inner loop only; the outer loop continues but ret is never reset to 1 */
+                }
+
+                nmem_strsplit_escape2(nmem_tmp, "|", value, &values, /* presumably splits value on '|' with '\\' as escape char - confirm against YAZ nmem docs */
+                                      &num, 1, '\\', 1);
+
+                if (!match_metadata_local(&service->metadata[md_field_id],
+                                          rec->metadata[md_field_id],
+                                          values, num))
+                {
+                    ret = 0;
+                    break; /* leaves the inner loop only; see above */
+                }
+            }
+        }
+    }
+    nmem_destroy(nmem_tmp);
+    return ret;
+}
+
 // Skip record on non-zero
 static int check_limit_local(struct client *cl,
                              struct record *record,
@@ -1631,60 +1749,45 @@ static int check_limit_local(struct client *cl,
     int l = 0;
     while (!skip_record)
     {
-        struct conf_metadata *ser_md = 0;
-        struct record_metadata *rec_md = 0;
         int md_field_id;
         char **values = 0;
-        int i, num_v = 0;
-
-        const char *name = client_get_facet_limit_local(cl, sdb, &l, nmem_tmp, &num_v, &values);
+        int num_v = 0;
+        const char *name =
+            client_get_facet_limit_local(cl, sdb, &l, nmem_tmp,
+                                         &num_v, &values);
         if (!name)
             break;
 
-        md_field_id = conf_service_metadata_field_id(service, name);
-        if (md_field_id < 0)
+        if (!strcmp(name, "*"))
         {
-            skip_record = 1;
-            break;
+            for (md_field_id = 0; md_field_id < service->num_metadata;
+                 md_field_id++)
+            {
+                if (match_metadata_local(
+                        &service->metadata[md_field_id],
+                        record->metadata[md_field_id],
+                        values, num_v))
+                    break;
+            }
+            if (md_field_id == service->num_metadata)
+                skip_record = 1;
         }
-        ser_md = &service->metadata[md_field_id];
-        rec_md = record->metadata[md_field_id];
-        yaz_log(YLOG_DEBUG, "check limit local %s", name);
-        for (i = 0; i < num_v; )
+        else
         {
-            if (rec_md)
+            md_field_id = conf_service_metadata_field_id(service, name);
+            if (md_field_id < 0)
             {
-                if (ser_md->type == Metadata_type_year
-                    || ser_md->type == Metadata_type_date)
-                {
-                    int y = atoi(values[i]);
-                    if (y >= rec_md->data.number.min
-                        && y <= rec_md->data.number.max)
-                        break;
-                }
-                else
-                {
-                    yaz_log(YLOG_DEBUG, "cmp: '%s' '%s'", rec_md->data.text.disp, values[i]);
-                    if (!strcmp(rec_md->data.text.disp, values[i]))
-                    {
-                        // Value equals, should not be filtered.
-                        break;
-                    }
-                }
-                rec_md = rec_md->next;
+                skip_record = 1;
+                break;
             }
-            else
+            if (!match_metadata_local(
+                    &service->metadata[md_field_id],
+                    record->metadata[md_field_id],
+                    values, num_v))
             {
-                rec_md = record->metadata[md_field_id];
-                i++;
+                skip_record = 1;
             }
         }
-        // At end , not match
-        if (i == num_v)
-        {
-            skip_record = 1;
-            break;
-        }
     }
     nmem_destroy(nmem_tmp);
     return skip_record;
@@ -1703,6 +1806,7 @@ static int ingest_to_cluster(struct client *cl,
     struct conf_service *service = se->service;
     int term_factor = 1;
     struct record_cluster *cluster;
+    struct record_metadata **metadata0;
     struct session_database *sdb = client_get_database(cl);
     struct record *record = record_create(se->nmem,
                                           service->num_metadata,
@@ -1798,6 +1902,11 @@ static int ingest_to_cluster(struct client *cl,
 
     relevance_newrec(se->relevance, cluster);
 
+    // snapshot of the cluster's metadata before this record is merged; merge=first uses it to see if a field already existed
+    metadata0 = xmalloc(sizeof(*metadata0) * service->num_metadata);
+    memcpy(metadata0, cluster->metadata,
+           sizeof(*metadata0) * service->num_metadata);
+
     // now parsing XML record and adding data to cluster or record metadata
     for (n = root->children; n; n = n->next)
     {
@@ -1843,6 +1952,9 @@ static int ingest_to_cluster(struct client *cl,
             // merged metadata
             rec_md = record_metadata_init(se->nmem, (const char *) value,
                                           ser_md->type, 0);
+
+            // whether the field was already in the cluster (from the beginning) is checked below via metadata0
+
             if (!rec_md)
                 continue;
 
@@ -1851,9 +1963,16 @@ static int ingest_to_cluster(struct client *cl,
 
             wheretoput = &cluster->metadata[md_field_id];
 
-            // and polulate with data:
-            // assign cluster or record based on merge action
-            if (ser_md->merge == Metadata_merge_unique)
+            if (ser_md->merge == Metadata_merge_first)
+            {
+                if (!metadata0[md_field_id])
+                {
+                    while (*wheretoput)
+                        wheretoput = &(*wheretoput)->next;
+                    *wheretoput = rec_md;
+                }
+            }
+            else if (ser_md->merge == Metadata_merge_unique)
             {
                 while (*wheretoput)
                 {
@@ -1979,6 +2098,7 @@ static int ingest_to_cluster(struct client *cl,
     if (value)
         xmlFree(value);
 
+    xfree(metadata0);
     relevance_donerecord(se->relevance, cluster);
     se->total_records++;