Fix session_nmem may be used by multiple threads PAZ-962
[pazpar2-moved-to-github.git] / src / session.c
index 9091b6b..69d05ae 100644 (file)
@@ -1,5 +1,5 @@
 /* This file is part of Pazpar2.
-   Copyright (C) 2006-2013 Index Data
+   Copyright (C) Index Data
 
 Pazpar2 is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free
@@ -57,7 +57,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include <yaz/oid_db.h>
 #include <yaz/snprintf.h>
 
-#define USE_TIMING 1
+#define USE_TIMING 0
 #if USE_TIMING
 #include <yaz/timing.h>
 #endif
@@ -883,7 +883,7 @@ void session_init_databases(struct session *se)
 // Probably session_init_databases_fun should be refactored instead of
 // called here.
 static struct session_database *load_session_database(struct session *se,
-                                                      char *id)
+                                                      const char *id)
 {
     struct database *db = new_database_inherit_settings(id, se->session_nmem, se->service->settings);
     session_init_databases_fun((void*) se, db);
@@ -894,7 +894,7 @@ static struct session_database *load_session_database(struct session *se,
 
 // Find an existing session database. If not found, load it
 static struct session_database *find_session_database(struct session *se,
-                                                      char *id)
+                                                      const char *id)
 {
     struct session_database *sdb;
 
@@ -905,36 +905,39 @@ static struct session_database *find_session_database(struct session *se,
 }
 
 // Apply a session override to a database
-void session_apply_setting(struct session *se, char *dbname, char *setting,
-                           char *value)
+void session_apply_setting(struct session *se, const char *dbname,
+                           const char *name, const char *value) // strings are copied with nmem_strdup below
 {
-    struct session_database *sdb = find_session_database(se, dbname);
-    struct conf_service *service = se->service;
-    struct setting *new = nmem_malloc(se->session_nmem, sizeof(*new));
-    int offset = settings_create_offset(service, setting);
-
-    expand_settings_array(&sdb->settings, &sdb->num_settings, offset,
-                          se->session_nmem);
-    new->precedence = 0;
-    new->target = dbname;
-    new->name = setting;
-    new->value = value;
-    new->next = sdb->settings[offset];
-    sdb->settings[offset] = new;
-
-    se->settings_modified = 1;
-
-    // Force later recompute of settings-driven data structures
-    // (happens when a search starts and client connections are prepared)
-    switch (offset)
-    {
-    case PZ_XSLT:
-        if (sdb->map)
-        {
+    session_enter(se, "session_apply_setting"); // paired with session_leave() at the end of the function
+    {
+        struct session_database *sdb = find_session_database(se, dbname);
+        struct conf_service *service = se->service;
+        struct setting *s;
+        int offset = settings_create_offset(service, name);
+
+        expand_settings_array(&sdb->settings, &sdb->num_settings, offset,
+                              se->session_nmem);
+        // Force later recompute of settings-driven data structures
+        // (happens when a search starts and client connections are prepared)
+        if (offset == PZ_XSLT)
             sdb->map = 0;
+        se->settings_modified = 1;
+        for (s = sdb->settings[offset]; s; s = s->next) // look for an entry with the same name and target
+            if (!strcmp(s->name, name) &&
+                dbname && s->target && !strcmp(dbname, s->target))
+                break;
+        if (!s) // no match: build a new entry and link it at the head of the chain
+        {
+            s = nmem_malloc(se->session_nmem, sizeof(*s));
+            s->precedence = 0;
+            s->target = nmem_strdup(se->session_nmem, dbname);
+            s->name = nmem_strdup(se->session_nmem, name);
+            s->next = sdb->settings[offset];
+            sdb->settings[offset] = s;
         }
-        break;
+        s->value = nmem_strdup(se->session_nmem, value); // set for new and reused entries alike
     }
+    session_leave(se, "session_apply_setting");
 }
 
 void session_destroy(struct session *se)
@@ -1036,13 +1039,13 @@ static struct hitsbytarget *hitsbytarget_nb(struct session *se,
         WRBUF w = wrbuf_alloc();
         const char *name = session_setting_oneval(client_get_database(cl),
                                                   PZ_NAME);
-
         res[*count].id = client_get_id(cl);
         res[*count].name = *name ? name : "Unknown";
         res[*count].hits = client_get_hits(cl);
         res[*count].approximation = client_get_approximation(cl);
-        res[*count].records = client_get_num_records(cl);
-        res[*count].filtered = client_get_num_records_filtered(cl);
+        res[*count].records = client_get_num_records(cl,
+                                                     &res[*count].filtered,
+                                                     0, 0);
         res[*count].diagnostic =
             client_get_diagnostic(cl, &res[*count].message,
                                   &res[*count].addinfo);
@@ -1278,8 +1281,24 @@ int session_fetch_more(struct session *se)
             }
             else
             {
-                session_log(se, YLOG_LOG, "%s: no more to fetch",
-                            client_get_id(cl));
+                int filtered;
+                int ingest_failures;
+                int record_failures;
+                int num = client_get_num_records(
+                    cl, &filtered, &ingest_failures, &record_failures);
+
+                session_log(se, YLOG_LOG, "%s: hits=" ODR_INT_PRINTF
+                            " fetched=%d filtered=%d",
+                            client_get_id(cl),
+                            client_get_hits(cl),
+                            num, filtered);
+                if (ingest_failures || record_failures)
+                {
+                    session_log(se, YLOG_WARN, "%s:"
+                                " ingest failures=%d record failures=%d",
+                                client_get_id(cl),
+                                ingest_failures, record_failures);
+                }
             }
         }
         else
@@ -1314,7 +1333,7 @@ struct record_cluster **show_range_start(struct session *se,
     if (se->relevance)
     {
         for (spp = sp; spp; spp = spp->next)
-            if (spp->type == Metadata_sortkey_relevance)
+            if (spp->type == Metadata_type_relevance)
             {
                 relevance_prepare_read(se->relevance, se->reclist);
                 break;
@@ -1455,17 +1474,20 @@ static struct record_metadata *record_metadata_init(
     }
     *attrp = 0;
 
-    if (type == Metadata_type_generic)
+    switch (type)
     {
-        char *p = nmem_strdup(nmem, value);
-
-        p = normalize7bit_generic(p, " ,/.:([");
-
-        rec_md->data.text.disp = p;
+    case Metadata_type_generic:
+    case Metadata_type_skiparticle:
+        if (strstr(value, "://")) /* looks like a URL */
+            rec_md->data.text.disp = nmem_strdup(nmem, value);
+        else
+            rec_md->data.text.disp =
+                normalize7bit_generic(nmem_strdup(nmem, value), " ,/.:([");
         rec_md->data.text.sort = 0;
         rec_md->data.text.snippet = 0;
-    }
-    else if (type == Metadata_type_year || type == Metadata_type_date)
+        break;
+    case Metadata_type_year:
+    case Metadata_type_date:
     {
         int first, last;
         int longdate = 0;
@@ -1478,8 +1500,14 @@ static struct record_metadata *record_metadata_init(
         rec_md->data.number.min = first;
         rec_md->data.number.max = last;
     }
-    else
+    break;
+    case Metadata_type_float:
+        rec_md->data.fnumber = atof(value);
+        break;
+    case Metadata_type_relevance:
+    case Metadata_type_position:
         return 0;
+    }
     return rec_md;
 }
 
@@ -1667,18 +1695,15 @@ static int ingest_to_cluster(struct client *cl,
                              xmlDoc *xdoc,
                              xmlNode *root,
                              int record_no,
-                             const char *mergekey_norm);
+                             struct record_metadata_attr *mergekey);
 
 static int ingest_sub_record(struct client *cl, xmlDoc *xdoc, xmlNode *root,
                              int record_no, NMEM nmem,
                              struct session_database *sdb,
-                             const char **mergekey_norm)
+                             struct record_metadata_attr *mergekeys)
 {
     int ret = 0;
     struct session *se = client_get_session(cl);
-    struct conf_service *service = se->service;
-
-    insert_settings_values(sdb, xdoc, root, service);
 
     if (!check_record_filter(root, sdb))
     {
@@ -1687,20 +1712,9 @@ static int ingest_sub_record(struct client *cl, xmlDoc *xdoc, xmlNode *root,
                     record_no, sdb->database->id);
         return 0;
     }
-    if (!*mergekey_norm)
-    {
-        *mergekey_norm = get_mergekey(xdoc, root, cl, record_no, service, nmem,
-                                      se->mergekey);
-    }
-    if (!*mergekey_norm)
-    {
-        session_log(se, YLOG_WARN, "Got no mergekey for record no %d from %s",
-                    record_no, sdb->database->id);
-        return -1;
-    }
     session_enter(se, "ingest_sub_record");
     if (client_get_session(cl) == se && se->relevance)
-        ret = ingest_to_cluster(cl, xdoc, root, record_no, *mergekey_norm);
+        ret = ingest_to_cluster(cl, xdoc, root, record_no, mergekeys);
     session_leave(se, "ingest_sub_record");
 
     return ret;
@@ -1722,10 +1736,19 @@ int ingest_record(struct client *cl, const char *rec,
     struct session_database *sdb = client_get_database(cl);
     struct conf_service *service = se->service;
     xmlDoc *xdoc = normalize_record(se, sdb, service, rec, nmem);
-    int r = 0;
-    xmlNode *root;
-    const char *mergekey_norm = 0;
+    int r = ingest_xml_record(cl, xdoc, record_no, nmem, 0);
+    client_store_xdoc(cl, record_no, xdoc);
+    return r;
+}
 
+int ingest_xml_record(struct client *cl, xmlDoc *xdoc,
+                      int record_no, NMEM nmem, int cached_copy)
+{
+    struct session *se = client_get_session(cl);
+    struct session_database *sdb = client_get_database(cl);
+    struct conf_service *service = se->service;
+    xmlNode *root;
+    int r = 0;
     if (!xdoc)
         return -1;
 
@@ -1740,19 +1763,71 @@ int ingest_record(struct client *cl, const char *rec,
 
     if (!strcmp((const char *) root->name, "cluster"))
     {
-        for (root = root->children; root; root = root->next)
-            if (root->type == XML_ELEMENT_NODE)
+        int no_merge_keys = 0;
+        int no_merge_dups = 0;
+        xmlNode *sroot;
+        struct record_metadata_attr *mk = 0;
+
+        for (sroot = root->children; sroot; sroot = sroot->next)
+            if (sroot->type == XML_ELEMENT_NODE &&
+                !strcmp((const char *) sroot->name, "record"))
             {
-                r = ingest_sub_record(cl, xdoc, root, record_no, nmem, sdb,
-                    &mergekey_norm);
-                if (r)
+                struct record_metadata_attr **mkp;
+                const char *mergekey_norm =
+                    get_mergekey(xdoc, sroot, cl, record_no, service, nmem,
+                                 se->mergekey);
+                if (!mergekey_norm)
+                {
+                    r = -1;
                     break;
+                }
+                for (mkp = &mk; *mkp; mkp = &(*mkp)->next)
+                    if (!strcmp((*mkp)->value, mergekey_norm))
+                        break;
+                if (!*mkp)
+                {
+                    *mkp = (struct record_metadata_attr*)
+                        nmem_malloc(nmem, sizeof(**mkp));
+                    (*mkp)->name = 0;
+                    (*mkp)->value = nmem_strdup(nmem, mergekey_norm);
+                    (*mkp)->next = 0;
+                    no_merge_keys++;
+                }
+                else
+                    no_merge_dups++;
+            }
+        if (no_merge_keys > 1 || no_merge_dups > 0)
+        {
+            yaz_log(YLOG_LOG, "Got %d mergekeys, %d dups for position %d",
+                    no_merge_keys, no_merge_dups, record_no);
+        }
+        for (sroot = root->children; !r && sroot; sroot = sroot->next)
+            if (sroot->type == XML_ELEMENT_NODE &&
+                !strcmp((const char *) sroot->name, "record"))
+            {
+                if (!cached_copy)
+                    insert_settings_values(sdb, xdoc, root, service);
+                r = ingest_sub_record(cl, xdoc, sroot, record_no, nmem, sdb,
+                                      mk);
             }
     }
     else if (!strcmp((const char *) root->name, "record"))
     {
-        r = ingest_sub_record(cl, xdoc, root, record_no, nmem, sdb,
-                              &mergekey_norm);
+        const char *mergekey_norm =
+            get_mergekey(xdoc, root, cl, record_no, service, nmem,
+                         se->mergekey);
+        if (mergekey_norm)
+        {
+            struct record_metadata_attr *mk = (struct record_metadata_attr*)
+                nmem_malloc(nmem, sizeof(*mk));
+            mk->name = 0;
+            mk->value = nmem_strdup(nmem, mergekey_norm);
+            mk->next = 0;
+
+            if (!cached_copy)
+                insert_settings_values(sdb, xdoc, root, service);
+            r = ingest_sub_record(cl, xdoc, root, record_no, nmem, sdb, mk);
+        }
     }
     else
     {
@@ -1760,7 +1835,6 @@ int ingest_record(struct client *cl, const char *rec,
                     (const char *) root->name);
         r = -1;
     }
-    xmlFreeDoc(xdoc);
     return r;
 }
 
@@ -1926,7 +2000,7 @@ static int ingest_to_cluster(struct client *cl,
                              xmlDoc *xdoc,
                              xmlNode *root,
                              int record_no,
-                             const char *mergekey_norm)
+                             struct record_metadata_attr *merge_keys)
 {
     xmlNode *n;
     xmlChar *type = 0;
@@ -2020,18 +2094,22 @@ static int ingest_to_cluster(struct client *cl,
 
     if (check_limit_local(cl, record, record_no))
     {
-        session_log(se, YLOG_LOG, "Facet filtered out record no %d from %s",
-                    record_no, sdb->database->id);
         if (type)
             xmlFree(type);
         if (value)
             xmlFree(value);
         return -2;
     }
-    cluster = reclist_insert(se->reclist, service, record,
-                             mergekey_norm, &se->total_merged);
+    cluster = reclist_insert(se->reclist, se->relevance, service, record,
+                             merge_keys, &se->total_merged);
     if (!cluster)
+    {
+        if (type)
+            xmlFree(type);
+        if (value)
+            xmlFree(value);
         return 0; // complete match with existing record
+    }
 
     {
         const char *use_term_factor_str =
@@ -2051,9 +2129,6 @@ static int ingest_to_cluster(struct client *cl,
         session_log(se, YLOG_LOG, "Cluster id %s from %s (#%d)", cluster->recid,
                     sdb->database->id, record_no);
 
-
-    relevance_newrec(se->relevance, cluster);
-
     // original metadata, to check if first existence of a field
     metadata0 = xmalloc(sizeof(*metadata0) * service->num_metadata);
     memcpy(metadata0, cluster->metadata,
@@ -2175,7 +2250,7 @@ static int ingest_to_cluster(struct client *cl,
                     {
                         const char *sort_str = 0;
                         int skip_article =
-                            ser_sk->type == Metadata_sortkey_skiparticle;
+                            ser_sk->type == Metadata_type_skiparticle;
 
                         if (!cluster->sortkeys[sk_field_id])
                             cluster->sortkeys[sk_field_id] =