Avoid removal of trailing chars if metadata looks like URL PAZ-915
[pazpar2-moved-to-github.git] / src / session.c
index 0aaec33..6d7ab65 100644 (file)
@@ -1,5 +1,5 @@
 /* This file is part of Pazpar2.
-   Copyright (C) 2006-2013 Index Data
+   Copyright (C) Index Data
 
 Pazpar2 is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free
@@ -297,7 +297,8 @@ static void insert_settings_parameters(struct session_database *sdb,
 
 // Add static values from session database settings if applicable
 static void insert_settings_values(struct session_database *sdb, xmlDoc *doc,
-    struct conf_service *service)
+                                   xmlNode *root,
+                                   struct conf_service *service)
 {
     int i;
 
@@ -312,8 +313,7 @@ static void insert_settings_values(struct session_database *sdb, xmlDoc *doc,
             const char *val = session_setting_oneval(sdb, offset);
             if (val)
             {
-                xmlNode *r = xmlDocGetRootElement(doc);
-                xmlNode *n = xmlNewTextChild(r, 0, (xmlChar *) "metadata",
+                xmlNode *n = xmlNewTextChild(root, 0, (xmlChar *) "metadata",
                                              (xmlChar *) val);
                 xmlSetProp(n, (xmlChar *) "type", (xmlChar *) md->name);
             }
@@ -338,17 +338,6 @@ static xmlDoc *normalize_record(struct session *se,
         {
             session_log(se, YLOG_WARN, "Normalize failed");
         }
-        else
-        {
-            insert_settings_values(sdb, rdoc, service);
-
-            if (global_parameters.dump_records)
-            {
-                session_log(se, YLOG_LOG, "Normalized record from %s",
-                            sdb->database->id);
-                log_xml_doc(rdoc);
-            }
-        }
     }
     return rdoc;
 }
@@ -748,6 +737,7 @@ enum pazpar2_error_code session_search(struct session *se,
                                        const char *filter,
                                        const char *limit,
                                        const char **addinfo,
+                                       const char **addinfo2,
                                        struct reclist_sortparms *sp,
                                        const char *mergekey,
                                        const char *rank)
@@ -758,6 +748,11 @@ enum pazpar2_error_code session_search(struct session *se,
     int no_failed_limit = 0;
     struct client_list *l, *l0;
 
+    session_alert_watch(se, SESSION_WATCH_SHOW);
+    session_alert_watch(se, SESSION_WATCH_BYTARGET);
+    session_alert_watch(se, SESSION_WATCH_TERMLIST);
+    session_alert_watch(se, SESSION_WATCH_SHOW_PREF);
+
     session_log(se, YLOG_DEBUG, "Search");
 
     *addinfo = 0;
@@ -813,7 +808,7 @@ enum pazpar2_error_code session_search(struct session *se,
         if (prepare_map(se, client_get_database(cl)) < 0)
             continue;
 
-        parse_ret = client_parse_query(cl, query, se->facet_limits);
+        parse_ret = client_parse_query(cl, query, se->facet_limits, addinfo2);
         if (parse_ret == -1)
             no_failed_query++;
         else if (parse_ret == -2)
@@ -845,7 +840,6 @@ enum pazpar2_error_code session_search(struct session *se,
         else
             return PAZPAR2_NO_TARGETS;
     }
-    session_log(se, YLOG_LOG, "session_start_search done");
     return PAZPAR2_NO_ERROR;
 }
 
@@ -946,7 +940,7 @@ void session_apply_setting(struct session *se, char *dbname, char *setting,
 void session_destroy(struct session *se)
 {
     struct session_database *sdb;
-    session_log(se, YLOG_DEBUG, "Destroying");
+    session_log(se, YLOG_LOG, "destroy");
     session_use(-1);
     session_remove_cached_clients(se);
 
@@ -1016,6 +1010,8 @@ struct session *new_session(NMEM nmem, struct conf_service *service,
     session->normalize_cache = normalize_cache_create();
     session->session_mutex = 0;
     pazpar2_mutex_create(&session->session_mutex, tmp_str);
+    session_log(session, YLOG_LOG, "create");
+
     session_use(1);
     return session;
 }
@@ -1240,6 +1236,7 @@ struct record_cluster *show_single_start(struct session *se, const char *id,
     session_enter(se, "show_single_start");
     *prev_r = 0;
     *next_r = 0;
+    reclist_limit(se->reclist, se, 1);
 
     reclist_enter(se->reclist);
     while ((r = reclist_read_record(se->reclist)))
@@ -1313,11 +1310,11 @@ struct record_cluster **show_range_start(struct session *se,
     *sumhits = 0;
     *approx_hits = 0;
     *total = 0;
-    reclist_limit(se->reclist, se);
+    reclist_limit(se->reclist, se, 0);
     if (se->relevance)
     {
         for (spp = sp; spp; spp = spp->next)
-            if (spp->type == Metadata_sortkey_relevance)
+            if (spp->type == Metadata_type_relevance)
             {
                 relevance_prepare_read(se->relevance, se->reclist);
                 break;
@@ -1458,17 +1455,20 @@ static struct record_metadata *record_metadata_init(
     }
     *attrp = 0;
 
-    if (type == Metadata_type_generic)
+    switch (type)
     {
-        char *p = nmem_strdup(nmem, value);
-
-        p = normalize7bit_generic(p, " ,/.:([");
-
-        rec_md->data.text.disp = p;
+    case Metadata_type_generic:
+    case Metadata_type_skiparticle:
+        if (strstr(value, "://")) /* looks like a URL */
+            rec_md->data.text.disp = nmem_strdup(nmem, value);
+        else
+            rec_md->data.text.disp =
+                normalize7bit_generic(nmem_strdup(nmem, value), " ,/.:([");
         rec_md->data.text.sort = 0;
         rec_md->data.text.snippet = 0;
-    }
-    else if (type == Metadata_type_year || type == Metadata_type_date)
+        break;
+    case Metadata_type_year:
+    case Metadata_type_date:
     {
         int first, last;
         int longdate = 0;
@@ -1481,8 +1481,14 @@ static struct record_metadata *record_metadata_init(
         rec_md->data.number.min = first;
         rec_md->data.number.max = last;
     }
-    else
+    break;
+    case Metadata_type_float:
+        rec_md->data.fnumber = atof(value);
+        break;
+    case Metadata_type_relevance:
+    case Metadata_type_position:
         return 0;
+    }
     return rec_md;
 }
 
@@ -1542,12 +1548,12 @@ static int get_mergekey_from_doc(xmlDoc *doc, xmlNode *root, const char *name,
     return no_found;
 }
 
-static const char *get_mergekey(xmlDoc *doc, struct client *cl, int record_no,
+static const char *get_mergekey(xmlDoc *doc, xmlNode *root, 
+                                struct client *cl, int record_no,
                                 struct conf_service *service, NMEM nmem,
                                 const char *session_mergekey)
 {
     char *mergekey_norm = 0;
-    xmlNode *root = xmlDocGetRootElement(doc);
     WRBUF norm_wr = wrbuf_alloc();
     xmlChar *mergekey;
 
@@ -1666,12 +1672,37 @@ static int check_record_filter(xmlNode *root, struct session_database *sdb)
     return match;
 }
 
-
 static int ingest_to_cluster(struct client *cl,
                              xmlDoc *xdoc,
                              xmlNode *root,
                              int record_no,
-                             const char *mergekey_norm);
+                             struct record_metadata_attr *mergekey);
+
+static int ingest_sub_record(struct client *cl, xmlDoc *xdoc, xmlNode *root,
+                             int record_no, NMEM nmem, /* nmem is not referenced in this body */
+                             struct session_database *sdb,
+                             struct record_metadata_attr *mergekeys)
+{ /* Ingest one record node (root): add settings values, apply the record filter, then pass it to ingest_to_cluster(). */
+    int ret = 0; /* stays 0 unless ingest_to_cluster() is called below */
+    struct session *se = client_get_session(cl);
+    struct conf_service *service = se->service;
+
+    insert_settings_values(sdb, xdoc, root, service); /* runs before the filter, which inspects the same root */
+
+    if (!check_record_filter(root, sdb))
+    {
+        session_log(se, YLOG_LOG,
+                    "Filtered out record no %d from %s",
+                    record_no, sdb->database->id);
+        return 0; /* a filtered-out record yields 0 */
+    }
+    session_enter(se, "ingest_sub_record"); /* NOTE(review): presumably takes the session lock -- confirm */
+    if (client_get_session(cl) == se && se->relevance) /* ingest only if cl still maps to se and se->relevance is set */
+        ret = ingest_to_cluster(cl, xdoc, root, record_no, mergekeys);
+    session_leave(se, "ingest_sub_record");
+
+    return ret; /* 0, or whatever ingest_to_cluster() returned */
+}
 
 /** \brief ingest XML record
     \param cl client holds the result set for record
@@ -1686,42 +1717,99 @@ int ingest_record(struct client *cl, const char *rec,
                   int record_no, NMEM nmem)
 {
     struct session *se = client_get_session(cl);
-    int ret = 0;
     struct session_database *sdb = client_get_database(cl);
     struct conf_service *service = se->service;
     xmlDoc *xdoc = normalize_record(se, sdb, service, rec, nmem);
+    int r = 0;
     xmlNode *root;
-    const char *mergekey_norm;
 
     if (!xdoc)
         return -1;
 
+    if (global_parameters.dump_records)
+    {
+        session_log(se, YLOG_LOG, "Normalized record from %s",
+                    sdb->database->id);
+        log_xml_doc(xdoc);
+    }
+
     root = xmlDocGetRootElement(xdoc);
 
-    if (!check_record_filter(root, sdb))
+    if (!strcmp((const char *) root->name, "cluster"))
     {
-        session_log(se, YLOG_LOG, "Filtered out record no %d from %s", record_no, sdb->database->id);
-        xmlFreeDoc(xdoc);
-        return -2;
+        int no_merge_keys = 0;
+        int no_merge_dups = 0;
+        xmlNode *sroot;
+        struct record_metadata_attr *mk = 0;
+
+        for (sroot = root->children; sroot; sroot = sroot->next)
+            if (sroot->type == XML_ELEMENT_NODE &&
+                !strcmp((const char *) sroot->name, "record"))
+            {
+                struct record_metadata_attr **mkp;
+                const char *mergekey_norm =
+                    get_mergekey(xdoc, sroot, cl, record_no, service, nmem,
+                                 se->mergekey);
+                if (!mergekey_norm)
+                {
+                    r = -1;
+                    break;
+                }
+                for (mkp = &mk; *mkp; mkp = &(*mkp)->next)
+                    if (!strcmp((*mkp)->value, mergekey_norm))
+                        break;
+                if (!*mkp)
+                {
+                    *mkp = (struct record_metadata_attr*)
+                        nmem_malloc(nmem, sizeof(**mkp));
+                    (*mkp)->name = 0;
+                    (*mkp)->value = nmem_strdup(nmem, mergekey_norm);
+                    (*mkp)->next = 0;
+                    no_merge_keys++;
+                }
+                else
+                    no_merge_dups++;
+            }
+        if (no_merge_keys > 1 || no_merge_dups > 0)
+        {
+            yaz_log(YLOG_LOG, "Got %d mergekeys, %d dups for position %d",
+                    no_merge_keys, no_merge_dups, record_no);
+        }
+        for (sroot = root->children; !r && sroot; sroot = sroot->next)
+            if (sroot->type == XML_ELEMENT_NODE &&
+                !strcmp((const char *) sroot->name, "record"))
+            {
+                r = ingest_sub_record(cl, xdoc, sroot, record_no, nmem, sdb,
+                                      mk);
+            }
     }
+    else if (!strcmp((const char *) root->name, "record"))
+    {
+        const char *mergekey_norm =
+            get_mergekey(xdoc, root, cl, record_no, service, nmem,
+                         se->mergekey);
+        if (mergekey_norm)
+        {
+            struct record_metadata_attr *mk = (struct record_metadata_attr*)
+                nmem_malloc(nmem, sizeof(*mk));
+            mk->name = 0;
+            mk->value = nmem_strdup(nmem, mergekey_norm);
+            mk->next = 0;
 
-    mergekey_norm = get_mergekey(xdoc, cl, record_no, service, nmem,
-        se->mergekey);
-    if (!mergekey_norm)
+            r = ingest_sub_record(cl, xdoc, root, record_no, nmem, sdb, mk);
+        }
+    }
+    else
     {
-        session_log(se, YLOG_WARN, "Got no mergekey");
-        xmlFreeDoc(xdoc);
-        return -1;
+        session_log(se, YLOG_WARN, "Bad pz root element: %s",
+                    (const char *) root->name);
+        r = -1;
     }
-    session_enter(se, "ingest_record");
-    if (client_get_session(cl) == se && se->relevance)
-        ret = ingest_to_cluster(cl, xdoc, root, record_no, mergekey_norm);
-    session_leave(se, "ingest_record");
-
     xmlFreeDoc(xdoc);
-    return ret;
+    return r;
 }
 
+
 //    struct conf_metadata *ser_md = &service->metadata[md_field_id];
 //    struct record_metadata *rec_md = record->metadata[md_field_id];
 static int match_metadata_local(struct conf_service *service,
@@ -1883,7 +1971,7 @@ static int ingest_to_cluster(struct client *cl,
                              xmlDoc *xdoc,
                              xmlNode *root,
                              int record_no,
-                             const char *mergekey_norm)
+                             struct record_metadata_attr *merge_keys)
 {
     xmlNode *n;
     xmlChar *type = 0;
@@ -1985,10 +2073,16 @@ static int ingest_to_cluster(struct client *cl,
             xmlFree(value);
         return -2;
     }
-    cluster = reclist_insert(se->reclist, service, record,
-                             mergekey_norm, &se->total_merged);
+    cluster = reclist_insert(se->reclist, se->relevance, service, record,
+                             merge_keys, &se->total_merged);
     if (!cluster)
+    {
+        if (type)
+            xmlFree(type);
+        if (value)
+            xmlFree(value);
         return 0; // complete match with existing record
+    }
 
     {
         const char *use_term_factor_str =
@@ -2008,9 +2102,6 @@ static int ingest_to_cluster(struct client *cl,
         session_log(se, YLOG_LOG, "Cluster id %s from %s (#%d)", cluster->recid,
                     sdb->database->id, record_no);
 
-
-    relevance_newrec(se->relevance, cluster);
-
     // original metadata, to check if first existence of a field
     metadata0 = xmalloc(sizeof(*metadata0) * service->num_metadata);
     memcpy(metadata0, cluster->metadata,
@@ -2132,7 +2223,7 @@ static int ingest_to_cluster(struct client *cl,
                     {
                         const char *sort_str = 0;
                         int skip_article =
-                            ser_sk->type == Metadata_sortkey_skiparticle;
+                            ser_sk->type == Metadata_type_skiparticle;
 
                         if (!cluster->sortkeys[sk_field_id])
                             cluster->sortkeys[sk_field_id] =