Version 1.11.0
[pazpar2-moved-to-github.git] / src / session.c
index 9e238a6..e76da0a 100644 (file)
@@ -203,13 +203,66 @@ static void session_normalize_facet(struct session *s,
     run_icu(s, icu_chain_id, value, facet_wrbuf, display_wrbuf);
 }
 
-void add_facet(struct session *s, const char *type, const char *value, int count)
+struct facet_id { /* one (client, type, term) -> id entry of s->facet_id_list */
+    char *client_id; /* copy of client_get_id() for the client given to add_facet */
+    char *type; /* facet type; add_facet has already skipped a "split:" prefix */
+    char *id; /* id part: text before the first ':' of a "split:" facet value */
+    char *term; /* display term, same string that is passed to termlist_insert */
+    struct facet_id *next; /* previously added entry (list is prepended to) */
+};
+/* Prepend an id entry for (cl, type, term) to s->facet_id_list; no duplicate check. */
+static void session_add_id_facet(struct session *s, struct client *cl,
+                                 const char *type,
+                                 const char *id,
+                                 size_t id_len, /* number of bytes of id to keep */
+                                 const char *term)
+{
+    struct facet_id *t = nmem_malloc(s->session_nmem, sizeof(*t));
+    /* every string below is copied into s->session_nmem, like the node itself */
+    t->client_id = nmem_strdup(s->session_nmem, client_get_id(cl));
+    t->type = nmem_strdup(s->session_nmem, type);
+    t->id = nmem_strdupn(s->session_nmem, id, id_len); /* add_facet passes id pointing into value, ended by ':' */
+    t->term = nmem_strdup(s->session_nmem, term);
+    t->next = s->facet_id_list; /* new entry becomes the list head */
+    s->facet_id_list = t;
+}
+
+/* Return the id recorded for (cl, type, term) in s->facet_id_list, or 0 if none. */
+const char *session_lookup_id_facet(struct session *s, struct client *cl,
+                                    const char *type,
+                                    const char *term)
+{
+    struct facet_id *t = s->facet_id_list;
+    for (; t; t = t->next) /* entries are prepended, so the newest match is found first */
+        if (!strcmp(client_get_id(cl), t->client_id) &&
+            !strcmp(t->type, type) && !strcmp(t->term, term))
+        {
+            return t->id; /* pointer into the list entry, not a fresh copy */
+        }
+    return 0; /* no entry matched all three keys */
+}
+
+void add_facet(struct session *s, const char *type, const char *value, int count, struct client *cl)
 {
     WRBUF facet_wrbuf = wrbuf_alloc();
     WRBUF display_wrbuf = wrbuf_alloc();
+    const char *id = 0;
+    size_t id_len = 0;
 
-    session_normalize_facet(s, type, value, display_wrbuf, facet_wrbuf);
+    /* inspect pz:facetmap:split:name ?? */
+    if (!strncmp(type, "split:", 6))
+    {
+        const char *cp = strchr(value, ':');
+        if (cp)
+        {
+            id = value;
+            id_len = cp - value;
+            value = cp + 1;
+        }
+        type += 6;
+    }
 
+    session_normalize_facet(s, type, value, display_wrbuf, facet_wrbuf);
     if (wrbuf_len(facet_wrbuf))
     {
         struct named_termlist **tp = &s->termlists;
@@ -224,7 +277,10 @@ void add_facet(struct session *s, const char *type, const char *value, int count
             (*tp)->next = 0;
         }
         termlist_insert((*tp)->termlist, wrbuf_cstr(display_wrbuf),
-                        wrbuf_cstr(facet_wrbuf), count);
+                        wrbuf_cstr(facet_wrbuf), id, id_len, count);
+        if (id)
+            session_add_id_facet(s, cl, type, id, id_len,
+                                 wrbuf_cstr(display_wrbuf));
     }
     wrbuf_destroy(facet_wrbuf);
     wrbuf_destroy(display_wrbuf);
@@ -1027,6 +1083,7 @@ struct session *new_session(NMEM nmem, struct conf_service *service,
     session->clients_cached = 0;
     session->settings_modified = 0;
     session->session_nmem = nmem;
+    session->facet_id_list = 0;
     session->nmem = nmem_create();
     session->databases = 0;
     session->sorted_results = 0;
@@ -1049,8 +1106,6 @@ struct session *new_session(NMEM nmem, struct conf_service *service,
     return session;
 }
 
-const char * client_get_suggestions_xml(struct client *cl, WRBUF wrbuf);
-
 static struct hitsbytarget *hitsbytarget_nb(struct session *se,
                                             int *count, NMEM nmem)
 {
@@ -1084,8 +1139,11 @@ static struct hitsbytarget *hitsbytarget_nb(struct session *se,
         session_settings_dump(se, client_get_database(cl), w);
         res[*count].settings_xml = nmem_strdup(nmem, wrbuf_cstr(w));
         wrbuf_rewind(w);
-        wrbuf_puts(w, "");
-        res[*count].suggestions_xml = nmem_strdup(nmem, client_get_suggestions_xml(cl, w));
+        res[*count].suggestions_xml =
+            nmem_strdup(nmem, client_get_suggestions_xml(cl, w));
+
+        res[*count].query_data =
+            client_get_query(cl, &res[*count].query_type, nmem);
         wrbuf_destroy(w);
         (*count)++;
     }
@@ -1215,7 +1273,6 @@ void perform_termlist(struct http_channel *c, struct session *se,
                         wrbuf_puts(c->wrbuf, "<name>");
                         wrbuf_xmlputs(c->wrbuf, p[i]->display_term);
                         wrbuf_puts(c->wrbuf, "</name>");
-
                         wrbuf_printf(c->wrbuf,
                                      "<frequency>%d</frequency>",
                                      p[i]->frequency);
@@ -1478,7 +1535,8 @@ void statistics(struct session *se, struct statistics *stat)
 }
 
 static struct record_metadata *record_metadata_init(
-    NMEM nmem, const char *value, enum conf_metadata_type type,
+    NMEM nmem, const char *value, const char *norm,
+    enum conf_metadata_type type,
     struct _xmlAttr *attr)
 {
     struct record_metadata *rec_md = record_metadata_create(nmem);
@@ -1508,11 +1566,20 @@ static struct record_metadata *record_metadata_init(
     {
     case Metadata_type_generic:
     case Metadata_type_skiparticle:
-        if (strstr(value, "://")) /* looks like a URL */
+        if (norm)
+        {
             rec_md->data.text.disp = nmem_strdup(nmem, value);
+            rec_md->data.text.norm = nmem_strdup(nmem, norm);
+        }
         else
-            rec_md->data.text.disp =
-                normalize7bit_generic(nmem_strdup(nmem, value), " ,/.:([");
+        {
+            if (strstr(value, "://")) /* looks like a URL */
+                rec_md->data.text.disp = nmem_strdup(nmem, value);
+            else
+                rec_md->data.text.disp =
+                    normalize7bit_generic(nmem_strdup(nmem, value), " ,/.:([");
+            rec_md->data.text.norm = rec_md->data.text.disp;
+        }
         rec_md->data.text.sort = 0;
         rec_md->data.text.snippet = 0;
         break;
@@ -1536,6 +1603,7 @@ static struct record_metadata *record_metadata_init(
         break;
     case Metadata_type_relevance:
     case Metadata_type_position:
+    case Metadata_type_retrieval:
         return 0;
     }
     return rec_md;
@@ -2068,6 +2136,20 @@ static int ingest_to_cluster(struct client *cl,
 
             if (!type)
                 continue;
+
+            md_field_id
+                = conf_service_metadata_field_id(service, (const char *) type);
+            if (md_field_id < 0)
+            {
+                if (se->number_of_warnings_unknown_metadata == 0)
+                {
+                    session_log(se, YLOG_WARN,
+                            "Ignoring unknown metadata element: %s", type);
+                }
+                se->number_of_warnings_unknown_metadata++;
+                continue;
+            }
+
             wrbuf_rewind(wrbuf_disp);
             value0 = xmlNodeListGetString(xdoc, n->children, 1);
             if (!value0 || !*value0)
@@ -2083,23 +2165,10 @@ static int ingest_to_cluster(struct client *cl,
             }
             if (value0)
                 xmlFree(value0);
-            md_field_id
-                = conf_service_metadata_field_id(service, (const char *) type);
-            if (md_field_id < 0)
-            {
-                if (se->number_of_warnings_unknown_metadata == 0)
-                {
-                    session_log(se, YLOG_WARN,
-                            "Ignoring unknown metadata element: %s", type);
-                }
-                se->number_of_warnings_unknown_metadata++;
-                continue;
-            }
-
             ser_md = &service->metadata[md_field_id];
 
             // non-merged metadata
-            rec_md = record_metadata_init(se->nmem, wrbuf_cstr(wrbuf_disp),
+            rec_md = record_metadata_init(se->nmem, wrbuf_cstr(wrbuf_disp), 0,
                                           ser_md->type, n->properties);
             if (!rec_md)
             {
@@ -2186,7 +2255,6 @@ static int ingest_to_cluster(struct client *cl,
             const char *type = 0;
             xmlChar *value0;
 
-            wrbuf_rewind(wrbuf_disp);
             type = yaz_xml_get_prop(n, "type");
             if (!type)
                 continue;
@@ -2204,6 +2272,9 @@ static int ingest_to_cluster(struct client *cl,
                 ser_sk = &service->sortkeys[sk_field_id];
             }
 
+            wrbuf_rewind(wrbuf_disp);
+            wrbuf_rewind(wrbuf_norm);
+
             value0 = xmlNodeListGetString(xdoc, n->children, 1);
             if (!value0 || !*value0)
             {
@@ -2211,16 +2282,29 @@ static int ingest_to_cluster(struct client *cl,
                     xmlFree(value0);
                 continue;
             }
-            wrbuf_puts(wrbuf_disp, (const char *) value0);
-            xmlFree(value0);
 
+            if (ser_md->icurule)
+            {
+                run_icu(se, ser_md->icurule, (const char *) value0,
+                        wrbuf_norm, wrbuf_disp);
+                yaz_log(YLOG_LOG, "run_icu input=%s norm=%s disp=%s",
+                        (const char *) value0,
+                        wrbuf_cstr(wrbuf_norm), wrbuf_cstr(wrbuf_disp));
+                rec_md = record_metadata_init(se->nmem, wrbuf_cstr(wrbuf_disp),
+                                              wrbuf_cstr(wrbuf_norm),
+                                              ser_md->type, 0);
+            }
+            else
+            {
+                wrbuf_puts(wrbuf_disp, (const char *) value0);
+                rec_md = record_metadata_init(se->nmem, wrbuf_cstr(wrbuf_disp),
+                                              0,
+                                              ser_md->type, 0);
+            }
 
-            // merged metadata
-            rec_md = record_metadata_init(se->nmem, wrbuf_cstr(wrbuf_disp),
-                                          ser_md->type, 0);
+            xmlFree(value0);
 
             // see if the field was not in cluster already (from beginning)
-
             if (!rec_md)
                 continue;
 
@@ -2262,8 +2346,8 @@ static int ingest_to_cluster(struct client *cl,
             {
                 while (*wheretoput)
                 {
-                    if (!strcmp((const char *) (*wheretoput)->data.text.disp,
-                                rec_md->data.text.disp))
+                    if (!strcmp((const char *) (*wheretoput)->data.text.norm,
+                                rec_md->data.text.norm))
                         break;
                     wheretoput = &(*wheretoput)->next;
                 }
@@ -2273,8 +2357,8 @@ static int ingest_to_cluster(struct client *cl,
             else if (ser_md->merge == Metadata_merge_longest)
             {
                 if (!*wheretoput
-                    || strlen(rec_md->data.text.disp)
-                    > strlen((*wheretoput)->data.text.disp))
+                    || strlen(rec_md->data.text.norm)
+                    > strlen((*wheretoput)->data.text.norm))
                 {
                     *wheretoput = rec_md;
                     if (ser_sk)
@@ -2354,15 +2438,15 @@ static int ingest_to_cluster(struct client *cl,
                     char year[64];
                     sprintf(year, "%d", rec_md->data.number.max);
 
-                    add_facet(se, (char *) type, year, term_factor);
+                    add_facet(se, (char *) type, year, term_factor, cl);
                     if (rec_md->data.number.max != rec_md->data.number.min)
                     {
                         sprintf(year, "%d", rec_md->data.number.min);
-                        add_facet(se, (char *) type, year, term_factor);
+                        add_facet(se, (char *) type, year, term_factor, cl);
                     }
                 }
                 else
-                    add_facet(se, type, wrbuf_cstr(wrbuf_disp), term_factor);
+                    add_facet(se, type, wrbuf_cstr(wrbuf_disp), term_factor, cl);
             }
         }
         else