facet ID term map PAZ-1008
author Adam Dickmeiss <adam@indexdata.dk>
Thu, 2 Jul 2015 12:42:00 +0000 (14:42 +0200)
committer Adam Dickmeiss <adam@indexdata.dk>
Mon, 6 Jul 2015 08:51:10 +0000 (10:51 +0200)
New setting pz:facetmap:split:name.

doc/pazpar2_conf.xml
perf/bash/pp2client.sh
src/client.c
src/session.c
src/session.h
src/termlists.c
src/termlists.h

index 86dbdc8..7994914 100644 (file)
        supports (native) facets. The value is the name of the
        field on the target.
       </para>
-      <note>
-       <para>
-       At this point only Solr targets have been tested with this
-       facility.
-       </para>
-      </note>
+     </listitem>
+    </varlistentry>
+
+    <varlistentry>
+     <term>pz:facetmap:split:<replaceable>name</replaceable></term>
+     <listitem>
+      <para>
+       Like pz:facetmap, but makes Pazpar2 treat each term value as
+       two items separated by a colon. The first item is the raw ID to be
+       sent to the database if a limitmap on the field
+       <replaceable>name</replaceable> is used. The second item is
+       the display term.
+      </para>
+      <para>
+       This facility was added in Pazpar2 version 1.11.0.
+      </para>
      </listitem>
     </varlistentry>
 
index 03b0421..c39077d 100755 (executable)
@@ -93,6 +93,7 @@ if [ "$TIME" != "" ] ; then
 else
     wget -q -O ${TMP_DIR}$OF.show.xml "$H?command=show&session=$S&sort=relevance&start=0&num=100&block=1"
 fi
+wget -q -O ${TMP_DIR}$OF.termlist.xml "$H?command=termlist&session=$S"
 wget -q -O ${TMP_DIR}$OF.bytarget.xml "$H?command=bytarget&session=$S"
 wget -q -O ${TMP_DIR}$OF.stat.xml "$H?command=stat&session=$S"
 wget -q -O ${TMP_DIR}$OF.info.xml "$H?command=info"
index e25bd2d..644e42d 100644 (file)
@@ -521,7 +521,7 @@ static void client_report_facets(struct client *cl, ZOOM_resultset rs)
                                 ZOOM_facet_field_get_term(facets[facet_idx],
                                                           term_idx, &freq);
                             if (term)
-                                add_facet(se, p, term, freq);
+                                add_facet(se, p, term, freq, cl);
                         }
                         break;
                     }
@@ -1349,7 +1349,7 @@ static void ccl_quote_map_term(CCL_bibset ccl_map, WRBUF w,
     }
 }
 
-static int apply_limit(struct session_database *sdb,
+static int apply_limit(struct client *cl,
                        facet_limits_t facet_limits,
                        WRBUF w_pqf, CCL_bibset ccl_map,
                        struct conf_service *service)
@@ -1358,6 +1358,7 @@ static int apply_limit(struct session_database *sdb,
     int i = 0;
     const char *name;
     const char *value;
+    struct session_database *sdb = client_get_database(cl);
 
     NMEM nmem_tmp = nmem_create();
     for (i = 0; (name = facet_limits_get(facet_limits, i, &value)); i++)
@@ -1377,6 +1378,14 @@ static int apply_limit(struct session_database *sdb,
                 nmem_strsplit_escape2(nmem_tmp, "|", value, &values,
                                       &num, 1, '\\', 1);
 
+                for (i = 0; i < num; i++)
+                {
+                    const char *id = session_lookup_id_facet(cl->session,
+                                                             cl, name,
+                                                             values[i]);
+                    if (id)
+                        values[i] = nmem_strdup(nmem_tmp, id);
+                }
                 nmem_strsplit_escape2(nmem_tmp, ",", s->value, &cvalues,
                                       &cnum, 1, '\\', 1);
 
@@ -1503,7 +1512,7 @@ int client_parse_query(struct client *cl, const char *query,
         wrbuf_puts(w_pqf, " ");
     }
 
-    if (apply_limit(sdb, facet_limits, w_pqf, ccl_map, service))
+    if (apply_limit(cl, facet_limits, w_pqf, ccl_map, service))
     {
         ccl_qual_rm(&ccl_map);
         return -2;
index e67cd10..e76da0a 100644 (file)
@@ -203,13 +203,66 @@ static void session_normalize_facet(struct session *s,
     run_icu(s, icu_chain_id, value, facet_wrbuf, display_wrbuf);
 }
 
-void add_facet(struct session *s, const char *type, const char *value, int count)
+struct facet_id {           /* one (client, facet type, display term) -> raw ID mapping */
+    char *client_id;        /* copy of client_get_id() for the client that reported the facet */
+    char *type;             /* facet type as passed by add_facet(), "split:" prefix removed */
+    char *id;               /* raw ID: the part of the facet value before its first ':' */
+    char *term;             /* display term that the raw ID belongs to */
+    struct facet_id *next;  /* next entry; the list head is session->facet_id_list */
+};
+
+static void session_add_id_facet(struct session *s, struct client *cl,
+                                 const char *type,
+                                 const char *id,
+                                 size_t id_len,  /* id need not be NUL-terminated */
+                                 const char *term)
+{   /* Prepend a (client, type, term) -> id entry to s->facet_id_list. */
+    struct facet_id *t = nmem_malloc(s->session_nmem, sizeof(*t));
+    /* All strings are copied into s->session_nmem; the arguments are not retained. */
+    t->client_id = nmem_strdup(s->session_nmem, client_get_id(cl));
+    t->type = nmem_strdup(s->session_nmem, type);
+    t->id = nmem_strdupn(s->session_nmem, id, id_len);  /* copies exactly id_len bytes */
+    t->term = nmem_strdup(s->session_nmem, term);
+    t->next = s->facet_id_list;  /* no duplicate check: every call adds a new node */
+    s->facet_id_list = t;
+}
+
+
+const char *session_lookup_id_facet(struct session *s, struct client *cl,
+                                    const char *type,
+                                    const char *term)
+{   /* Return the raw ID recorded for (cl, type, term), or 0 if there is none. */
+    struct facet_id *t = s->facet_id_list;
+    for (; t; t = t->next)  /* linear scan from the list head; first match wins */
+        if (!strcmp(client_get_id(cl), t->client_id) &&
+            !strcmp(t->type, type) && !strcmp(t->term, term))  /* exact matches only */
+        {
+            return t->id;  /* pointer to the entry's own string, not a fresh copy */
+        }
+    return 0;
+}
+
+void add_facet(struct session *s, const char *type, const char *value, int count, struct client *cl)
 {
     WRBUF facet_wrbuf = wrbuf_alloc();
     WRBUF display_wrbuf = wrbuf_alloc();
+    const char *id = 0;
+    size_t id_len = 0;
 
-    session_normalize_facet(s, type, value, display_wrbuf, facet_wrbuf);
+    /* inspect pz:facetmap:split:name ?? */
+    if (!strncmp(type, "split:", 6))
+    {
+        const char *cp = strchr(value, ':');
+        if (cp)
+        {
+            id = value;
+            id_len = cp - value;
+            value = cp + 1;
+        }
+        type += 6;
+    }
 
+    session_normalize_facet(s, type, value, display_wrbuf, facet_wrbuf);
     if (wrbuf_len(facet_wrbuf))
     {
         struct named_termlist **tp = &s->termlists;
@@ -224,7 +277,10 @@ void add_facet(struct session *s, const char *type, const char *value, int count
             (*tp)->next = 0;
         }
         termlist_insert((*tp)->termlist, wrbuf_cstr(display_wrbuf),
-                        wrbuf_cstr(facet_wrbuf), count);
+                        wrbuf_cstr(facet_wrbuf), id, id_len, count);
+        if (id)
+            session_add_id_facet(s, cl, type, id, id_len,
+                                 wrbuf_cstr(display_wrbuf));
     }
     wrbuf_destroy(facet_wrbuf);
     wrbuf_destroy(display_wrbuf);
@@ -1027,6 +1083,7 @@ struct session *new_session(NMEM nmem, struct conf_service *service,
     session->clients_cached = 0;
     session->settings_modified = 0;
     session->session_nmem = nmem;
+    session->facet_id_list = 0;
     session->nmem = nmem_create();
     session->databases = 0;
     session->sorted_results = 0;
@@ -1216,7 +1273,6 @@ void perform_termlist(struct http_channel *c, struct session *se,
                         wrbuf_puts(c->wrbuf, "<name>");
                         wrbuf_xmlputs(c->wrbuf, p[i]->display_term);
                         wrbuf_puts(c->wrbuf, "</name>");
-
                         wrbuf_printf(c->wrbuf,
                                      "<frequency>%d</frequency>",
                                      p[i]->frequency);
@@ -2382,15 +2438,15 @@ static int ingest_to_cluster(struct client *cl,
                     char year[64];
                     sprintf(year, "%d", rec_md->data.number.max);
 
-                    add_facet(se, (char *) type, year, term_factor);
+                    add_facet(se, (char *) type, year, term_factor, cl);
                     if (rec_md->data.number.max != rec_md->data.number.min)
                     {
                         sprintf(year, "%d", rec_md->data.number.min);
-                        add_facet(se, (char *) type, year, term_factor);
+                        add_facet(se, (char *) type, year, term_factor, cl);
                     }
                 }
                 else
-                    add_facet(se, type, wrbuf_cstr(wrbuf_disp), term_factor);
+                    add_facet(se, type, wrbuf_cstr(wrbuf_disp), term_factor, cl);
             }
         }
         else
index 153c6ad..49e5338 100644 (file)
@@ -113,6 +113,7 @@ struct session {
     facet_limits_t facet_limits;
     int clients_starting;
     struct reclist_sortparms *sorted_results;
+    struct facet_id *facet_id_list;
 };
 
 struct statistics {
@@ -192,11 +193,15 @@ int ingest_record(struct client *cl, const char *rec, int record_no, NMEM nmem);
 int ingest_xml_record(struct client *cl, xmlDoc *xdoc,
                       int record_no, NMEM nmem, int cached_copy);
 void session_alert_watch(struct session *s, int what);
-void add_facet(struct session *s, const char *type, const char *value, int count);
+void add_facet(struct session *s, const char *type, const char *value, int count, struct client *cl);
 
 int session_check_cluster_limit(struct session *se, struct record_cluster *rec);
 
 void perform_termlist(struct http_channel *c, struct session *se, const char *name, int num, int version);
+
+const char *session_lookup_id_facet(struct session *s, struct client *cl,
+                                    const char *type, const char *term);
+
 void session_log(struct session *s, int level, const char *fmt, ...)
 #ifdef __GNUC__
     __attribute__ ((format (printf, 3, 4)))
index 8f06a47..79e88ee 100644 (file)
@@ -62,7 +62,8 @@ struct termlist *termlist_create(NMEM nmem)
 }
 
 void termlist_insert(struct termlist *tl, const char *display_term,
-                     const char *norm_term, int freq)
+                     const char *norm_term, const char *id, size_t id_len,
+                     int freq)
 {
     unsigned int bucket;
     struct termlist_bucket **p;
@@ -87,6 +88,7 @@ void termlist_insert(struct termlist *tl, const char *display_term,
         new->term.norm_term = nmem_strdup(tl->nmem, buf);
         new->term.display_term = *display_term ?
             nmem_strdup(tl->nmem, display_term) : new->term.norm_term;
+        new->term.id = id ? nmem_strdupn(tl->nmem, id, id_len) : 0;
         new->term.frequency = freq;
         new->next = 0;
         *p = new;
index 8502e3e..0d5310a 100644 (file)
@@ -26,6 +26,7 @@ struct termlist_score
 {
     char *norm_term;
     char *display_term;
+    char *id;
     int frequency;
 };
 
@@ -33,7 +34,8 @@ struct termlist;
 
 struct termlist *termlist_create(NMEM nmem);
 void termlist_insert(struct termlist *tl, const char *display_term,
-                     const char *norm_term, int freq);
+                     const char *norm_term,
+                     const char *id, size_t id_len, int freq);
 struct termlist_score **termlist_highscore(struct termlist *tl, int *len,
                                            NMEM nmem);