delete-trailing-whitespace
[pazpar2-moved-to-github.git] / src / client.c
index a7649b4..4e36630 100644 (file)
@@ -1,5 +1,5 @@
 /* This file is part of Pazpar2.
-   Copyright (C) 2006-2013 Index Data
+   Copyright (C) Index Data
 
 Pazpar2 is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free
@@ -71,6 +71,8 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include "relevance.h"
 #include "incref.h"
 
+#define XDOC_CACHE_SIZE 100
+
 static YAZ_MUTEX g_mutex = 0;
 static int no_clients = 0;
 
@@ -90,14 +92,7 @@ static int client_use(int delta)
 
 int clients_count(void)
 {
-    int total = 0;
-    if (g_mutex)
-    {
-        yaz_mutex_enter(g_mutex);
-        total = no_clients;
-        yaz_mutex_leave(g_mutex);
-    }
-    return total;
+    return client_use(0);
 }
 
 /** \brief Represents client state for a connection to one search target */
@@ -111,7 +106,9 @@ struct client {
     Odr_int hits;
     int record_offset;
     int show_stat_no;
-    int filtered; // When using local:, this will count the number of filtered records.
+    int filtered; /* number of records ignored for local filtering */
+    int ingest_failures; /* number of records where XSLT/other failed */
+    int record_failures; /* number of records where ZOOM reported error */
     int maxrecs;
     int startrecs;
     int diagnostic;
@@ -128,6 +125,7 @@ struct client {
     int same_search;
     char *sort_strategy;
     char *sort_criteria;
+    xmlDoc **xdoc;
 };
 
 struct suggestions {
@@ -199,6 +197,50 @@ void client_set_state(struct client *cl, enum client_state st)
     }
 }
 
+static void client_init_xdoc(struct client *cl)
+{
+    int i;
+
+    cl->xdoc = xmalloc(sizeof(*cl->xdoc) * XDOC_CACHE_SIZE);
+    for (i = 0; i < XDOC_CACHE_SIZE; i++)
+        cl->xdoc[i] = 0;
+}
+
+static void client_destroy_xdoc(struct client *cl)
+{
+    int i;
+
+    assert(cl->xdoc);
+    for (i = 0; i < XDOC_CACHE_SIZE; i++)
+        if (cl->xdoc[i])
+            xmlFreeDoc(cl->xdoc[i]);
+    xfree(cl->xdoc);
+}
+
+xmlDoc *client_get_xdoc(struct client *cl, int record_no)
+{
+    assert(cl->xdoc);
+    if (record_no >= 0 && record_no < XDOC_CACHE_SIZE)
+        return cl->xdoc[record_no];
+    return 0;
+}
+
+void client_store_xdoc(struct client *cl, int record_no, xmlDoc *xdoc)
+{
+    assert(cl->xdoc);
+    if (record_no >= 0 && record_no < XDOC_CACHE_SIZE)
+    {
+        if (cl->xdoc[record_no])
+            xmlFreeDoc(cl->xdoc[record_no]);
+        cl->xdoc[record_no] = xdoc;
+    }
+    else
+    {
+        xmlFreeDoc(xdoc);
+    }
+}
+
+
 static void client_show_raw_error(struct client *cl, const char *addinfo);
 
 struct connection *client_get_connection(struct client *cl)
@@ -501,27 +543,6 @@ static void ingest_raw_record(struct client *cl, ZOOM_record rec)
     client_show_raw_dequeue(cl);
 }
 
-void client_check_preferred_watch(struct client *cl)
-{
-    struct session *se = cl->session;
-    yaz_log(YLOG_DEBUG, "client_check_preferred_watch: %s ", client_get_id(cl));
-    if (se)
-    {
-        client_unlock(cl);
-        /* TODO possible threading issue. Session can have been destroyed */
-        if (session_is_preferred_clients_ready(se)) {
-            session_alert_watch(se, SESSION_WATCH_SHOW_PREF);
-        }
-        else
-            yaz_log(YLOG_DEBUG, "client_check_preferred_watch: Still locked on preferred targets.");
-
-        client_lock(cl);
-    }
-    else
-        yaz_log(YLOG_WARN, "client_check_preferred_watch: %s. No session!", client_get_id(cl));
-
-}
-
 struct suggestions* client_suggestions_create(const char* suggestions_string);
 static void client_suggestions_destroy(struct client *cl);
 
@@ -530,23 +551,24 @@ void client_search_response(struct client *cl)
     struct connection *co = cl->connection;
     ZOOM_connection link = connection_get_link(co);
     ZOOM_resultset resultset = cl->resultset;
+    struct session *se = client_get_session(cl);
 
     const char *error, *addinfo = 0;
 
     if (ZOOM_connection_error(link, &error, &addinfo))
     {
         cl->hits = 0;
+        session_log(se, YLOG_WARN, "%s: Error %s (%s)",
+                    client_get_id(cl), error, addinfo);
         client_set_state(cl, Client_Error);
-        yaz_log(YLOG_WARN, "Search error %s (%s): %s",
-                error, addinfo, client_get_id(cl));
     }
     else
     {
         client_report_facets(cl, resultset);
         cl->record_offset = cl->startrecs;
         cl->hits = ZOOM_resultset_size(resultset);
-        yaz_log(YLOG_DEBUG, "client_search_response: hits " ODR_INT_PRINTF,
-                cl->hits);
+        session_log(se, YLOG_LOG, "%s: hits: " ODR_INT_PRINTF,
+                    client_get_id(cl), cl->hits);
         if (cl->suggestions)
             client_suggestions_destroy(cl);
         cl->suggestions =
@@ -560,6 +582,11 @@ void client_got_records(struct client *cl)
     struct session *se = cl->session;
     if (se)
     {
+        client_unlock(cl);
+        /* TODO possible threading issue. Session can have been destroyed */
+        if (session_is_preferred_clients_ready(se))
+            session_alert_watch(se, SESSION_WATCH_SHOW_PREF);
+        client_lock(cl);
         if (reclist_get_num_records(se->reclist) > 0)
         {
             client_unlock(cl);
@@ -578,16 +605,38 @@ static void client_record_ingest(struct client *cl)
     ZOOM_record rec = 0;
     ZOOM_resultset resultset = cl->resultset;
     struct session *se = client_get_session(cl);
+    xmlDoc *xdoc;
+    int offset = cl->record_offset + 1; /* 0 versus 1 numbered offsets */
 
-    if ((rec = ZOOM_resultset_record_immediate(resultset, cl->record_offset)))
+    xdoc = client_get_xdoc(cl, offset);
+    if (xdoc)
+    {
+        if (cl->session)
+        {
+            NMEM nmem = nmem_create();
+            int rc = ingest_xml_record(cl, xdoc, offset, nmem, 1);
+            if (rc == -1)
+            {
+                session_log(se, YLOG_WARN,
+                            "%s: #%d: failed to ingest xdoc",
+                            client_get_id(cl), offset);
+                cl->ingest_failures++;
+            }
+            else if (rc == -2)
+                cl->filtered++;
+            nmem_destroy(nmem);
+        }
+    }
+    else if ((rec = ZOOM_resultset_record_immediate(resultset,
+                                                    cl->record_offset)))
     {
-        int offset = ++cl->record_offset;
         if (cl->session == 0)
             ;  /* no operation */
         else if (ZOOM_record_error(rec, &msg, &addinfo, 0))
         {
             session_log(se, YLOG_WARN, "Record error %s (%s): %s #%d",
                         msg, addinfo, client_get_id(cl), offset);
+            cl->record_failures++;
         }
         else
         {
@@ -603,30 +652,32 @@ static void client_record_ingest(struct client *cl)
             if (!xmlrec)
             {
                 const char *rec_syn =  ZOOM_record_get(rec, "syntax", NULL);
-                session_log(se, YLOG_WARN, "ZOOM_record_get failed from %s #%d",
+                session_log(se, YLOG_WARN, "%s: #%d: ZOOM_record_get failed",
                             client_get_id(cl), offset);
                 session_log(se, YLOG_LOG, "pz:nativesyntax=%s . "
                             "ZOOM record type=%s . Actual record syntax=%s",
                             s ? s : "null", type,
                             rec_syn ? rec_syn : "null");
+                cl->ingest_failures++;
             }
             else
             {
                 /* OK = 0, -1 = failure, -2 = Filtered */
-                int rc = ingest_record(cl, xmlrec, cl->record_offset, nmem);
+                int rc = ingest_record(cl, xmlrec, offset, nmem);
                 if (rc == -1)
                 {
                     const char *rec_syn =  ZOOM_record_get(rec, "syntax", NULL);
                     session_log(se, YLOG_WARN,
-                                "Failed to ingest record from %s #%d",
+                                "%s: #%d: failed to ingest record",
                                 client_get_id(cl), offset);
                     session_log(se, YLOG_LOG, "pz:nativesyntax=%s . "
                                 "ZOOM record type=%s . Actual record syntax=%s",
                                 s ? s : "null", type,
                                 rec_syn ? rec_syn : "null");
+                    cl->ingest_failures++;
                 }
-                if (rc == -2)
-                    cl->filtered += 1;
+                else if (rc == -2)
+                    cl->filtered++;
             }
             nmem_destroy(nmem);
         }
@@ -634,8 +685,9 @@ static void client_record_ingest(struct client *cl)
     else
     {
         session_log(se, YLOG_WARN, "Got NULL record from %s #%d",
-                    client_get_id(cl), cl->record_offset);
+                    client_get_id(cl), offset);
     }
+    cl->record_offset++;
 }
 
 void client_record_response(struct client *cl, int *got_records)
@@ -647,9 +699,10 @@ void client_record_response(struct client *cl, int *got_records)
 
     if (ZOOM_connection_error(link, &error, &addinfo))
     {
+        struct session *se = client_get_session(cl);
+        session_log(se, YLOG_WARN, "%s: Error %s (%s)",
+                    client_get_id(cl), error, addinfo);
         client_set_state(cl, Client_Error);
-        yaz_log(YLOG_WARN, "Search error %s (%s): %s",
-            error, addinfo, client_get_id(cl));
     }
     else
     {
@@ -680,7 +733,7 @@ int client_reingest(struct client *cl)
 {
     int i = cl->startrecs;
     int to = cl->record_offset;
-    cl->filtered = 0;
+    cl->record_failures = cl->ingest_failures = cl->filtered = 0;
 
     cl->record_offset = i;
     for (; i < to; i++)
@@ -852,34 +905,44 @@ int client_start_search(struct client *cl)
     const char *opt_preferred   = session_setting_oneval(sdb, PZ_PREFERRED);
     const char *extra_args      = session_setting_oneval(sdb, PZ_EXTRA_ARGS);
     const char *opt_present_chunk = session_setting_oneval(sdb, PZ_PRESENT_CHUNK);
+    const char *opt_timeout     = session_setting_oneval(sdb, PZ_TIMEOUT);
     ZOOM_query query;
     char maxrecs_str[24], startrecs_str[24], present_chunk_str[24];
     struct timeval tval;
     int present_chunk = 20; // Default chunk size
     int rc_prep_connection;
+    int operation_timeout = se->service->z3950_operation_timeout;
 
+    cl->diagnostic = 0;
+    cl->record_failures = cl->ingest_failures = cl->filtered = 0;
 
     yaz_gettimeofday(&tval);
     tval.tv_sec += 5;
 
+    if (opt_timeout && *opt_timeout)
+        operation_timeout = atoi(opt_timeout);
+
     if (opt_present_chunk && strcmp(opt_present_chunk,"")) {
         present_chunk = atoi(opt_present_chunk);
         yaz_log(YLOG_DEBUG, "Present chunk set to %d", present_chunk);
     }
     rc_prep_connection =
-        client_prep_connection(cl, se->service->z3950_operation_timeout,
+        client_prep_connection(cl, operation_timeout,
                                se->service->z3950_session_timeout,
                                se->service->server->iochan_man,
                                &tval);
     /* Nothing has changed and we already have a result */
     if (cl->same_search == 1 && rc_prep_connection == 2)
     {
-        session_log(se, YLOG_LOG, "client %s REUSE result", client_get_id(cl));
+        session_log(se, YLOG_LOG, "%s: reuse result", client_get_id(cl));
+        client_report_facets(cl, cl->resultset);
         return client_reingest(cl);
     }
     else if (!rc_prep_connection)
     {
-        session_log(se, YLOG_LOG, "client %s FAILED to search: No connection.", client_get_id(cl));
+        session_log(se, YLOG_LOG, "%s: postponing search: No connection",
+                    client_get_id(cl));
+        client_set_state_nb(cl, Client_Working);
         return -1;
     }
     co = client_get_connection(cl);
@@ -887,10 +950,10 @@ int client_start_search(struct client *cl)
     link = connection_get_link(co);
     assert(link);
 
-    session_log(se, YLOG_LOG, "client %s NEW search", client_get_id(cl));
+    session_log(se, YLOG_LOG, "%s: new search", client_get_id(cl));
 
-    cl->diagnostic = 0;
-    cl->filtered = 0;
+    client_destroy_xdoc(cl);
+    client_init_xdoc(cl);
 
     if (extra_args && *extra_args)
         ZOOM_connection_option_set(link, "extraArgs", extra_args);
@@ -944,17 +1007,16 @@ int client_start_search(struct client *cl)
     query = ZOOM_query_create();
     if (cl->cqlquery)
     {
-        yaz_log(YLOG_LOG, "Client %s: Search CQL: %s", client_get_id(cl),
-                cl->cqlquery);
+        session_log(se, YLOG_LOG, "%s: Search CQL: %s", client_get_id(cl),
+                    cl->cqlquery);
         ZOOM_query_cql(query, cl->cqlquery);
         if (*opt_sort)
             ZOOM_query_sortby(query, opt_sort);
     }
     else
     {
-        yaz_log(YLOG_LOG, "Client %s: Search PQF: %s", client_get_id(cl),
-                cl->pquery);
-
+        session_log(se, YLOG_LOG, "%s: Search PQF: %s", client_get_id(cl),
+                    cl->pquery);
         ZOOM_query_prefix(query, cl->pquery);
     }
     if (cl->sort_strategy && cl->sort_criteria) {
@@ -1005,6 +1067,7 @@ struct client *client_create(const char *id)
     cl->sort_criteria = 0;
     assert(id);
     cl->id = xstrdup(id);
+    client_init_xdoc(cl);
     client_use(1);
 
     yaz_log(YLOG_DEBUG, "client_create c=%p %s", cl, id);
@@ -1050,6 +1113,7 @@ int client_destroy(struct client *c)
             assert(!c->connection);
             facet_limits_destroy(c->facet_limits);
 
+            client_destroy_xdoc(c);
             if (c->resultset)
             {
                 ZOOM_resultset_destroy(c->resultset);
@@ -1075,7 +1139,9 @@ void client_set_connection(struct client *cl, struct connection *con)
     }
     else
     {
+        client_lock(cl);
         cl->connection = con;
+        client_unlock(cl);
         client_destroy(cl);
     }
 }
@@ -1087,6 +1153,26 @@ void client_disconnect(struct client *cl)
     client_set_connection(cl, 0);
 }
 
+void client_stop(struct client *cl)
+{
+    client_lock(cl);
+    if (cl->state == Client_Working || cl->state == Client_Connecting)
+    {
+        yaz_log(YLOG_LOG, "client_stop: %s release", client_get_id(cl));
+        if (cl->connection)
+        {
+            connection_release2(cl->connection);
+            assert(cl->ref_count > 1);
+            cl->ref_count--;
+            cl->connection = 0;
+        }
+        cl->state = Client_Disconnected;
+    }
+    else
+        yaz_log(YLOG_LOG, "client_stop: %s ignore", client_get_id(cl));
+    client_unlock(cl);
+}
+
 // Initialize CCL map for a target
 static CCL_bibset prepare_cclmap(struct client *cl, CCL_bibset base_bibset)
 {
@@ -1222,6 +1308,31 @@ const char *client_get_facet_limit_local(struct client *cl,
     return 0;
 }
 
+static void ccl_quote_map_term(CCL_bibset ccl_map, WRBUF w,
+                               const char *term)
+{
+    int quote_it = 0;
+    const char *cp;
+    for (cp = term; *cp; cp++)
+        if ((*cp >= '0' && *cp <= '9') || strchr(" +-", *cp))
+            ;
+        else
+            quote_it = 1;
+    if (!quote_it)
+        wrbuf_puts(w, term);
+    else
+    {
+        wrbuf_putc(w, '\"');
+        for (cp = term; *cp; cp++)
+        {
+            if (strchr( "\\\"", *cp))
+                wrbuf_putc(w, '\\');
+            wrbuf_putc(w, *cp);
+        }
+        wrbuf_putc(w, '\"');
+    }
+}
+
 static int apply_limit(struct session_database *sdb,
                        facet_limits_t facet_limits,
                        WRBUF w_pqf, CCL_bibset ccl_map,
@@ -1282,10 +1393,8 @@ static int apply_limit(struct session_database *sdb,
                             struct ccl_rpn_node *cn;
                             wrbuf_rewind(ccl_w);
                             wrbuf_puts(ccl_w, ccl);
-                            wrbuf_puts(ccl_w, "=\"");
-                            wrbuf_puts(ccl_w, values[i]);
-                            wrbuf_puts(ccl_w, "\"");
-
+                            wrbuf_putc(ccl_w, '=');
+                            ccl_quote_map_term(ccl_map, ccl_w, values[i]);
                             cn = ccl_find_str(ccl_map, wrbuf_cstr(ccl_w),
                                               &cerror, &cpos);
                             if (cn)
@@ -1348,7 +1457,7 @@ static int apply_limit(struct session_database *sdb,
 // return -1 on query error
 // return -2 on limit error
 int client_parse_query(struct client *cl, const char *query,
-                       facet_limits_t facet_limits)
+                       facet_limits_t facet_limits, const char **error_msg)
 {
     struct session *se = client_get_session(cl);
     struct conf_service *service = se->service;
@@ -1393,6 +1502,8 @@ int client_parse_query(struct client *cl, const char *query,
     ccl_qual_rm(&ccl_map);
     if (!cn)
     {
+        if (error_msg)
+            *error_msg = ccl_err_msg(cerror);
         client_set_state(cl, Client_Error);
         session_log(se, YLOG_WARN, "Client %s: Failed to parse CCL query '%s'",
                     client_get_id(cl),
@@ -1429,7 +1540,7 @@ int client_parse_query(struct client *cl, const char *query,
     {
         if (cl->pquery)
             session_log(se, YLOG_LOG, "Client %s: "
-                        "Re-search due query/limit change: %s to %s", 
+                        "Re-search due query/limit change: %s to %s",
                         client_get_id(cl), cl->pquery, wrbuf_cstr(w_pqf));
         xfree(cl->pquery);
         cl->pquery = xstrdup(wrbuf_cstr(w_pqf));
@@ -1451,6 +1562,7 @@ int client_parse_query(struct client *cl, const char *query,
         session_log(se, YLOG_WARN, "Invalid PQF query for Client %s: %s",
                     client_get_id(cl), cl->pquery);
         ret_value = -1;
+        *error_msg = "Invalid PQF after CCL to PQF conversion";
     }
     else
     {
@@ -1465,7 +1577,10 @@ int client_parse_query(struct client *cl, const char *query,
             else
                 cl->cqlquery = make_cqlquery(cl, zquery);
             if (!cl->cqlquery)
+            {
+                *error_msg = "Cannot convert PQF to Solr/CQL";
                 ret_value = -1;
+            }
             else
                 session_log(se, YLOG_LOG, "Client %s native query: %s (%s)",
                             client_get_id(cl), cl->cqlquery, sru);
@@ -1494,6 +1609,8 @@ int client_parse_sort(struct client *cl, struct reclist_sortparms *sp)
         const char *sort_strategy_and_spec =
             get_strategy_plus_sort(cl, sp->name);
         int increasing = sp->increasing;
+        if (!strcmp(sp->name, "relevance"))
+            increasing = 1;
         if (sort_strategy_and_spec && strlen(sort_strategy_and_spec) < 40)
         {
             char strategy[50], *p;
@@ -1591,16 +1708,18 @@ Odr_int client_get_approximation(struct client *cl)
     return cl->hits;
 }
 
-int client_get_num_records(struct client *cl)
+int client_get_num_records(struct client *cl, int *filtered, int *ingest,
+                           int *failed)
 {
+    if (filtered)
+        *filtered = cl->filtered;
+    if (ingest)
+        *ingest = cl->ingest_failures;
+    if (failed)
+        *failed = cl->record_failures;
     return cl->record_offset;
 }
 
-int client_get_num_records_filtered(struct client *cl)
-{
-    return cl->filtered;
-}
-
 void client_set_diagnostic(struct client *cl, int diagnostic,
                            const char *message, const char *addinfo)
 {
@@ -1623,12 +1742,12 @@ int client_get_diagnostic(struct client *cl, const char **message,
     return cl->diagnostic;
 }
 
-const char * client_get_suggestions_xml(struct client *cl, WRBUF wrbuf)
+const char *client_get_suggestions_xml(struct client *cl, WRBUF wrbuf)
 {
     /* int idx; */
     struct suggestions *suggestions = cl->suggestions;
 
-    if (!suggestions) 
+    if (!suggestions)
         return "";
     if (suggestions->passthrough)
     {
@@ -1652,7 +1771,6 @@ const char * client_get_suggestions_xml(struct client *cl, WRBUF wrbuf)
     return wrbuf_cstr(wrbuf);
 }
 
-
 void client_set_database(struct client *cl, struct session_database *db)
 {
     cl->database = db;