Refactor PP2 charsets handling, use pazpar2_mutex.
authorAdam Dickmeiss <adam@indexdata.dk>
Mon, 15 Mar 2010 14:21:30 +0000 (15:21 +0100)
committerAdam Dickmeiss <adam@indexdata.dk>
Mon, 15 Mar 2010 14:21:30 +0000 (15:21 +0100)
pp2_relevance_tokenize is split into two functions, pp2_relevance_tokenize
and pp2_relevance_first. This allows ICU tokenize handlers to be
reused within a thread and makes ingest_to_cluster slightly faster.

14 files changed:
src/charsets.c
src/charsets.h
src/client.c
src/database.c
src/http.c
src/http_command.c
src/normalize_cache.c
src/pazpar2.c
src/pazpar2_config.c
src/reclists.c
src/relevance.c
src/relevance.h
src/session.c
src/session.h

index dfc1015..380b213 100644 (file)
@@ -38,12 +38,6 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
 #if YAZ_HAVE_ICU
 #include <yaz/icu.h>
-
-#if YAZ_VERSIONL >= 0x40002
-/* YAZ 4.0.2 or later as icu_iter */
-#define ICU_ITER 1
-#endif
-
 #endif
 
 /* charset handle */
@@ -72,9 +66,7 @@ struct pp2_relevance_token_s {
     pp2_charset_t pct;  /* our main charset handle (type+config) */
     WRBUF norm_str;     /* normized string we return (temporarily) */
     WRBUF sort_str;     /* sort string we return (temporarily) */
-#if ICU_ITER
     yaz_icu_iter_t iter;
-#endif
 };
 
 
@@ -151,14 +143,30 @@ void pp2_charset_destroy(pp2_charset_t pct)
     }
 }
 
-pp2_relevance_token_t pp2_relevance_tokenize(pp2_charset_t pct,
-                                             const char *buf,
-                                             int skip_article)
+pp2_relevance_token_t pp2_relevance_tokenize(pp2_charset_t pct)
 {
     pp2_relevance_token_t prt = xmalloc(sizeof(*prt));
 
     assert(pct);
 
+    prt->norm_str = wrbuf_alloc();
+    prt->sort_str = wrbuf_alloc();
+    prt->cp = 0;
+    prt->last_cp = 0;
+    prt->pct = pct;
+
+#if YAZ_HAVE_ICU
+    prt->iter = 0;
+    if (pct->icu_chn)
+        prt->iter = icu_iter_create(pct->icu_chn);
+#endif
+    return prt;
+}
+
+void pp2_relevance_first(pp2_relevance_token_t prt,
+                         const char *buf,
+                         int skip_article)
+{ 
     if (skip_article)
     {
         const char *p = buf;
@@ -176,39 +184,23 @@ pp2_relevance_token_t pp2_relevance_tokenize(pp2_charset_t pct,
             buf = p;
     }
 
-    prt->norm_str = wrbuf_alloc();
-    prt->sort_str = wrbuf_alloc();
+    wrbuf_rewind(prt->norm_str);
+    wrbuf_rewind(prt->sort_str);
     prt->cp = buf;
     prt->last_cp = 0;
-    prt->pct = pct;
 
 #if YAZ_HAVE_ICU
-#if ICU_ITER
-    prt->iter = 0;
-#endif
-    if (pct->icu_chn)
+    if (prt->iter)
     {
-#if ICU_ITER
-        prt->iter = icu_iter_create(pct->icu_chn);
         icu_iter_first(prt->iter, buf);
-#else        
-        int ok = 0;
-        pct->icu_sts = U_ZERO_ERROR;
-
-        ok = icu_chain_assign_cstr(pct->icu_chn, buf, &pct->icu_sts);
-#endif
-        //printf("\nfield ok: %d '%s'\n", ok, buf);
-        prt->pct = pct;
     }
 #endif // YAZ_HAVE_ICU
-    return prt;
 }
 
-
 void pp2_relevance_token_destroy(pp2_relevance_token_t prt)
 {
     assert(prt);
-#if ICU_ITER
+#if YAZ_HAVE_ICU
     if (prt->iter)
         icu_iter_destroy(prt->iter);
 #endif
@@ -282,31 +274,16 @@ static const char *pp2_get_sort_ascii(pp2_relevance_token_t prt)
 #if YAZ_HAVE_ICU
 static const char *pp2_relevance_token_icu(pp2_relevance_token_t prt)
 {
-#if ICU_ITER
     if (icu_iter_next(prt->iter))
     {
         return icu_iter_get_norm(prt->iter);
     }
-#else
-    if (icu_chain_next_token(prt->pct->icu_chn, &prt->pct->icu_sts))
-    {
-        if (U_FAILURE(prt->pct->icu_sts))
-        {
-            return 0;
-        }
-        return icu_chain_token_norm(prt->pct->icu_chn);
-    }
-#endif
     return 0;
 }
 
 static const char *pp2_get_sort_icu(pp2_relevance_token_t prt)
 {
-#if ICU_ITER
     return icu_iter_get_sortkey(prt->iter);
-#else
-    return icu_chain_token_sortkey(prt->pct->icu_chn);
-#endif
 }
 
 #endif // YAZ_HAVE_ICU
index b09a78a..fbc6193 100644 (file)
@@ -37,9 +37,11 @@ pp2_charset_t pp2_charset_create(struct icu_chain * icu_chn);
 void pp2_charset_destroy(pp2_charset_t pct);
 void pp2_charset_incref(pp2_charset_t pct);
 
-pp2_relevance_token_t pp2_relevance_tokenize(pp2_charset_t pct,
-                                             const char *buf,
-                                             int skip_article);
+pp2_relevance_token_t pp2_relevance_tokenize(pp2_charset_t pct);
+void pp2_relevance_first(pp2_relevance_token_t prt,
+                         const char *buf,
+                         int skip_article);
+
 void pp2_relevance_token_destroy(pp2_relevance_token_t prt);
 const char *pp2_relevance_token_next(pp2_relevance_token_t prt);
 const char *pp2_get_sort(pp2_relevance_token_t prt);
index 01267cd..89197f1 100644 (file)
@@ -58,6 +58,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include <yaz/timing.h>
 #endif
 
+#include "ppmutex.h"
 #include "session.h"
 #include "parameters.h"
 #include "client.h"
@@ -595,8 +596,7 @@ struct client *client_create(void)
     r->resultset = 0;
     r->next = 0;
     r->mutex = 0;
-    yaz_mutex_create(&r->mutex);
-    yaz_mutex_set_name(r->mutex, "client");
+    pazpar2_mutex_create(&r->mutex, "client");
 
     r->ref_count = 1;
     
index 8ab626e..ab425c7 100644 (file)
@@ -29,6 +29,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include <yaz/log.h>
 #include <yaz/nmem.h>
 
+#include "ppmutex.h"
 #include "session.h"
 #include "host.h"
 #include "pazpar2_config.h"
@@ -109,8 +110,7 @@ static struct host *create_host(const char *hostport, iochan_man_t iochan_man)
         xfree(host);
         return 0;
     }
-    yaz_mutex_create(&host->mutex);
-    yaz_mutex_set_name(host->mutex, "host");
+    pazpar2_mutex_create(&host->mutex, "host");
 
     return host;
 }
@@ -411,8 +411,7 @@ database_hosts_t database_hosts_create(void)
     database_hosts_t p = xmalloc(sizeof(*p));
     p->hosts = 0;
     p->mutex = 0;
-    yaz_mutex_create(&p->mutex);
-    yaz_mutex_set_name(p->mutex, "database");
+    pazpar2_mutex_create(&p->mutex, "database");
     return p;
 }
 
index 59d0050..60d8e30 100644 (file)
@@ -63,6 +63,7 @@ typedef int socklen_t;
 #include <yaz/nmem.h>
 #include <yaz/mutex.h>
 
+#include "ppmutex.h"
 #include "session.h"
 #include "http.h"
 
@@ -1421,7 +1422,7 @@ void http_mutex_init(struct conf_server *server)
     assert(server);
 
     assert(server->http_server->mutex == 0);
-    yaz_mutex_create(&server->http_server->mutex);
+    pazpar2_mutex_create(&server->http_server->mutex, "http_server");
     server->http_server->http_sessions = http_sessions_create();
 }
 
index 3d4df77..6e80a52 100644 (file)
@@ -33,6 +33,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include <yaz/snprintf.h>
 #include <yaz/yaz-util.h>
 
+#include "ppmutex.h"
 #include "eventl.h"
 #include "parameters.h"
 #include "session.h"
@@ -67,7 +68,7 @@ http_sessions_t http_sessions_create(void)
     http_sessions_t hs = xmalloc(sizeof(*hs));
     hs->session_list = 0;
     hs->mutex = 0;
-    yaz_mutex_create(&hs->mutex);
+    pazpar2_mutex_create(&hs->mutex, "http_sessions");
     return hs;
 }
 
@@ -98,13 +99,16 @@ static void session_timeout(IOCHAN i, int event)
 }
 
 struct http_session *http_session_create(struct conf_service *service,
-                                         http_sessions_t http_sessions)
+                                         http_sessions_t http_sessions,
+                                         unsigned int sesid)
 {
     NMEM nmem = nmem_create();
     struct http_session *r = nmem_malloc(nmem, sizeof(*r));
+    char tmp_str[50];
 
-    r->psession = new_session(nmem, service);
-    r->session_id = 0;
+    sprintf(tmp_str, "session#%u", sesid);
+    r->psession = new_session(nmem, service, tmp_str);
+    r->session_id = sesid;
     r->timestamp = 0;
     r->nmem = nmem;
     r->destroy_counter = r->activity_counter = 0;
@@ -356,7 +360,8 @@ static void cmd_init(struct http_channel *c)
             return;
         }
     }
-    s = http_session_create(service, c->http_sessions);
+    sesid = make_sessionid();
+    s = http_session_create(service, c->http_sessions, sesid);
     
     yaz_log(YLOG_DEBUG, "HTTP Session init");
     if (!clear || *clear == '0')
@@ -364,13 +369,11 @@ static void cmd_init(struct http_channel *c)
     else
         yaz_log(YLOG_LOG, "No databases preloaded");
     
-    sesid = make_sessionid();
-    s->session_id = sesid;
     if (process_settings(s->psession, c->request, c->response) < 0)
         return;
     
     sprintf(buf, HTTP_COMMAND_RESPONSE_PREFIX 
-            "<init><status>OK</status><session>%u", sesid);
+            "<init><status>OK</status><session>%d", sesid);
     if (c->server->server_id)
     {
         strcat(buf, ".");
index a863fc3..ee10186 100644 (file)
@@ -27,6 +27,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include <config.h>
 #endif
 
+#include "ppmutex.h"
 #include "normalize_cache.h"
 
 #include "pazpar2_config.h"
@@ -50,8 +51,7 @@ normalize_cache_t normalize_cache_create(void)
     nc->nmem = nmem;
     nc->items = 0;
     nc->mutex = 0;
-    yaz_mutex_create(&nc->mutex);
-    yaz_mutex_set_name(nc->mutex, "normalize_cache");
+    pazpar2_mutex_create(&nc->mutex, "normalize_cache");
     return nc;
 }
 
index 66e8a17..4e4b1ef 100644 (file)
@@ -29,6 +29,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
 #include "parameters.h"
 #include "session.h"
+#include "ppmutex.h"
 #include <yaz/daemon.h>
 #include <yaz/log.h>
 #include <yaz/options.h>
@@ -142,7 +143,7 @@ static int sc_main(
         case 'V':
             show_version();
         case 'X':
-            global_parameters.debug_mode = 1;
+            global_parameters.debug_mode++;
             break;
         default:
             fprintf(stderr, "Usage: pazpar2\n"
@@ -170,6 +171,9 @@ static int sc_main(
         yaz_log(YLOG_FATAL, "Configuration must be given with option -f");
         return 1;
     }
+    if (global_parameters.debug_mode > 1)
+        pazpar2_mutex_enable_debug(1);
+    
     config = config_create(config_fname, global_parameters.dump_records);
     if (!config)
         return 1;
index 3f3e03b..094d7fe 100644 (file)
@@ -38,6 +38,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #if HAVE_UNISTD_H
 #include <unistd.h>
 #endif
+#include "ppmutex.h"
 #include "incref.h"
 #include "pazpar2_config.h"
 #include "settings.h"
@@ -697,7 +698,7 @@ struct conf_service *service_create(struct conf_server *server,
         inherit_server_settings(service);
         resolve_databases(service);
         assert(service->mutex == 0);
-        yaz_mutex_create(&service->mutex);
+        pazpar2_mutex_create(&service->mutex, "conf");
     }
     return service;
 }
@@ -1060,7 +1061,7 @@ void config_process_events(struct conf_config *conf)
         {
             resolve_databases(s);
             assert(s->mutex == 0);
-            yaz_mutex_create(&s->mutex);
+            pazpar2_mutex_create(&s->mutex, "service");
         }
         http_mutex_init(ser);
     }
index 1221b2b..7912ca2 100644 (file)
@@ -25,6 +25,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
 #include <yaz/yaz-util.h>
 
+#include "ppmutex.h"
 #include "session.h"
 #include "reclists.h"
 #include "jenkins_hash.h"
@@ -249,8 +250,7 @@ struct reclist *reclist_create(NMEM nmem)
 
     res->num_records = 0;
     res->mutex = 0;
-    yaz_mutex_create(&res->mutex);
-    yaz_mutex_set_name(res->mutex, "reclist");
+    pazpar2_mutex_create(&res->mutex, "reclist");
     return res;
 }
 
index c6b7829..35b7d83 100644 (file)
@@ -33,7 +33,7 @@ struct relevance
     int *doc_frequency_vec;
     int vec_len;
     struct word_entry *entries;
-    pp2_charset_t pct;
+    pp2_relevance_token_t prt;
     NMEM nmem;
 };
 
@@ -68,7 +68,8 @@ int word_entry_match(struct word_entry *entries, const char *norm_str)
     return 0;
 }
 
-static struct word_entry *build_word_entries(pp2_charset_t pct, NMEM nmem,
+static struct word_entry *build_word_entries(pp2_relevance_token_t prt,
+                                             NMEM nmem,
                                              const char **terms)
 {
     int termno = 1; /* >0 signals THERE is an entry */
@@ -77,14 +78,11 @@ static struct word_entry *build_word_entries(pp2_charset_t pct, NMEM nmem,
 
     for (; *p; p++)
     {
-        pp2_relevance_token_t prt = pp2_relevance_tokenize(pct, *p, 0);
         const char *norm_str;
 
+        pp2_relevance_first(prt, *p, 0);
         while ((norm_str = pp2_relevance_token_next(prt)))
             add_word_entry(nmem, &entries, norm_str, termno);
-
-        pp2_relevance_token_destroy(prt);
-
         termno++;
     }
     return entries;
@@ -93,15 +91,15 @@ static struct word_entry *build_word_entries(pp2_charset_t pct, NMEM nmem,
 void relevance_countwords(struct relevance *r, struct record_cluster *cluster,
                           const char *words, int multiplier, const char *name)
 {
-    pp2_relevance_token_t prt = pp2_relevance_tokenize(r->pct, words, 0);
     int *mult = cluster->term_frequency_vec_tmp;
     const char *norm_str;
     int i, length = 0;
 
+    pp2_relevance_first(r->prt, words, 0);
     for (i = 1; i < r->vec_len; i++)
         mult[i] = 0;
 
-    while ((norm_str = pp2_relevance_token_next(prt)))
+    while ((norm_str = pp2_relevance_token_next(r->prt)))
     {
         int res = word_entry_match(r->entries, norm_str);
         if (res)
@@ -120,7 +118,6 @@ void relevance_countwords(struct relevance *r, struct record_cluster *cluster,
     }
 
     cluster->term_frequency_vec[0] += length;
-    pp2_relevance_token_destroy(prt);
 }
 
 struct relevance *relevance_create(pp2_charset_t pct,
@@ -136,11 +133,20 @@ struct relevance *relevance_create(pp2_charset_t pct,
     res->doc_frequency_vec = nmem_malloc(nmem, res->vec_len * sizeof(int));
     memset(res->doc_frequency_vec, 0, res->vec_len * sizeof(int));
     res->nmem = nmem;
-    res->entries = build_word_entries(pct, nmem, terms);
-    res->pct = pct;
+    res->prt = pp2_relevance_tokenize(pct);
+    res->entries = build_word_entries(res->prt, nmem, terms);
     return res;
 }
 
+void relevance_destroy(struct relevance **rp)
+{
+    if (*rp)
+    {
+        pp2_relevance_token_destroy((*rp)->prt);
+        *rp = 0;
+    }
+}
+
 void relevance_newrec(struct relevance *r, struct record_cluster *rec)
 {
     if (!rec->term_frequency_vec)
index 6bd2d42..1f30b95 100644 (file)
@@ -29,6 +29,7 @@ struct reclist;
 
 struct relevance *relevance_create(pp2_charset_t pct,
                                    NMEM nmem, const char **terms);
+void relevance_destroy(struct relevance **rp);
 void relevance_newrec(struct relevance *r, struct record_cluster *cluster);
 void relevance_countwords(struct relevance *r, struct record_cluster *cluster,
                           const char *words, int multiplier, const char *name);
index 5672536..c0f9c16 100644 (file)
@@ -57,6 +57,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include <yaz/timing.h>
 #endif
 
+#include "ppmutex.h"
 #include "parameters.h"
 #include "session.h"
 #include "eventl.h"
@@ -502,7 +503,7 @@ enum pazpar2_error_code search(struct session *se,
     reclist_destroy(se->reclist);
     se->reclist = 0;
     nmem_reset(se->nmem);
-    se->relevance = 0;
+    relevance_destroy(&se->relevance);
     se->total_records = se->total_hits = se->total_merged = 0;
     se->num_termlists = 0;
     live_channels = select_targets(se, filter);
@@ -651,6 +652,7 @@ void destroy_session(struct session *s)
     for (sdb = s->databases; sdb; sdb = sdb->next)
         session_database_destroy(sdb);
     normalize_cache_destroy(s->normalize_cache);
+    relevance_destroy(&s->relevance);
     reclist_destroy(s->reclist);
     nmem_destroy(s->nmem);
     service_destroy(s->service);
@@ -658,7 +660,8 @@ void destroy_session(struct session *s)
     wrbuf_destroy(s->wrbuf);
 }
 
-struct session *new_session(NMEM nmem, struct conf_service *service) 
+struct session *new_session(NMEM nmem, struct conf_service *service,
+                            const char *name)
 {
     int i;
     struct session *session = nmem_malloc(nmem, sizeof(*session));
@@ -685,8 +688,8 @@ struct session *new_session(NMEM nmem, struct conf_service *service)
     }
     session->normalize_cache = normalize_cache_create();
     session->mutex = 0;
-    yaz_mutex_create(&session->mutex);
-    yaz_mutex_set_name(session->mutex, "session");
+
+    pazpar2_mutex_create(&session->mutex, name);
 
     return session;
 }
@@ -951,10 +954,9 @@ static int get_mergekey_from_doc(xmlDoc *doc, xmlNode *root, const char *name,
                 {
                     const char *norm_str;
                     pp2_relevance_token_t prt =
-                        pp2_relevance_tokenize(
-                            service->mergekey_pct,
-                            (const char *) value, 0);
+                        pp2_relevance_tokenize(service->mergekey_pct);
                     
+                    pp2_relevance_first(prt, (const char *) value, 0);
                     if (wrbuf_len(norm_wr) > 0)
                         wrbuf_puts(norm_wr, " ");
                     wrbuf_puts(norm_wr, name);
@@ -991,10 +993,9 @@ static const char *get_mergekey(xmlDoc *doc, struct client *cl, int record_no,
     {
         const char *norm_str;
         pp2_relevance_token_t prt =
-            pp2_relevance_tokenize(
-                service->mergekey_pct,
-                (const char *) mergekey, 0);
-        
+            pp2_relevance_tokenize(service->mergekey_pct);
+
+        pp2_relevance_first(prt, (const char *) mergekey, 0);
         while ((norm_str = pp2_relevance_token_next(prt)))
         {
             if (*norm_str)
@@ -1274,9 +1275,10 @@ static int ingest_to_cluster(struct client *cl,
                                 nmem_malloc(se->nmem, 
                                             sizeof(union data_types));
                          
-                        prt = pp2_relevance_tokenize(
-                            service->sort_pct,
-                            rec_md->data.text.disp, skip_article);
+                        prt = pp2_relevance_tokenize(service->sort_pct);
+
+                        pp2_relevance_first(prt, rec_md->data.text.disp,
+                                            skip_article);
 
                         pp2_relevance_token_next(prt);
                          
index 4aba337..76a03a2 100644 (file)
@@ -143,7 +143,8 @@ struct hitsbytarget {
 };
 
 struct hitsbytarget *hitsbytarget(struct session *s, int *count, NMEM nmem);
-struct session *new_session(NMEM nmem, struct conf_service *service);
+struct session *new_session(NMEM nmem, struct conf_service *service,
+                            const char *name);
 void destroy_session(struct session *s);
 void session_init_databases(struct session *s);
 int load_targets(struct session *s, const char *fn);