From 2cfd7518dc740c913602ea8306ec8f27509b00d4 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Mon, 15 Mar 2010 15:21:30 +0100 Subject: [PATCH] Refactor PP2 charsets handling, use pazpar2_mutex. pp2_relevance_tokenize split into functions pp2_relevance_tokenize and pp2_relevance_first. This allows ICU tokenize handlers to be reused within a thread and makes ingest_to_cluster slightly faster. --- src/charsets.c | 69 +++++++++++++++++-------------------------------- src/charsets.h | 8 +++--- src/client.c | 4 +-- src/database.c | 7 +++-- src/http.c | 3 ++- src/http_command.c | 19 ++++++++------ src/normalize_cache.c | 4 +-- src/pazpar2.c | 6 ++++- src/pazpar2_config.c | 5 ++-- src/reclists.c | 4 +-- src/relevance.c | 28 ++++++++++++-------- src/relevance.h | 1 + src/session.c | 30 +++++++++++---------- src/session.h | 3 ++- 14 files changed, 94 insertions(+), 97 deletions(-) diff --git a/src/charsets.c b/src/charsets.c index dfc1015..380b213 100644 --- a/src/charsets.c +++ b/src/charsets.c @@ -38,12 +38,6 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #if YAZ_HAVE_ICU #include - -#if YAZ_VERSIONL >= 0x40002 -/* YAZ 4.0.2 or later as icu_iter */ -#define ICU_ITER 1 -#endif - #endif /* charset handle */ @@ -72,9 +66,7 @@ struct pp2_relevance_token_s { pp2_charset_t pct; /* our main charset handle (type+config) */ WRBUF norm_str; /* normized string we return (temporarily) */ WRBUF sort_str; /* sort string we return (temporarily) */ -#if ICU_ITER yaz_icu_iter_t iter; -#endif }; @@ -151,14 +143,30 @@ void pp2_charset_destroy(pp2_charset_t pct) } } -pp2_relevance_token_t pp2_relevance_tokenize(pp2_charset_t pct, - const char *buf, - int skip_article) +pp2_relevance_token_t pp2_relevance_tokenize(pp2_charset_t pct) { pp2_relevance_token_t prt = xmalloc(sizeof(*prt)); assert(pct); + prt->norm_str = wrbuf_alloc(); + prt->sort_str = wrbuf_alloc(); + prt->cp = 0; + prt->last_cp = 0; + prt->pct = pct; + +#if YAZ_HAVE_ICU + prt->iter = 0; + if
(pct->icu_chn) + prt->iter = icu_iter_create(pct->icu_chn); +#endif + return prt; +} + +void pp2_relevance_first(pp2_relevance_token_t prt, + const char *buf, + int skip_article) +{ if (skip_article) { const char *p = buf; @@ -176,39 +184,23 @@ pp2_relevance_token_t pp2_relevance_tokenize(pp2_charset_t pct, buf = p; } - prt->norm_str = wrbuf_alloc(); - prt->sort_str = wrbuf_alloc(); + wrbuf_rewind(prt->norm_str); + wrbuf_rewind(prt->sort_str); prt->cp = buf; prt->last_cp = 0; - prt->pct = pct; #if YAZ_HAVE_ICU -#if ICU_ITER - prt->iter = 0; -#endif - if (pct->icu_chn) + if (prt->iter) { -#if ICU_ITER - prt->iter = icu_iter_create(pct->icu_chn); icu_iter_first(prt->iter, buf); -#else - int ok = 0; - pct->icu_sts = U_ZERO_ERROR; - - ok = icu_chain_assign_cstr(pct->icu_chn, buf, &pct->icu_sts); -#endif - //printf("\nfield ok: %d '%s'\n", ok, buf); - prt->pct = pct; } #endif // YAZ_HAVE_ICU - return prt; } - void pp2_relevance_token_destroy(pp2_relevance_token_t prt) { assert(prt); -#if ICU_ITER +#if YAZ_HAVE_ICU if (prt->iter) icu_iter_destroy(prt->iter); #endif @@ -282,31 +274,16 @@ static const char *pp2_get_sort_ascii(pp2_relevance_token_t prt) #if YAZ_HAVE_ICU static const char *pp2_relevance_token_icu(pp2_relevance_token_t prt) { -#if ICU_ITER if (icu_iter_next(prt->iter)) { return icu_iter_get_norm(prt->iter); } -#else - if (icu_chain_next_token(prt->pct->icu_chn, &prt->pct->icu_sts)) - { - if (U_FAILURE(prt->pct->icu_sts)) - { - return 0; - } - return icu_chain_token_norm(prt->pct->icu_chn); - } -#endif return 0; } static const char *pp2_get_sort_icu(pp2_relevance_token_t prt) { -#if ICU_ITER return icu_iter_get_sortkey(prt->iter); -#else - return icu_chain_token_sortkey(prt->pct->icu_chn); -#endif } #endif // YAZ_HAVE_ICU diff --git a/src/charsets.h b/src/charsets.h index b09a78a..fbc6193 100644 --- a/src/charsets.h +++ b/src/charsets.h @@ -37,9 +37,11 @@ pp2_charset_t pp2_charset_create(struct icu_chain * icu_chn); void pp2_charset_destroy(pp2_charset_t pct); 
void pp2_charset_incref(pp2_charset_t pct); -pp2_relevance_token_t pp2_relevance_tokenize(pp2_charset_t pct, - const char *buf, - int skip_article); +pp2_relevance_token_t pp2_relevance_tokenize(pp2_charset_t pct); +void pp2_relevance_first(pp2_relevance_token_t prt, + const char *buf, + int skip_article); + void pp2_relevance_token_destroy(pp2_relevance_token_t prt); const char *pp2_relevance_token_next(pp2_relevance_token_t prt); const char *pp2_get_sort(pp2_relevance_token_t prt); diff --git a/src/client.c b/src/client.c index 01267cd..89197f1 100644 --- a/src/client.c +++ b/src/client.c @@ -58,6 +58,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #endif +#include "ppmutex.h" #include "session.h" #include "parameters.h" #include "client.h" @@ -595,8 +596,7 @@ struct client *client_create(void) r->resultset = 0; r->next = 0; r->mutex = 0; - yaz_mutex_create(&r->mutex); - yaz_mutex_set_name(r->mutex, "client"); + pazpar2_mutex_create(&r->mutex, "client"); r->ref_count = 1; diff --git a/src/database.c b/src/database.c index 8ab626e..ab425c7 100644 --- a/src/database.c +++ b/src/database.c @@ -29,6 +29,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include +#include "ppmutex.h" #include "session.h" #include "host.h" #include "pazpar2_config.h" @@ -109,8 +110,7 @@ static struct host *create_host(const char *hostport, iochan_man_t iochan_man) xfree(host); return 0; } - yaz_mutex_create(&host->mutex); - yaz_mutex_set_name(host->mutex, "host"); + pazpar2_mutex_create(&host->mutex, "host"); return host; } @@ -411,8 +411,7 @@ database_hosts_t database_hosts_create(void) database_hosts_t p = xmalloc(sizeof(*p)); p->hosts = 0; p->mutex = 0; - yaz_mutex_create(&p->mutex); - yaz_mutex_set_name(p->mutex, "database"); + pazpar2_mutex_create(&p->mutex, "database"); return p; } diff --git a/src/http.c b/src/http.c index 59d0050..60d8e30 100644 --- a/src/http.c +++ b/src/http.c @@ -63,6 +63,7 @@ 
typedef int socklen_t; #include #include +#include "ppmutex.h" #include "session.h" #include "http.h" @@ -1421,7 +1422,7 @@ void http_mutex_init(struct conf_server *server) assert(server); assert(server->http_server->mutex == 0); - yaz_mutex_create(&server->http_server->mutex); + pazpar2_mutex_create(&server->http_server->mutex, "http_server"); server->http_server->http_sessions = http_sessions_create(); } diff --git a/src/http_command.c b/src/http_command.c index 3d4df77..6e80a52 100644 --- a/src/http_command.c +++ b/src/http_command.c @@ -33,6 +33,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include +#include "ppmutex.h" #include "eventl.h" #include "parameters.h" #include "session.h" @@ -67,7 +68,7 @@ http_sessions_t http_sessions_create(void) http_sessions_t hs = xmalloc(sizeof(*hs)); hs->session_list = 0; hs->mutex = 0; - yaz_mutex_create(&hs->mutex); + pazpar2_mutex_create(&hs->mutex, "http_sessions"); return hs; } @@ -98,13 +99,16 @@ static void session_timeout(IOCHAN i, int event) } struct http_session *http_session_create(struct conf_service *service, - http_sessions_t http_sessions) + http_sessions_t http_sessions, + unsigned int sesid) { NMEM nmem = nmem_create(); struct http_session *r = nmem_malloc(nmem, sizeof(*r)); + char tmp_str[50]; - r->psession = new_session(nmem, service); - r->session_id = 0; + sprintf(tmp_str, "session#%u", sesid); + r->psession = new_session(nmem, service, tmp_str); + r->session_id = sesid; r->timestamp = 0; r->nmem = nmem; r->destroy_counter = r->activity_counter = 0; @@ -356,7 +360,8 @@ static void cmd_init(struct http_channel *c) return; } } - s = http_session_create(service, c->http_sessions); + sesid = make_sessionid(); + s = http_session_create(service, c->http_sessions, sesid); yaz_log(YLOG_DEBUG, "HTTP Session init"); if (!clear || *clear == '0') @@ -364,13 +369,11 @@ static void cmd_init(struct http_channel *c) else yaz_log(YLOG_LOG, "No databases preloaded"); - sesid = 
make_sessionid(); - s->session_id = sesid; if (process_settings(s->psession, c->request, c->response) < 0) return; sprintf(buf, HTTP_COMMAND_RESPONSE_PREFIX - "OK%u", sesid); + "OK%d", sesid); if (c->server->server_id) { strcat(buf, "."); diff --git a/src/normalize_cache.c b/src/normalize_cache.c index a863fc3..ee10186 100644 --- a/src/normalize_cache.c +++ b/src/normalize_cache.c @@ -27,6 +27,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #endif +#include "ppmutex.h" #include "normalize_cache.h" #include "pazpar2_config.h" @@ -50,8 +51,7 @@ normalize_cache_t normalize_cache_create(void) nc->nmem = nmem; nc->items = 0; nc->mutex = 0; - yaz_mutex_create(&nc->mutex); - yaz_mutex_set_name(nc->mutex, "normalize_cache"); + pazpar2_mutex_create(&nc->mutex, "normalize_cache"); return nc; } diff --git a/src/pazpar2.c b/src/pazpar2.c index 66e8a17..4e4b1ef 100644 --- a/src/pazpar2.c +++ b/src/pazpar2.c @@ -29,6 +29,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include "parameters.h" #include "session.h" +#include "ppmutex.h" #include #include #include @@ -142,7 +143,7 @@ static int sc_main( case 'V': show_version(); case 'X': - global_parameters.debug_mode = 1; + global_parameters.debug_mode++; break; default: fprintf(stderr, "Usage: pazpar2\n" @@ -170,6 +171,9 @@ static int sc_main( yaz_log(YLOG_FATAL, "Configuration must be given with option -f"); return 1; } + if (global_parameters.debug_mode > 1) + pazpar2_mutex_enable_debug(1); + config = config_create(config_fname, global_parameters.dump_records); if (!config) return 1; diff --git a/src/pazpar2_config.c b/src/pazpar2_config.c index 3f3e03b..094d7fe 100644 --- a/src/pazpar2_config.c +++ b/src/pazpar2_config.c @@ -38,6 +38,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #if HAVE_UNISTD_H #include #endif +#include "ppmutex.h" #include "incref.h" #include "pazpar2_config.h" #include "settings.h" @@ -697,7 +698,7 @@ 
struct conf_service *service_create(struct conf_server *server, inherit_server_settings(service); resolve_databases(service); assert(service->mutex == 0); - yaz_mutex_create(&service->mutex); + pazpar2_mutex_create(&service->mutex, "conf"); } return service; } @@ -1060,7 +1061,7 @@ void config_process_events(struct conf_config *conf) { resolve_databases(s); assert(s->mutex == 0); - yaz_mutex_create(&s->mutex); + pazpar2_mutex_create(&s->mutex, "service"); } http_mutex_init(ser); } diff --git a/src/reclists.c b/src/reclists.c index 1221b2b..7912ca2 100644 --- a/src/reclists.c +++ b/src/reclists.c @@ -25,6 +25,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include +#include "ppmutex.h" #include "session.h" #include "reclists.h" #include "jenkins_hash.h" @@ -249,8 +250,7 @@ struct reclist *reclist_create(NMEM nmem) res->num_records = 0; res->mutex = 0; - yaz_mutex_create(&res->mutex); - yaz_mutex_set_name(res->mutex, "reclist"); + pazpar2_mutex_create(&res->mutex, "reclist"); return res; } diff --git a/src/relevance.c b/src/relevance.c index c6b7829..35b7d83 100644 --- a/src/relevance.c +++ b/src/relevance.c @@ -33,7 +33,7 @@ struct relevance int *doc_frequency_vec; int vec_len; struct word_entry *entries; - pp2_charset_t pct; + pp2_relevance_token_t prt; NMEM nmem; }; @@ -68,7 +68,8 @@ int word_entry_match(struct word_entry *entries, const char *norm_str) return 0; } -static struct word_entry *build_word_entries(pp2_charset_t pct, NMEM nmem, +static struct word_entry *build_word_entries(pp2_relevance_token_t prt, + NMEM nmem, const char **terms) { int termno = 1; /* >0 signals THERE is an entry */ @@ -77,14 +78,11 @@ static struct word_entry *build_word_entries(pp2_charset_t pct, NMEM nmem, for (; *p; p++) { - pp2_relevance_token_t prt = pp2_relevance_tokenize(pct, *p, 0); const char *norm_str; + pp2_relevance_first(prt, *p, 0); while ((norm_str = pp2_relevance_token_next(prt))) add_word_entry(nmem, &entries, norm_str, termno); - - 
pp2_relevance_token_destroy(prt); - termno++; } return entries; @@ -93,15 +91,15 @@ static struct word_entry *build_word_entries(pp2_charset_t pct, NMEM nmem, void relevance_countwords(struct relevance *r, struct record_cluster *cluster, const char *words, int multiplier, const char *name) { - pp2_relevance_token_t prt = pp2_relevance_tokenize(r->pct, words, 0); int *mult = cluster->term_frequency_vec_tmp; const char *norm_str; int i, length = 0; + pp2_relevance_first(r->prt, words, 0); for (i = 1; i < r->vec_len; i++) mult[i] = 0; - while ((norm_str = pp2_relevance_token_next(prt))) + while ((norm_str = pp2_relevance_token_next(r->prt))) { int res = word_entry_match(r->entries, norm_str); if (res) @@ -120,7 +118,6 @@ void relevance_countwords(struct relevance *r, struct record_cluster *cluster, } cluster->term_frequency_vec[0] += length; - pp2_relevance_token_destroy(prt); } struct relevance *relevance_create(pp2_charset_t pct, @@ -136,11 +133,20 @@ struct relevance *relevance_create(pp2_charset_t pct, res->doc_frequency_vec = nmem_malloc(nmem, res->vec_len * sizeof(int)); memset(res->doc_frequency_vec, 0, res->vec_len * sizeof(int)); res->nmem = nmem; - res->entries = build_word_entries(pct, nmem, terms); - res->pct = pct; + res->prt = pp2_relevance_tokenize(pct); + res->entries = build_word_entries(res->prt, nmem, terms); return res; } +void relevance_destroy(struct relevance **rp) +{ + if (*rp) + { + pp2_relevance_token_destroy((*rp)->prt); + *rp = 0; + } +} + void relevance_newrec(struct relevance *r, struct record_cluster *rec) { if (!rec->term_frequency_vec) diff --git a/src/relevance.h b/src/relevance.h index 6bd2d42..1f30b95 100644 --- a/src/relevance.h +++ b/src/relevance.h @@ -29,6 +29,7 @@ struct reclist; struct relevance *relevance_create(pp2_charset_t pct, NMEM nmem, const char **terms); +void relevance_destroy(struct relevance **rp); void relevance_newrec(struct relevance *r, struct record_cluster *cluster); void relevance_countwords(struct relevance 
*r, struct record_cluster *cluster, const char *words, int multiplier, const char *name); diff --git a/src/session.c b/src/session.c index 5672536..c0f9c16 100644 --- a/src/session.c +++ b/src/session.c @@ -57,6 +57,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #endif +#include "ppmutex.h" #include "parameters.h" #include "session.h" #include "eventl.h" @@ -502,7 +503,7 @@ enum pazpar2_error_code search(struct session *se, reclist_destroy(se->reclist); se->reclist = 0; nmem_reset(se->nmem); - se->relevance = 0; + relevance_destroy(&se->relevance); se->total_records = se->total_hits = se->total_merged = 0; se->num_termlists = 0; live_channels = select_targets(se, filter); @@ -651,6 +652,7 @@ void destroy_session(struct session *s) for (sdb = s->databases; sdb; sdb = sdb->next) session_database_destroy(sdb); normalize_cache_destroy(s->normalize_cache); + relevance_destroy(&s->relevance); reclist_destroy(s->reclist); nmem_destroy(s->nmem); service_destroy(s->service); @@ -658,7 +660,8 @@ void destroy_session(struct session *s) wrbuf_destroy(s->wrbuf); } -struct session *new_session(NMEM nmem, struct conf_service *service) +struct session *new_session(NMEM nmem, struct conf_service *service, + const char *name) { int i; struct session *session = nmem_malloc(nmem, sizeof(*session)); @@ -685,8 +688,8 @@ struct session *new_session(NMEM nmem, struct conf_service *service) } session->normalize_cache = normalize_cache_create(); session->mutex = 0; - yaz_mutex_create(&session->mutex); - yaz_mutex_set_name(session->mutex, "session"); + + pazpar2_mutex_create(&session->mutex, name); return session; } @@ -951,10 +954,9 @@ static int get_mergekey_from_doc(xmlDoc *doc, xmlNode *root, const char *name, { const char *norm_str; pp2_relevance_token_t prt = - pp2_relevance_tokenize( - service->mergekey_pct, - (const char *) value, 0); + pp2_relevance_tokenize(service->mergekey_pct); + pp2_relevance_first(prt, (const char *) value, 0); if 
(wrbuf_len(norm_wr) > 0) wrbuf_puts(norm_wr, " "); wrbuf_puts(norm_wr, name); @@ -991,10 +993,9 @@ static const char *get_mergekey(xmlDoc *doc, struct client *cl, int record_no, { const char *norm_str; pp2_relevance_token_t prt = - pp2_relevance_tokenize( - service->mergekey_pct, - (const char *) mergekey, 0); - + pp2_relevance_tokenize(service->mergekey_pct); + + pp2_relevance_first(prt, (const char *) mergekey, 0); while ((norm_str = pp2_relevance_token_next(prt))) { if (*norm_str) @@ -1274,9 +1275,10 @@ static int ingest_to_cluster(struct client *cl, nmem_malloc(se->nmem, sizeof(union data_types)); - prt = pp2_relevance_tokenize( - service->sort_pct, - rec_md->data.text.disp, skip_article); + prt = pp2_relevance_tokenize(service->sort_pct); + + pp2_relevance_first(prt, rec_md->data.text.disp, + skip_article); pp2_relevance_token_next(prt); diff --git a/src/session.h b/src/session.h index 4aba337..76a03a2 100644 --- a/src/session.h +++ b/src/session.h @@ -143,7 +143,8 @@ struct hitsbytarget { }; struct hitsbytarget *hitsbytarget(struct session *s, int *count, NMEM nmem); -struct session *new_session(NMEM nmem, struct conf_service *service); +struct session *new_session(NMEM nmem, struct conf_service *service, + const char *name); void destroy_session(struct session *s); void session_init_databases(struct session *s); int load_targets(struct session *s, const char *fn); -- 1.7.10.4