From ca9817524ebe069581ad72f6bca0e9775d61e30c Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Mon, 15 Feb 2010 13:07:26 +0100 Subject: [PATCH] session.[ch] replaces logic.c, pazpar2.h Because that's what this is about : sessions. --- src/Makefile.am | 2 +- src/client.c | 2 +- src/connection.c | 2 +- src/database.c | 4 +- src/getaddrinfo.c | 2 +- src/host.h | 2 + src/http.c | 2 +- src/http.h | 2 + src/http_command.c | 2 +- src/logic.c | 1363 ---------------------------------------------------- src/pazpar2.c | 2 +- src/pazpar2.h | 186 ------- src/reclists.c | 2 +- src/relevance.c | 2 +- src/session.c | 1363 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/session.h | 186 +++++++ src/settings.c | 2 +- win/makefile | 2 +- 18 files changed, 1567 insertions(+), 1561 deletions(-) delete mode 100644 src/logic.c delete mode 100644 src/pazpar2.h create mode 100644 src/session.c create mode 100644 src/session.h diff --git a/src/Makefile.am b/src/Makefile.am index 602d245..f098000 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -18,7 +18,7 @@ AM_CFLAGS = $(YAZINC) libpazpar2_a_SOURCES = pazpar2_config.c pazpar2_config.h eventl.c eventl.h \ http.c http_command.c http.h \ - logic.c pazpar2.h \ + session.c session.h \ record.h record.c reclists.c reclists.h \ relevance.c relevance.h termlists.c termlists.h \ normalize7bit.h normalize7bit.c \ diff --git a/src/client.c b/src/client.c index 65a308d..4c6c780 100644 --- a/src/client.c +++ b/src/client.c @@ -58,7 +58,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #endif -#include "pazpar2.h" +#include "session.h" #include "parameters.h" #include "client.h" #include "connection.h" diff --git a/src/connection.c b/src/connection.c index c1f3ff3..b8a7d01 100644 --- a/src/connection.c +++ b/src/connection.c @@ -42,7 +42,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include #include "connection.h" -#include "pazpar2.h" +#include "session.h" 
#include "host.h" #include "client.h" #include "settings.h" diff --git a/src/database.c b/src/database.c index 6f1599a..8ffbee2 100644 --- a/src/database.c +++ b/src/database.c @@ -27,9 +27,11 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include #include +#include -#include "pazpar2.h" +#include "session.h" #include "host.h" +#include "pazpar2_config.h" #include "settings.h" #include "http.h" #include "zeerex.h" diff --git a/src/getaddrinfo.c b/src/getaddrinfo.c index 865d20a..12807b3 100644 --- a/src/getaddrinfo.c +++ b/src/getaddrinfo.c @@ -47,7 +47,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include -#include "pazpar2.h" +#include "session.h" #include "connection.h" #include "host.h" diff --git a/src/host.h b/src/host.h index 5964af5..49f0107 100644 --- a/src/host.h +++ b/src/host.h @@ -20,6 +20,8 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #ifndef HOST_H #define HOST_H +#include + /** \brief Represents a host (irrespective of databases) */ struct host { char *hostport; diff --git a/src/http.c b/src/http.c index 5158ddb..c383cba 100644 --- a/src/http.c +++ b/src/http.c @@ -63,7 +63,7 @@ typedef int socklen_t; #include #include -#include "pazpar2.h" +#include "session.h" #include "http.h" #define MAX_HTTP_HEADER 4096 diff --git a/src/http.h b/src/http.h index f9af7b6..01c0e20 100644 --- a/src/http.h +++ b/src/http.h @@ -20,6 +20,8 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #ifndef HTTP_H #define HTTP_H +#include + #include "eventl.h" // Generic I/O buffer struct http_buf; diff --git a/src/http_command.c b/src/http_command.c index 6fe87b5..422fef1 100644 --- a/src/http_command.c +++ b/src/http_command.c @@ -35,7 +35,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include "eventl.h" #include "parameters.h" -#include "pazpar2.h" +#include "session.h" #include "http.h" #include 
"settings.h" #include "client.h" diff --git a/src/logic.c b/src/logic.c deleted file mode 100644 index a19be1f..0000000 --- a/src/logic.c +++ /dev/null @@ -1,1363 +0,0 @@ -/* This file is part of Pazpar2. - Copyright (C) 2006-2010 Index Data - -Pazpar2 is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free -Software Foundation; either version 2, or (at your option) any later -version. - -Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY -WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - -*/ - -/** \file logic.c - \brief high-level logic; mostly user sessions and settings -*/ - -#if HAVE_CONFIG_H -#include -#endif - -#include -#include -#include -#if HAVE_SYS_TIME_H -#include -#endif -#if HAVE_UNISTD_H -#include -#endif -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define USE_TIMING 0 -#if USE_TIMING -#include -#endif - -#include "parameters.h" -#include "pazpar2.h" -#include "eventl.h" -#include "http.h" -#include "termlists.h" -#include "reclists.h" -#include "relevance.h" -#include "database.h" -#include "client.h" -#include "settings.h" -#include "normalize7bit.h" - -#define TERMLIST_HIGH_SCORE 25 - -#define MAX_CHUNK 15 - -// Note: Some things in this structure will eventually move to configuration -struct parameters global_parameters = -{ - 0, // dump_records - 0 // debug_mode -}; - -static void log_xml_doc(xmlDoc *doc) -{ - FILE *lf = yaz_log_file(); - xmlChar *result = 0; - int len = 0; -#if LIBXML_VERSION >= 20600 - 
xmlDocDumpFormatMemory(doc, &result, &len, 1); -#else - xmlDocDumpMemory(doc, &result, &len); -#endif - if (lf && len) - { - (void) fwrite(result, 1, len, lf); - fprintf(lf, "\n"); - } - xmlFree(result); -} - -static void session_enter(struct session *s) -{ - yaz_mutex_enter(s->mutex); -} - -static void session_leave(struct session *s) -{ - yaz_mutex_leave(s->mutex); -} - -// Recursively traverse query structure to extract terms. -void pull_terms(NMEM nmem, struct ccl_rpn_node *n, char **termlist, int *num) -{ - char **words; - int numwords; - int i; - - switch (n->kind) - { - case CCL_RPN_AND: - case CCL_RPN_OR: - case CCL_RPN_NOT: - case CCL_RPN_PROX: - pull_terms(nmem, n->u.p[0], termlist, num); - pull_terms(nmem, n->u.p[1], termlist, num); - break; - case CCL_RPN_TERM: - nmem_strsplit(nmem, " ", n->u.t.term, &words, &numwords); - for (i = 0; i < numwords; i++) - termlist[(*num)++] = words[i]; - break; - default: // NOOP - break; - } -} - - -static void add_facet(struct session *s, const char *type, const char *value) -{ - int i; - - if (!*value) - return; - for (i = 0; i < s->num_termlists; i++) - if (!strcmp(s->termlists[i].name, type)) - break; - if (i == s->num_termlists) - { - if (i == SESSION_MAX_TERMLISTS) - { - yaz_log(YLOG_FATAL, "Too many termlists"); - return; - } - - s->termlists[i].name = nmem_strdup(s->nmem, type); - s->termlists[i].termlist - = termlist_create(s->nmem, TERMLIST_HIGH_SCORE); - s->num_termlists = i + 1; - } - termlist_insert(s->termlists[i].termlist, value); -} - -static xmlDoc *record_to_xml(struct session_database *sdb, const char *rec) -{ - struct database *db = sdb->database; - xmlDoc *rdoc = 0; - - rdoc = xmlParseMemory(rec, strlen(rec)); - - if (!rdoc) - { - yaz_log(YLOG_FATAL, "Non-wellformed XML received from %s", - db->url); - return 0; - } - - if (global_parameters.dump_records) - { - yaz_log(YLOG_LOG, "Un-normalized record from %s", db->url); - log_xml_doc(rdoc); - } - - return rdoc; -} - -#define MAX_XSLT_ARGS 16 - -// 
Add static values from session database settings if applicable -static void insert_settings_parameters(struct session_database *sdb, - struct conf_service *service, - char **parms, - NMEM nmem) -{ - int i; - int nparms = 0; - int offset = 0; - - for (i = 0; i < service->num_metadata; i++) - { - struct conf_metadata *md = &service->metadata[i]; - int setting; - - if (md->setting == Metadata_setting_parameter && - (setting = settings_lookup_offset(service, md->name)) >= 0) - { - const char *val = session_setting_oneval(sdb, setting); - if (val && nparms < MAX_XSLT_ARGS) - { - char *buf; - int len = strlen(val); - buf = nmem_malloc(nmem, len + 3); - buf[0] = '\''; - strcpy(buf + 1, val); - buf[len+1] = '\''; - buf[len+2] = '\0'; - parms[offset++] = md->name; - parms[offset++] = buf; - nparms++; - } - } - } - parms[offset] = 0; -} - -// Add static values from session database settings if applicable -static void insert_settings_values(struct session_database *sdb, xmlDoc *doc, - struct conf_service *service) -{ - int i; - - for (i = 0; i < service->num_metadata; i++) - { - struct conf_metadata *md = &service->metadata[i]; - int offset; - - if (md->setting == Metadata_setting_postproc && - (offset = settings_lookup_offset(service, md->name)) >= 0) - { - const char *val = session_setting_oneval(sdb, offset); - if (val) - { - xmlNode *r = xmlDocGetRootElement(doc); - xmlNode *n = xmlNewTextChild(r, 0, (xmlChar *) "metadata", - (xmlChar *) val); - xmlSetProp(n, (xmlChar *) "type", (xmlChar *) md->name); - } - } - } -} - -static xmlDoc *normalize_record(struct session_database *sdb, - struct conf_service *service, - const char *rec, NMEM nmem) -{ - xmlDoc *rdoc = record_to_xml(sdb, rec); - - if (rdoc) - { - char *parms[MAX_XSLT_ARGS*2+1]; - - insert_settings_parameters(sdb, service, parms, nmem); - - if (normalize_record_transform(sdb->map, &rdoc, (const char **)parms)) - { - yaz_log(YLOG_WARN, "Normalize failed from %s", sdb->database->url); - } - else - { - 
insert_settings_values(sdb, rdoc, service); - - if (global_parameters.dump_records) - { - yaz_log(YLOG_LOG, "Normalized record from %s", - sdb->database->url); - log_xml_doc(rdoc); - } - } - } - return rdoc; -} - -void session_settings_dump(struct session *se, - struct session_database *db, - WRBUF w) -{ - if (db->settings) - { - int i, num = db->num_settings; - for (i = 0; i < num; i++) - { - struct setting *s = db->settings[i]; - for (;s ; s = s->next) - { - wrbuf_puts(w, "name); - wrbuf_puts(w, "\" value=\""); - wrbuf_xmlputs(w, s->value); - wrbuf_puts(w, "\"/>"); - } - if (db->settings[i]) - wrbuf_puts(w, "\n"); - } - } -} - -// Retrieve first defined value for 'name' for given database. -// Will be extended to take into account user associated with session -const char *session_setting_oneval(struct session_database *db, int offset) -{ - if (offset >= db->num_settings || !db->settings[offset]) - return ""; - return db->settings[offset]->value; -} - -// Prepare XSLT stylesheets for record normalization -// Structures are allocated on the session_wide nmem to avoid having -// to recompute this for every search. This would lead -// to leaking if a single session was to repeatedly change the PZ_XSLT -// setting. However, this is not a realistic use scenario. 
-static int prepare_map(struct session *se, struct session_database *sdb) -{ - const char *s; - - if (!sdb->settings) - { - yaz_log(YLOG_WARN, "No settings on %s", sdb->database->url); - return -1; - } - if ((s = session_setting_oneval(sdb, PZ_XSLT))) - { - char auto_stylesheet[256]; - - if (!strcmp(s, "auto")) - { - const char *request_syntax = session_setting_oneval( - sdb, PZ_REQUESTSYNTAX); - if (request_syntax) - { - char *cp; - yaz_snprintf(auto_stylesheet, sizeof(auto_stylesheet), - "%s.xsl", request_syntax); - for (cp = auto_stylesheet; *cp; cp++) - { - /* deliberately only consider ASCII */ - if (*cp > 32 && *cp < 127) - *cp = tolower(*cp); - } - s = auto_stylesheet; - } - else - { - yaz_log(YLOG_WARN, "No pz:requestsyntax for auto stylesheet"); - } - } - sdb->map = normalize_cache_get(se->normalize_cache, - se->service, s); - if (!sdb->map) - return -1; - } - return 0; -} - -// This analyzes settings and recomputes any supporting data structures -// if necessary. -static int prepare_session_database(struct session *se, - struct session_database *sdb) -{ - if (!sdb->settings) - { - yaz_log(YLOG_WARN, - "No settings associated with %s", sdb->database->url); - return -1; - } - if (sdb->settings[PZ_XSLT] && !sdb->map) - { - if (prepare_map(se, sdb) < 0) - return -1; - } - return 0; -} - -// called if watch should be removed because http_channel is to be destroyed -static void session_watch_cancel(void *data, struct http_channel *c, - void *data2) -{ - struct session_watchentry *ent = data; - - ent->fun = 0; - ent->data = 0; - ent->obs = 0; -} - -// set watch. 
Returns 0=OK, -1 if watch is already set -int session_set_watch(struct session *s, int what, - session_watchfun fun, void *data, - struct http_channel *chan) -{ - if (s->watchlist[what].fun) - return -1; - s->watchlist[what].fun = fun; - s->watchlist[what].data = data; - s->watchlist[what].obs = http_add_observer(chan, &s->watchlist[what], - session_watch_cancel); - return 0; -} - -void session_alert_watch(struct session *s, int what) -{ - if (s->watchlist[what].fun) - { - /* our watch is no longer associated with http_channel */ - void *data; - session_watchfun fun; - - http_remove_observer(s->watchlist[what].obs); - fun = s->watchlist[what].fun; - data = s->watchlist[what].data; - - /* reset watch before fun is invoked - in case fun wants to set - it again */ - s->watchlist[what].fun = 0; - s->watchlist[what].data = 0; - s->watchlist[what].obs = 0; - - fun(data); - } -} - -//callback for grep_databases -static void select_targets_callback(void *context, struct session_database *db) -{ - struct session *se = (struct session*) context; - struct client *cl = client_create(); - client_set_database(cl, db); - client_set_session(cl, se); -} - -// Associates a set of clients with a session; -// Note: Session-databases represent databases with per-session -// setting overrides -static int select_targets(struct session *se, const char *filter) -{ - while (se->clients) - client_destroy(se->clients); - - return session_grep_databases(se, filter, select_targets_callback); -} - -int session_active_clients(struct session *s) -{ - struct client *c; - int res = 0; - - for (c = s->clients; c; c = client_next_in_session(c)) - if (client_is_active(c)) - res++; - - return res; -} - - -enum pazpar2_error_code search(struct session *se, - const char *query, - const char *startrecs, const char *maxrecs, - const char *filter, - const char **addinfo) -{ - int live_channels = 0; - int no_working = 0; - int no_failed = 0; - struct client *cl; - - yaz_log(YLOG_DEBUG, "Search"); - - *addinfo 
= 0; - - session_enter(se); - nmem_reset(se->nmem); - se->relevance = 0; - se->total_records = se->total_hits = se->total_merged = 0; - reclist_destroy(se->reclist); - se->reclist = 0; - se->num_termlists = 0; - live_channels = select_targets(se, filter); - if (!live_channels) - { - session_leave(se); - return PAZPAR2_NO_TARGETS; - } - se->reclist = reclist_create(se->nmem); - - for (cl = se->clients; cl; cl = client_next_in_session(cl)) - { - if (maxrecs) - client_set_maxrecs(cl, atoi(maxrecs)); - if (startrecs) - client_set_startrecs(cl, atoi(startrecs)); - if (prepare_session_database(se, client_get_database(cl)) < 0) - continue; - // Parse query for target - if (client_parse_query(cl, query) < 0) - no_failed++; - else - { - no_working++; - if (client_prep_connection(cl, se->service->z3950_operation_timeout, - se->service->z3950_session_timeout, - se->service->server->iochan_man)) - client_start_search(cl); - } - } - session_leave(se); - if (no_working == 0) - { - if (no_failed > 0) - { - *addinfo = "query"; - return PAZPAR2_MALFORMED_PARAMETER_VALUE; - } - else - return PAZPAR2_NO_TARGETS; - } - return PAZPAR2_NO_ERROR; -} - -// Creates a new session_database object for a database -static void session_init_databases_fun(void *context, struct database *db) -{ - struct session *se = (struct session *) context; - struct session_database *new = nmem_malloc(se->session_nmem, sizeof(*new)); - int i; - - new->database = db; - - new->map = 0; - assert(db->settings); - new->settings = nmem_malloc(se->session_nmem, - sizeof(struct settings *) * db->num_settings); - new->num_settings = db->num_settings; - for (i = 0; i < db->num_settings; i++) - { - struct setting *setting = db->settings[i]; - new->settings[i] = setting; - } - new->next = se->databases; - se->databases = new; -} - -// Doesn't free memory associated with sdb -- nmem takes care of that -static void session_database_destroy(struct session_database *sdb) -{ - sdb->map = 0; -} - -// Initialize session_database 
list -- this represents this session's view -// of the database list -- subject to modification by the settings ws command -void session_init_databases(struct session *se) -{ - se->databases = 0; - predef_grep_databases(se, se->service, session_init_databases_fun); -} - -// Probably session_init_databases_fun should be refactored instead of -// called here. -static struct session_database *load_session_database(struct session *se, - char *id) -{ - struct database *db = new_database(id, se->session_nmem); - - resolve_database(se->service, db); - - session_init_databases_fun((void*) se, db); - - // New sdb is head of se->databases list - return se->databases; -} - -// Find an existing session database. If not found, load it -static struct session_database *find_session_database(struct session *se, - char *id) -{ - struct session_database *sdb; - - for (sdb = se->databases; sdb; sdb = sdb->next) - if (!strcmp(sdb->database->url, id)) - return sdb; - return load_session_database(se, id); -} - -// Apply a session override to a database -void session_apply_setting(struct session *se, char *dbname, char *setting, - char *value) -{ - struct session_database *sdb = find_session_database(se, dbname); - struct conf_service *service = se->service; - struct setting *new = nmem_malloc(se->session_nmem, sizeof(*new)); - int offset = settings_create_offset(service, setting); - - expand_settings_array(&sdb->settings, &sdb->num_settings, offset, - se->session_nmem); - new->precedence = 0; - new->target = dbname; - new->name = setting; - new->value = value; - new->next = sdb->settings[offset]; - sdb->settings[offset] = new; - - // Force later recompute of settings-driven data structures - // (happens when a search starts and client connections are prepared) - switch (offset) - { - case PZ_XSLT: - if (sdb->map) - { - sdb->map = 0; - } - break; - } -} - -void destroy_session(struct session *s) -{ - struct session_database *sdb; - - while (s->clients) - client_destroy(s->clients); - for 
(sdb = s->databases; sdb; sdb = sdb->next) - session_database_destroy(sdb); - normalize_cache_destroy(s->normalize_cache); - reclist_destroy(s->reclist); - nmem_destroy(s->nmem); - service_destroy(s->service); - yaz_mutex_destroy(&s->mutex); - wrbuf_destroy(s->wrbuf); -} - -struct session *new_session(NMEM nmem, struct conf_service *service) -{ - int i; - struct session *session = nmem_malloc(nmem, sizeof(*session)); - - yaz_log(YLOG_DEBUG, "New Pazpar2 session"); - - session->service = service; - session->relevance = 0; - session->total_hits = 0; - session->total_records = 0; - session->number_of_warnings_unknown_elements = 0; - session->number_of_warnings_unknown_metadata = 0; - session->num_termlists = 0; - session->reclist = 0; - session->clients = 0; - session->session_nmem = nmem; - session->nmem = nmem_create(); - session->wrbuf = wrbuf_alloc(); - session->databases = 0; - for (i = 0; i <= SESSION_WATCH_MAX; i++) - { - session->watchlist[i].data = 0; - session->watchlist[i].fun = 0; - } - session->normalize_cache = normalize_cache_create(); - session->mutex = 0; - yaz_mutex_create(&session->mutex); - - return session; -} - -struct hitsbytarget *hitsbytarget(struct session *se, int *count, NMEM nmem) -{ - struct hitsbytarget *res = 0; - struct client *cl; - size_t sz = 0; - - session_enter(se); - for (cl = se->clients; cl; cl = client_next_in_session(cl)) - sz++; - - res = nmem_malloc(nmem, sizeof(*res) * sz); - *count = 0; - for (cl = se->clients; cl; cl = client_next_in_session(cl)) - { - WRBUF w = wrbuf_alloc(); - const char *name = session_setting_oneval(client_get_database(cl), - PZ_NAME); - - res[*count].id = client_get_database(cl)->database->url; - res[*count].name = *name ? name : "Unknown"; - res[*count].hits = client_get_hits(cl); - res[*count].records = client_get_num_records(cl); - res[*count].diagnostic = client_get_diagnostic(cl); - res[*count].state = client_get_state_str(cl); - res[*count].connected = client_get_connection(cl) ? 
1 : 0; - session_settings_dump(se, client_get_database(cl), w); - res[*count].settings_xml = w; - (*count)++; - } - session_leave(se); - return res; -} - -struct termlist_score **termlist(struct session *s, const char *name, int *num) -{ - int i; - struct termlist_score **tl = 0; - - session_enter(s); - for (i = 0; i < s->num_termlists; i++) - if (!strcmp((const char *) s->termlists[i].name, name)) - { - tl = termlist_highscore(s->termlists[i].termlist, num); - break; - } - session_leave(s); - return tl; -} - -#ifdef MISSING_HEADERS -void report_nmem_stats(void) -{ - size_t in_use, is_free; - - nmem_get_memory_in_use(&in_use); - nmem_get_memory_free(&is_free); - - yaz_log(YLOG_LOG, "nmem stat: use=%ld free=%ld", - (long) in_use, (long) is_free); -} -#endif - -struct record_cluster *show_single_start(struct session *s, const char *id, - struct record_cluster **prev_r, - struct record_cluster **next_r) -{ - struct record_cluster *r; - - session_enter(s); - reclist_enter(s->reclist); - *prev_r = 0; - *next_r = 0; - while ((r = reclist_read_record(s->reclist))) - { - if (!strcmp(r->recid, id)) - { - *next_r = reclist_read_record(s->reclist); - break; - } - *prev_r = r; - } - reclist_leave(s->reclist); - if (!r) - session_leave(s); - return r; -} - -void show_single_stop(struct session *s, struct record_cluster *rec) -{ - session_leave(s); -} - -struct record_cluster **show_range_start(struct session *s, - struct reclist_sortparms *sp, - int start, int *num, int *total, Odr_int *sumhits) -{ - struct record_cluster **recs = nmem_malloc(s->nmem, *num - * sizeof(struct record_cluster *)); - struct reclist_sortparms *spp; - int i; -#if USE_TIMING - yaz_timing_t t = yaz_timing_create(); -#endif - - session_enter(s); - if (!s->relevance) - { - *num = 0; - *total = 0; - *sumhits = 0; - recs = 0; - } - else - { - for (spp = sp; spp; spp = spp->next) - if (spp->type == Metadata_sortkey_relevance) - { - relevance_prepare_read(s->relevance, s->reclist); - break; - } - 
reclist_sort(s->reclist, sp); - - reclist_enter(s->reclist); - *total = reclist_get_num_records(s->reclist); - *sumhits = s->total_hits; - - for (i = 0; i < start; i++) - if (!reclist_read_record(s->reclist)) - { - *num = 0; - recs = 0; - break; - } - - for (i = 0; i < *num; i++) - { - struct record_cluster *r = reclist_read_record(s->reclist); - if (!r) - { - *num = i; - break; - } - recs[i] = r; - } - reclist_leave(s->reclist); - } -#if USE_TIMING - yaz_timing_stop(t); - yaz_log(YLOG_LOG, "show %6.5f %3.2f %3.2f", - yaz_timing_get_real(t), yaz_timing_get_user(t), - yaz_timing_get_sys(t)); - yaz_timing_destroy(&t); -#endif - return recs; -} - -void show_range_stop(struct session *s, struct record_cluster **recs) -{ - session_leave(s); -} - -void statistics(struct session *se, struct statistics *stat) -{ - struct client *cl; - int count = 0; - - memset(stat, 0, sizeof(*stat)); - for (cl = se->clients; cl; cl = client_next_in_session(cl)) - { - if (!client_get_connection(cl)) - stat->num_no_connection++; - switch (client_get_state(cl)) - { - case Client_Connecting: stat->num_connecting++; break; - case Client_Working: stat->num_working++; break; - case Client_Idle: stat->num_idle++; break; - case Client_Failed: stat->num_failed++; break; - case Client_Error: stat->num_error++; break; - default: break; - } - count++; - } - stat->num_hits = se->total_hits; - stat->num_records = se->total_records; - - stat->num_clients = count; -} - -static struct record_metadata *record_metadata_init( - NMEM nmem, const char *value, enum conf_metadata_type type, - struct _xmlAttr *attr) -{ - struct record_metadata *rec_md = record_metadata_create(nmem); - struct record_metadata_attr **attrp = &rec_md->attributes; - - for (; attr; attr = attr->next) - { - if (attr->children && attr->children->content) - { - if (strcmp((const char *) attr->name, "type")) - { /* skip the "type" attribute.. 
Its value is already part of - the element in output (md-%s) and so repeating it here - is redundant */ - *attrp = nmem_malloc(nmem, sizeof(**attrp)); - (*attrp)->name = - nmem_strdup(nmem, (const char *) attr->name); - (*attrp)->value = - nmem_strdup(nmem, (const char *) attr->children->content); - attrp = &(*attrp)->next; - } - } - } - *attrp = 0; - - if (type == Metadata_type_generic) - { - char *p = nmem_strdup(nmem, value); - - p = normalize7bit_generic(p, " ,/.:(["); - - rec_md->data.text.disp = p; - rec_md->data.text.sort = 0; - } - else if (type == Metadata_type_year || type == Metadata_type_date) - { - int first, last; - int longdate = 0; - - if (type == Metadata_type_date) - longdate = 1; - if (extract7bit_dates((char *) value, &first, &last, longdate) < 0) - return 0; - - rec_md->data.number.min = first; - rec_md->data.number.max = last; - } - else - return 0; - return rec_md; -} - -static int get_mergekey_from_doc(xmlDoc *doc, xmlNode *root, const char *name, - struct conf_service *service, WRBUF norm_wr) -{ - xmlNode *n; - int no_found = 0; - for (n = root->children; n; n = n->next) - { - if (n->type != XML_ELEMENT_NODE) - continue; - if (!strcmp((const char *) n->name, "metadata")) - { - xmlChar *type = xmlGetProp(n, (xmlChar *) "type"); - if (!strcmp(name, (const char *) type)) - { - xmlChar *value = xmlNodeListGetString(doc, n->children, 1); - if (value) - { - const char *norm_str; - pp2_relevance_token_t prt = - pp2_relevance_tokenize( - service->mergekey_pct, - (const char *) value, 0); - - if (wrbuf_len(norm_wr) > 0) - wrbuf_puts(norm_wr, " "); - wrbuf_puts(norm_wr, name); - while ((norm_str = - pp2_relevance_token_next(prt))) - { - if (*norm_str) - { - wrbuf_puts(norm_wr, " "); - wrbuf_puts(norm_wr, norm_str); - } - } - xmlFree(value); - pp2_relevance_token_destroy(prt); - no_found++; - } - } - xmlFree(type); - } - } - return no_found; -} - -static const char *get_mergekey(xmlDoc *doc, struct client *cl, int record_no, - struct conf_service 
*service, NMEM nmem) -{ - char *mergekey_norm = 0; - xmlNode *root = xmlDocGetRootElement(doc); - WRBUF norm_wr = wrbuf_alloc(); - - /* consider mergekey from XSL first */ - xmlChar *mergekey = xmlGetProp(root, (xmlChar *) "mergekey"); - if (mergekey) - { - const char *norm_str; - pp2_relevance_token_t prt = - pp2_relevance_tokenize( - service->mergekey_pct, - (const char *) mergekey, 0); - - while ((norm_str = pp2_relevance_token_next(prt))) - { - if (*norm_str) - { - if (wrbuf_len(norm_wr)) - wrbuf_puts(norm_wr, " "); - wrbuf_puts(norm_wr, norm_str); - } - } - pp2_relevance_token_destroy(prt); - xmlFree(mergekey); - } - else - { - /* no mergekey defined in XSL. Look for mergekey metadata instead */ - int field_id; - for (field_id = 0; field_id < service->num_metadata; field_id++) - { - struct conf_metadata *ser_md = &service->metadata[field_id]; - if (ser_md->mergekey != Metadata_mergekey_no) - { - int r = get_mergekey_from_doc(doc, root, ser_md->name, - service, norm_wr); - if (r == 0 && ser_md->mergekey == Metadata_mergekey_required) - { - /* no mergekey on this one and it is required.. - Generate unique key instead */ - wrbuf_rewind(norm_wr); - break; - } - } - } - } - - /* generate unique key if none is not generated already or is empty */ - if (wrbuf_len(norm_wr) == 0) - { - wrbuf_printf(norm_wr, "%s-%d", - client_get_database(cl)->database->url, record_no); - } - if (wrbuf_len(norm_wr) > 0) - mergekey_norm = nmem_strdup(nmem, wrbuf_cstr(norm_wr)); - wrbuf_destroy(norm_wr); - return mergekey_norm; -} - -/** \brief see if metadata for pz:recordfilter exists - \param root xml root element of normalized record - \param sdb session database for client - \retval 0 if there is no metadata for pz:recordfilter - \retval 1 if there is metadata for pz:recordfilter - - If there is no pz:recordfilter defined, this function returns 1 - as well. 
-*/ - -static int check_record_filter(xmlNode *root, struct session_database *sdb) -{ - int match = 0; - xmlNode *n; - const char *s; - s = session_setting_oneval(sdb, PZ_RECORDFILTER); - - if (!s || !*s) - return 1; - - for (n = root->children; n; n = n->next) - { - if (n->type != XML_ELEMENT_NODE) - continue; - if (!strcmp((const char *) n->name, "metadata")) - { - xmlChar *type = xmlGetProp(n, (xmlChar *) "type"); - if (type) - { - size_t len; - const char *eq = strchr(s, '~'); - if (eq) - len = eq - s; - else - len = strlen(s); - if (len == strlen((const char *)type) && - !memcmp((const char *) type, s, len)) - { - xmlChar *value = xmlNodeGetContent(n); - if (value && *value) - { - if (!eq || strstr((const char *) value, eq+1)) - match = 1; - } - xmlFree(value); - } - xmlFree(type); - } - } - } - return match; -} - - -static int ingest_to_cluster(struct client *cl, - xmlDoc *xdoc, - xmlNode *root, - int record_no, - const char *mergekey_norm); - -/** \brief ingest XML record - \param cl client holds the result set for record - \param rec record buffer (0 terminated) - \param record_no record position (1, 2, ..) 
- \retval 0 OK - \retval -1 failure -*/ -int ingest_record(struct client *cl, const char *rec, - int record_no, NMEM nmem) -{ - struct session_database *sdb = client_get_database(cl); - struct session *se = client_get_session(cl); - struct conf_service *service = se->service; - xmlDoc *xdoc = normalize_record(sdb, service, rec, nmem); - xmlNode *root; - const char *mergekey_norm; - int ret; - - if (!xdoc) - return -1; - - root = xmlDocGetRootElement(xdoc); - - if (!check_record_filter(root, sdb)) - { - yaz_log(YLOG_WARN, "Filtered out record no %d from %s", record_no, - sdb->database->url); - xmlFreeDoc(xdoc); - return -1; - } - - mergekey_norm = get_mergekey(xdoc, cl, record_no, service, nmem); - if (!mergekey_norm) - { - yaz_log(YLOG_WARN, "Got no mergekey"); - xmlFreeDoc(xdoc); - return -1; - } - session_enter(se); - ret = ingest_to_cluster(cl, xdoc, root, record_no, mergekey_norm); - session_leave(se); - - xmlFreeDoc(xdoc); - - return ret; -} - -static int ingest_to_cluster(struct client *cl, - xmlDoc *xdoc, - xmlNode *root, - int record_no, - const char *mergekey_norm) -{ - xmlNode *n; - xmlChar *type = 0; - xmlChar *value = 0; - struct session_database *sdb = client_get_database(cl); - struct session *se = client_get_session(cl); - struct conf_service *service = se->service; - struct record *record = record_create(se->nmem, - service->num_metadata, - service->num_sortkeys, cl, - record_no); - struct record_cluster *cluster = reclist_insert(se->reclist, - service, - record, - mergekey_norm, - &se->total_merged); - if (!cluster) - return -1; - if (global_parameters.dump_records) - yaz_log(YLOG_LOG, "Cluster id %s from %s (#%d)", cluster->recid, - sdb->database->url, record_no); - relevance_newrec(se->relevance, cluster); - - // now parsing XML record and adding data to cluster or record metadata - for (n = root->children; n; n = n->next) - { - pp2_relevance_token_t prt; - if (type) - xmlFree(type); - if (value) - xmlFree(value); - type = value = 0; - - if 
(n->type != XML_ELEMENT_NODE) - continue; - if (!strcmp((const char *) n->name, "metadata")) - { - struct conf_metadata *ser_md = 0; - struct conf_sortkey *ser_sk = 0; - struct record_metadata **wheretoput = 0; - struct record_metadata *rec_md = 0; - int md_field_id = -1; - int sk_field_id = -1; - - type = xmlGetProp(n, (xmlChar *) "type"); - value = xmlNodeListGetString(xdoc, n->children, 1); - - if (!type || !value || !*value) - continue; - - md_field_id - = conf_service_metadata_field_id(service, (const char *) type); - if (md_field_id < 0) - { - if (se->number_of_warnings_unknown_metadata == 0) - { - yaz_log(YLOG_WARN, - "Ignoring unknown metadata element: %s", type); - } - se->number_of_warnings_unknown_metadata++; - continue; - } - - ser_md = &service->metadata[md_field_id]; - - if (ser_md->sortkey_offset >= 0){ - sk_field_id = ser_md->sortkey_offset; - ser_sk = &service->sortkeys[sk_field_id]; - } - - // non-merged metadata - rec_md = record_metadata_init(se->nmem, (const char *) value, - ser_md->type, n->properties); - if (!rec_md) - { - yaz_log(YLOG_WARN, "bad metadata data '%s' for element '%s'", - value, type); - continue; - } - wheretoput = &record->metadata[md_field_id]; - while (*wheretoput) - wheretoput = &(*wheretoput)->next; - *wheretoput = rec_md; - - // merged metadata - rec_md = record_metadata_init(se->nmem, (const char *) value, - ser_md->type, 0); - wheretoput = &cluster->metadata[md_field_id]; - - // and polulate with data: - // assign cluster or record based on merge action - if (ser_md->merge == Metadata_merge_unique) - { - struct record_metadata *mnode; - for (mnode = *wheretoput; mnode; mnode = mnode->next) - if (!strcmp((const char *) mnode->data.text.disp, - rec_md->data.text.disp)) - break; - if (!mnode) - { - rec_md->next = *wheretoput; - *wheretoput = rec_md; - } - } - else if (ser_md->merge == Metadata_merge_longest) - { - if (!*wheretoput - || strlen(rec_md->data.text.disp) - > strlen((*wheretoput)->data.text.disp)) - { - 
*wheretoput = rec_md; - if (ser_sk) - { - const char *sort_str = 0; - int skip_article = - ser_sk->type == Metadata_sortkey_skiparticle; - - if (!cluster->sortkeys[sk_field_id]) - cluster->sortkeys[sk_field_id] = - nmem_malloc(se->nmem, - sizeof(union data_types)); - - prt = pp2_relevance_tokenize( - service->sort_pct, - rec_md->data.text.disp, skip_article); - - pp2_relevance_token_next(prt); - - sort_str = pp2_get_sort(prt); - - cluster->sortkeys[sk_field_id]->text.disp = - rec_md->data.text.disp; - if (!sort_str) - { - sort_str = rec_md->data.text.disp; - yaz_log(YLOG_WARN, - "Could not make sortkey. Bug #1858"); - } - cluster->sortkeys[sk_field_id]->text.sort = - nmem_strdup(se->nmem, sort_str); -#if 0 - yaz_log(YLOG_LOG, "text disp=%s", - cluster->sortkeys[sk_field_id]->text.disp); - yaz_log(YLOG_LOG, "text sort=%s", - cluster->sortkeys[sk_field_id]->text.sort); -#endif - pp2_relevance_token_destroy(prt); - } - } - } - else if (ser_md->merge == Metadata_merge_all) - { - rec_md->next = *wheretoput; - *wheretoput = rec_md; - } - else if (ser_md->merge == Metadata_merge_range) - { - if (!*wheretoput) - { - *wheretoput = rec_md; - if (ser_sk) - cluster->sortkeys[sk_field_id] - = &rec_md->data; - } - else - { - int this_min = rec_md->data.number.min; - int this_max = rec_md->data.number.max; - if (this_min < (*wheretoput)->data.number.min) - (*wheretoput)->data.number.min = this_min; - if (this_max > (*wheretoput)->data.number.max) - (*wheretoput)->data.number.max = this_max; - } - } - - - // ranking of _all_ fields enabled ... - if (ser_md->rank) - relevance_countwords(se->relevance, cluster, - (char *) value, ser_md->rank, - ser_md->name); - - // construct facets ... 
- if (ser_md->termlist) - { - if (ser_md->type == Metadata_type_year) - { - char year[64]; - sprintf(year, "%d", rec_md->data.number.max); - add_facet(se, (char *) type, year); - if (rec_md->data.number.max != rec_md->data.number.min) - { - sprintf(year, "%d", rec_md->data.number.min); - add_facet(se, (char *) type, year); - } - } - else - add_facet(se, (char *) type, (char *) value); - } - - // cleaning up - xmlFree(type); - xmlFree(value); - type = value = 0; - } - else - { - if (se->number_of_warnings_unknown_elements == 0) - yaz_log(YLOG_WARN, - "Unexpected element in internal record: %s", n->name); - se->number_of_warnings_unknown_elements++; - } - } - if (type) - xmlFree(type); - if (value) - xmlFree(value); - - relevance_donerecord(se->relevance, cluster); - se->total_records++; - - return 0; -} - -/* - * Local variables: - * c-basic-offset: 4 - * c-file-style: "Stroustrup" - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ - diff --git a/src/pazpar2.c b/src/pazpar2.c index f94d585..66e8a17 100644 --- a/src/pazpar2.c +++ b/src/pazpar2.c @@ -28,7 +28,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include "parameters.h" -#include "pazpar2.h" +#include "session.h" #include #include #include diff --git a/src/pazpar2.h b/src/pazpar2.h deleted file mode 100644 index c814be7..0000000 --- a/src/pazpar2.h +++ /dev/null @@ -1,186 +0,0 @@ -/* This file is part of Pazpar2. - Copyright (C) 2006-2010 Index Data - -Pazpar2 is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free -Software Foundation; either version 2, or (at your option) any later -version. - -Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY -WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - -*/ - -#ifndef PAZPAR2_H -#define PAZPAR2_H - -#include -#include -#include -#include - -#include "termlists.h" -#include "reclists.h" -#include "http.h" - -struct record; -struct client; - - -enum pazpar2_error_code { - PAZPAR2_NO_ERROR = 0, - - PAZPAR2_NO_SESSION, - PAZPAR2_MISSING_PARAMETER, - PAZPAR2_MALFORMED_PARAMETER_VALUE, - PAZPAR2_MALFORMED_PARAMETER_ENCODING, - PAZPAR2_MALFORMED_SETTING, - PAZPAR2_HITCOUNTS_FAILED, - PAZPAR2_RECORD_MISSING, - PAZPAR2_NO_TARGETS, - PAZPAR2_CONFIG_TARGET, - PAZPAR2_RECORD_FAIL, - PAZPAR2_NOT_IMPLEMENTED, - PAZPAR2_NO_SERVICE, - - PAZPAR2_LAST_ERROR -}; - -struct host; -// Represents a (virtual) database on a host -struct database { - struct host *host; - char *url; - char **databases; - int errors; - struct zr_explain *explain; - int num_settings; - struct setting **settings; - struct database *next; -}; - - -// Represents a database as viewed from one session, possibly with settings overriden -// for that session -struct session_database -{ - struct database *database; - int num_settings; - struct setting **settings; - normalize_record_t map; - struct session_database *next; -}; - -#define SESSION_WATCH_SHOW 0 -#define SESSION_WATCH_RECORD 1 -#define SESSION_WATCH_MAX 1 - -#define SESSION_MAX_TERMLISTS 10 - -typedef void (*session_watchfun)(void *data); - -struct named_termlist -{ - char *name; - struct termlist *termlist; -}; - -struct session_watchentry { - void *data; - http_channel_observer_t obs; - session_watchfun fun; -}; - -// End-user session -struct session { - struct conf_service *service; /* service in use for this session */ - struct session_database *databases; // All databases, settings overriden - struct client *clients; // Clients connected for current search - NMEM session_nmem; // Nmem for 
session-permanent storage - NMEM nmem; // Nmem for each operation (i.e. search, result set, etc) - WRBUF wrbuf; // Wrbuf for scratch(i.e. search) - int num_termlists; - struct named_termlist termlists[SESSION_MAX_TERMLISTS]; - struct relevance *relevance; - struct reclist *reclist; - struct session_watchentry watchlist[SESSION_WATCH_MAX + 1]; - Odr_int total_hits; - int total_records; - int total_merged; - int number_of_warnings_unknown_elements; - int number_of_warnings_unknown_metadata; - normalize_cache_t normalize_cache; - YAZ_MUTEX mutex; -}; - -struct statistics { - int num_clients; - int num_no_connection; - int num_connecting; - int num_working; - int num_idle; - int num_failed; - int num_error; - Odr_int num_hits; - int num_records; -}; - -struct hitsbytarget { - char *id; - const char *name; - Odr_int hits; - int diagnostic; - int records; - const char *state; - int connected; - WRBUF settings_xml; -}; - -struct hitsbytarget *hitsbytarget(struct session *s, int *count, NMEM nmem); -struct session *new_session(NMEM nmem, struct conf_service *service); -void destroy_session(struct session *s); -void session_init_databases(struct session *s); -int load_targets(struct session *s, const char *fn); -void statistics(struct session *s, struct statistics *stat); -enum pazpar2_error_code search(struct session *s, const char *query, - const char *startrecs, const char *maxrecs, - const char *filter, const char **addinfo); -struct record_cluster **show_range_start(struct session *s, - struct reclist_sortparms *sp, - int start, - int *num, int *total, Odr_int *sumhits); -void show_range_stop(struct session *s, struct record_cluster **recs); - -struct record_cluster *show_single_start(struct session *s, const char *id, - struct record_cluster **prev_r, - struct record_cluster **next_r); -void show_single_stop(struct session *s, struct record_cluster *rec); -struct termlist_score **termlist(struct session *s, const char *name, int *num); -int session_set_watch(struct 
session *s, int what, session_watchfun fun, void *data, struct http_channel *c); -int session_active_clients(struct session *s); -void session_apply_setting(struct session *se, char *dbname, char *setting, char *value); -const char *session_setting_oneval(struct session_database *db, int offset); - -int host_getaddrinfo(struct host *host, iochan_man_t iochan_man); - -int ingest_record(struct client *cl, const char *rec, int record_no, NMEM nmem); -void session_alert_watch(struct session *s, int what); -void pull_terms(NMEM nmem, struct ccl_rpn_node *n, char **termlist, int *num); - -#endif - -/* - * Local variables: - * c-basic-offset: 4 - * c-file-style: "Stroustrup" - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ - diff --git a/src/reclists.c b/src/reclists.c index 5efce55..a621d89 100644 --- a/src/reclists.c +++ b/src/reclists.c @@ -25,7 +25,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include -#include "pazpar2.h" +#include "session.h" #include "reclists.h" #include "jenkins_hash.h" diff --git a/src/relevance.c b/src/relevance.c index b7d613b..c6b7829 100644 --- a/src/relevance.c +++ b/src/relevance.c @@ -26,7 +26,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include "relevance.h" -#include "pazpar2.h" +#include "session.h" struct relevance { diff --git a/src/session.c b/src/session.c new file mode 100644 index 0000000..bb0e915 --- /dev/null +++ b/src/session.c @@ -0,0 +1,1363 @@ +/* This file is part of Pazpar2. + Copyright (C) 2006-2010 Index Data + +Pazpar2 is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*/ + +/** \file session.c + \brief high-level logic; mostly user sessions and settings +*/ + +#if HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#if HAVE_SYS_TIME_H +#include +#endif +#if HAVE_UNISTD_H +#include +#endif +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define USE_TIMING 0 +#if USE_TIMING +#include +#endif + +#include "parameters.h" +#include "session.h" +#include "eventl.h" +#include "http.h" +#include "termlists.h" +#include "reclists.h" +#include "relevance.h" +#include "database.h" +#include "client.h" +#include "settings.h" +#include "normalize7bit.h" + +#define TERMLIST_HIGH_SCORE 25 + +#define MAX_CHUNK 15 + +// Note: Some things in this structure will eventually move to configuration +struct parameters global_parameters = +{ + 0, // dump_records + 0 // debug_mode +}; + +static void log_xml_doc(xmlDoc *doc) +{ + FILE *lf = yaz_log_file(); + xmlChar *result = 0; + int len = 0; +#if LIBXML_VERSION >= 20600 + xmlDocDumpFormatMemory(doc, &result, &len, 1); +#else + xmlDocDumpMemory(doc, &result, &len); +#endif + if (lf && len) + { + (void) fwrite(result, 1, len, lf); + fprintf(lf, "\n"); + } + xmlFree(result); +} + +static void session_enter(struct session *s) +{ + yaz_mutex_enter(s->mutex); +} + +static void session_leave(struct session *s) +{ + yaz_mutex_leave(s->mutex); +} + +// Recursively traverse query structure to extract terms. 
+void pull_terms(NMEM nmem, struct ccl_rpn_node *n, char **termlist, int *num) +{ + char **words; + int numwords; + int i; + + switch (n->kind) + { + case CCL_RPN_AND: + case CCL_RPN_OR: + case CCL_RPN_NOT: + case CCL_RPN_PROX: + pull_terms(nmem, n->u.p[0], termlist, num); + pull_terms(nmem, n->u.p[1], termlist, num); + break; + case CCL_RPN_TERM: + nmem_strsplit(nmem, " ", n->u.t.term, &words, &numwords); + for (i = 0; i < numwords; i++) + termlist[(*num)++] = words[i]; + break; + default: // NOOP + break; + } +} + + +static void add_facet(struct session *s, const char *type, const char *value) +{ + int i; + + if (!*value) + return; + for (i = 0; i < s->num_termlists; i++) + if (!strcmp(s->termlists[i].name, type)) + break; + if (i == s->num_termlists) + { + if (i == SESSION_MAX_TERMLISTS) + { + yaz_log(YLOG_FATAL, "Too many termlists"); + return; + } + + s->termlists[i].name = nmem_strdup(s->nmem, type); + s->termlists[i].termlist + = termlist_create(s->nmem, TERMLIST_HIGH_SCORE); + s->num_termlists = i + 1; + } + termlist_insert(s->termlists[i].termlist, value); +} + +static xmlDoc *record_to_xml(struct session_database *sdb, const char *rec) +{ + struct database *db = sdb->database; + xmlDoc *rdoc = 0; + + rdoc = xmlParseMemory(rec, strlen(rec)); + + if (!rdoc) + { + yaz_log(YLOG_FATAL, "Non-wellformed XML received from %s", + db->url); + return 0; + } + + if (global_parameters.dump_records) + { + yaz_log(YLOG_LOG, "Un-normalized record from %s", db->url); + log_xml_doc(rdoc); + } + + return rdoc; +} + +#define MAX_XSLT_ARGS 16 + +// Add static values from session database settings if applicable +static void insert_settings_parameters(struct session_database *sdb, + struct conf_service *service, + char **parms, + NMEM nmem) +{ + int i; + int nparms = 0; + int offset = 0; + + for (i = 0; i < service->num_metadata; i++) + { + struct conf_metadata *md = &service->metadata[i]; + int setting; + + if (md->setting == Metadata_setting_parameter && + (setting = 
settings_lookup_offset(service, md->name)) >= 0) + { + const char *val = session_setting_oneval(sdb, setting); + if (val && nparms < MAX_XSLT_ARGS) + { + char *buf; + int len = strlen(val); + buf = nmem_malloc(nmem, len + 3); + buf[0] = '\''; + strcpy(buf + 1, val); + buf[len+1] = '\''; + buf[len+2] = '\0'; + parms[offset++] = md->name; + parms[offset++] = buf; + nparms++; + } + } + } + parms[offset] = 0; +} + +// Add static values from session database settings if applicable +static void insert_settings_values(struct session_database *sdb, xmlDoc *doc, + struct conf_service *service) +{ + int i; + + for (i = 0; i < service->num_metadata; i++) + { + struct conf_metadata *md = &service->metadata[i]; + int offset; + + if (md->setting == Metadata_setting_postproc && + (offset = settings_lookup_offset(service, md->name)) >= 0) + { + const char *val = session_setting_oneval(sdb, offset); + if (val) + { + xmlNode *r = xmlDocGetRootElement(doc); + xmlNode *n = xmlNewTextChild(r, 0, (xmlChar *) "metadata", + (xmlChar *) val); + xmlSetProp(n, (xmlChar *) "type", (xmlChar *) md->name); + } + } + } +} + +static xmlDoc *normalize_record(struct session_database *sdb, + struct conf_service *service, + const char *rec, NMEM nmem) +{ + xmlDoc *rdoc = record_to_xml(sdb, rec); + + if (rdoc) + { + char *parms[MAX_XSLT_ARGS*2+1]; + + insert_settings_parameters(sdb, service, parms, nmem); + + if (normalize_record_transform(sdb->map, &rdoc, (const char **)parms)) + { + yaz_log(YLOG_WARN, "Normalize failed from %s", sdb->database->url); + } + else + { + insert_settings_values(sdb, rdoc, service); + + if (global_parameters.dump_records) + { + yaz_log(YLOG_LOG, "Normalized record from %s", + sdb->database->url); + log_xml_doc(rdoc); + } + } + } + return rdoc; +} + +void session_settings_dump(struct session *se, + struct session_database *db, + WRBUF w) +{ + if (db->settings) + { + int i, num = db->num_settings; + for (i = 0; i < num; i++) + { + struct setting *s = db->settings[i]; + for 
(;s ; s = s->next) + { + wrbuf_puts(w, "name); + wrbuf_puts(w, "\" value=\""); + wrbuf_xmlputs(w, s->value); + wrbuf_puts(w, "\"/>"); + } + if (db->settings[i]) + wrbuf_puts(w, "\n"); + } + } +} + +// Retrieve first defined value for 'name' for given database. +// Will be extended to take into account user associated with session +const char *session_setting_oneval(struct session_database *db, int offset) +{ + if (offset >= db->num_settings || !db->settings[offset]) + return ""; + return db->settings[offset]->value; +} + +// Prepare XSLT stylesheets for record normalization +// Structures are allocated on the session_wide nmem to avoid having +// to recompute this for every search. This would lead +// to leaking if a single session was to repeatedly change the PZ_XSLT +// setting. However, this is not a realistic use scenario. +static int prepare_map(struct session *se, struct session_database *sdb) +{ + const char *s; + + if (!sdb->settings) + { + yaz_log(YLOG_WARN, "No settings on %s", sdb->database->url); + return -1; + } + if ((s = session_setting_oneval(sdb, PZ_XSLT))) + { + char auto_stylesheet[256]; + + if (!strcmp(s, "auto")) + { + const char *request_syntax = session_setting_oneval( + sdb, PZ_REQUESTSYNTAX); + if (request_syntax) + { + char *cp; + yaz_snprintf(auto_stylesheet, sizeof(auto_stylesheet), + "%s.xsl", request_syntax); + for (cp = auto_stylesheet; *cp; cp++) + { + /* deliberately only consider ASCII */ + if (*cp > 32 && *cp < 127) + *cp = tolower(*cp); + } + s = auto_stylesheet; + } + else + { + yaz_log(YLOG_WARN, "No pz:requestsyntax for auto stylesheet"); + } + } + sdb->map = normalize_cache_get(se->normalize_cache, + se->service, s); + if (!sdb->map) + return -1; + } + return 0; +} + +// This analyzes settings and recomputes any supporting data structures +// if necessary. 
+static int prepare_session_database(struct session *se, + struct session_database *sdb) +{ + if (!sdb->settings) + { + yaz_log(YLOG_WARN, + "No settings associated with %s", sdb->database->url); + return -1; + } + if (sdb->settings[PZ_XSLT] && !sdb->map) + { + if (prepare_map(se, sdb) < 0) + return -1; + } + return 0; +} + +// called if watch should be removed because http_channel is to be destroyed +static void session_watch_cancel(void *data, struct http_channel *c, + void *data2) +{ + struct session_watchentry *ent = data; + + ent->fun = 0; + ent->data = 0; + ent->obs = 0; +} + +// set watch. Returns 0=OK, -1 if watch is already set +int session_set_watch(struct session *s, int what, + session_watchfun fun, void *data, + struct http_channel *chan) +{ + if (s->watchlist[what].fun) + return -1; + s->watchlist[what].fun = fun; + s->watchlist[what].data = data; + s->watchlist[what].obs = http_add_observer(chan, &s->watchlist[what], + session_watch_cancel); + return 0; +} + +void session_alert_watch(struct session *s, int what) +{ + if (s->watchlist[what].fun) + { + /* our watch is no longer associated with http_channel */ + void *data; + session_watchfun fun; + + http_remove_observer(s->watchlist[what].obs); + fun = s->watchlist[what].fun; + data = s->watchlist[what].data; + + /* reset watch before fun is invoked - in case fun wants to set + it again */ + s->watchlist[what].fun = 0; + s->watchlist[what].data = 0; + s->watchlist[what].obs = 0; + + fun(data); + } +} + +//callback for grep_databases +static void select_targets_callback(void *context, struct session_database *db) +{ + struct session *se = (struct session*) context; + struct client *cl = client_create(); + client_set_database(cl, db); + client_set_session(cl, se); +} + +// Associates a set of clients with a session; +// Note: Session-databases represent databases with per-session +// setting overrides +static int select_targets(struct session *se, const char *filter) +{ + while (se->clients) + 
client_destroy(se->clients); + + return session_grep_databases(se, filter, select_targets_callback); +} + +int session_active_clients(struct session *s) +{ + struct client *c; + int res = 0; + + for (c = s->clients; c; c = client_next_in_session(c)) + if (client_is_active(c)) + res++; + + return res; +} + + +enum pazpar2_error_code search(struct session *se, + const char *query, + const char *startrecs, const char *maxrecs, + const char *filter, + const char **addinfo) +{ + int live_channels = 0; + int no_working = 0; + int no_failed = 0; + struct client *cl; + + yaz_log(YLOG_DEBUG, "Search"); + + *addinfo = 0; + + session_enter(se); + nmem_reset(se->nmem); + se->relevance = 0; + se->total_records = se->total_hits = se->total_merged = 0; + reclist_destroy(se->reclist); + se->reclist = 0; + se->num_termlists = 0; + live_channels = select_targets(se, filter); + if (!live_channels) + { + session_leave(se); + return PAZPAR2_NO_TARGETS; + } + se->reclist = reclist_create(se->nmem); + + for (cl = se->clients; cl; cl = client_next_in_session(cl)) + { + if (maxrecs) + client_set_maxrecs(cl, atoi(maxrecs)); + if (startrecs) + client_set_startrecs(cl, atoi(startrecs)); + if (prepare_session_database(se, client_get_database(cl)) < 0) + continue; + // Parse query for target + if (client_parse_query(cl, query) < 0) + no_failed++; + else + { + no_working++; + if (client_prep_connection(cl, se->service->z3950_operation_timeout, + se->service->z3950_session_timeout, + se->service->server->iochan_man)) + client_start_search(cl); + } + } + session_leave(se); + if (no_working == 0) + { + if (no_failed > 0) + { + *addinfo = "query"; + return PAZPAR2_MALFORMED_PARAMETER_VALUE; + } + else + return PAZPAR2_NO_TARGETS; + } + return PAZPAR2_NO_ERROR; +} + +// Creates a new session_database object for a database +static void session_init_databases_fun(void *context, struct database *db) +{ + struct session *se = (struct session *) context; + struct session_database *new = 
nmem_malloc(se->session_nmem, sizeof(*new)); + int i; + + new->database = db; + + new->map = 0; + assert(db->settings); + new->settings = nmem_malloc(se->session_nmem, + sizeof(struct settings *) * db->num_settings); + new->num_settings = db->num_settings; + for (i = 0; i < db->num_settings; i++) + { + struct setting *setting = db->settings[i]; + new->settings[i] = setting; + } + new->next = se->databases; + se->databases = new; +} + +// Doesn't free memory associated with sdb -- nmem takes care of that +static void session_database_destroy(struct session_database *sdb) +{ + sdb->map = 0; +} + +// Initialize session_database list -- this represents this session's view +// of the database list -- subject to modification by the settings ws command +void session_init_databases(struct session *se) +{ + se->databases = 0; + predef_grep_databases(se, se->service, session_init_databases_fun); +} + +// Probably session_init_databases_fun should be refactored instead of +// called here. +static struct session_database *load_session_database(struct session *se, + char *id) +{ + struct database *db = new_database(id, se->session_nmem); + + resolve_database(se->service, db); + + session_init_databases_fun((void*) se, db); + + // New sdb is head of se->databases list + return se->databases; +} + +// Find an existing session database. 
If not found, load it +static struct session_database *find_session_database(struct session *se, + char *id) +{ + struct session_database *sdb; + + for (sdb = se->databases; sdb; sdb = sdb->next) + if (!strcmp(sdb->database->url, id)) + return sdb; + return load_session_database(se, id); +} + +// Apply a session override to a database +void session_apply_setting(struct session *se, char *dbname, char *setting, + char *value) +{ + struct session_database *sdb = find_session_database(se, dbname); + struct conf_service *service = se->service; + struct setting *new = nmem_malloc(se->session_nmem, sizeof(*new)); + int offset = settings_create_offset(service, setting); + + expand_settings_array(&sdb->settings, &sdb->num_settings, offset, + se->session_nmem); + new->precedence = 0; + new->target = dbname; + new->name = setting; + new->value = value; + new->next = sdb->settings[offset]; + sdb->settings[offset] = new; + + // Force later recompute of settings-driven data structures + // (happens when a search starts and client connections are prepared) + switch (offset) + { + case PZ_XSLT: + if (sdb->map) + { + sdb->map = 0; + } + break; + } +} + +void destroy_session(struct session *s) +{ + struct session_database *sdb; + + while (s->clients) + client_destroy(s->clients); + for (sdb = s->databases; sdb; sdb = sdb->next) + session_database_destroy(sdb); + normalize_cache_destroy(s->normalize_cache); + reclist_destroy(s->reclist); + nmem_destroy(s->nmem); + service_destroy(s->service); + yaz_mutex_destroy(&s->mutex); + wrbuf_destroy(s->wrbuf); +} + +struct session *new_session(NMEM nmem, struct conf_service *service) +{ + int i; + struct session *session = nmem_malloc(nmem, sizeof(*session)); + + yaz_log(YLOG_DEBUG, "New Pazpar2 session"); + + session->service = service; + session->relevance = 0; + session->total_hits = 0; + session->total_records = 0; + session->number_of_warnings_unknown_elements = 0; + session->number_of_warnings_unknown_metadata = 0; + 
session->num_termlists = 0; + session->reclist = 0; + session->clients = 0; + session->session_nmem = nmem; + session->nmem = nmem_create(); + session->wrbuf = wrbuf_alloc(); + session->databases = 0; + for (i = 0; i <= SESSION_WATCH_MAX; i++) + { + session->watchlist[i].data = 0; + session->watchlist[i].fun = 0; + } + session->normalize_cache = normalize_cache_create(); + session->mutex = 0; + yaz_mutex_create(&session->mutex); + + return session; +} + +struct hitsbytarget *hitsbytarget(struct session *se, int *count, NMEM nmem) +{ + struct hitsbytarget *res = 0; + struct client *cl; + size_t sz = 0; + + session_enter(se); + for (cl = se->clients; cl; cl = client_next_in_session(cl)) + sz++; + + res = nmem_malloc(nmem, sizeof(*res) * sz); + *count = 0; + for (cl = se->clients; cl; cl = client_next_in_session(cl)) + { + WRBUF w = wrbuf_alloc(); + const char *name = session_setting_oneval(client_get_database(cl), + PZ_NAME); + + res[*count].id = client_get_database(cl)->database->url; + res[*count].name = *name ? name : "Unknown"; + res[*count].hits = client_get_hits(cl); + res[*count].records = client_get_num_records(cl); + res[*count].diagnostic = client_get_diagnostic(cl); + res[*count].state = client_get_state_str(cl); + res[*count].connected = client_get_connection(cl) ? 
1 : 0; + session_settings_dump(se, client_get_database(cl), w); + res[*count].settings_xml = w; + (*count)++; + } + session_leave(se); + return res; +} + +struct termlist_score **termlist(struct session *s, const char *name, int *num) +{ + int i; + struct termlist_score **tl = 0; + + session_enter(s); + for (i = 0; i < s->num_termlists; i++) + if (!strcmp((const char *) s->termlists[i].name, name)) + { + tl = termlist_highscore(s->termlists[i].termlist, num); + break; + } + session_leave(s); + return tl; +} + +#ifdef MISSING_HEADERS +void report_nmem_stats(void) +{ + size_t in_use, is_free; + + nmem_get_memory_in_use(&in_use); + nmem_get_memory_free(&is_free); + + yaz_log(YLOG_LOG, "nmem stat: use=%ld free=%ld", + (long) in_use, (long) is_free); +} +#endif + +struct record_cluster *show_single_start(struct session *s, const char *id, + struct record_cluster **prev_r, + struct record_cluster **next_r) +{ + struct record_cluster *r; + + session_enter(s); + reclist_enter(s->reclist); + *prev_r = 0; + *next_r = 0; + while ((r = reclist_read_record(s->reclist))) + { + if (!strcmp(r->recid, id)) + { + *next_r = reclist_read_record(s->reclist); + break; + } + *prev_r = r; + } + reclist_leave(s->reclist); + if (!r) + session_leave(s); + return r; +} + +void show_single_stop(struct session *s, struct record_cluster *rec) +{ + session_leave(s); +} + +struct record_cluster **show_range_start(struct session *s, + struct reclist_sortparms *sp, + int start, int *num, int *total, Odr_int *sumhits) +{ + struct record_cluster **recs = nmem_malloc(s->nmem, *num + * sizeof(struct record_cluster *)); + struct reclist_sortparms *spp; + int i; +#if USE_TIMING + yaz_timing_t t = yaz_timing_create(); +#endif + + session_enter(s); + if (!s->relevance) + { + *num = 0; + *total = 0; + *sumhits = 0; + recs = 0; + } + else + { + for (spp = sp; spp; spp = spp->next) + if (spp->type == Metadata_sortkey_relevance) + { + relevance_prepare_read(s->relevance, s->reclist); + break; + } + 
reclist_sort(s->reclist, sp); + + reclist_enter(s->reclist); + *total = reclist_get_num_records(s->reclist); + *sumhits = s->total_hits; + + for (i = 0; i < start; i++) + if (!reclist_read_record(s->reclist)) + { + *num = 0; + recs = 0; + break; + } + + for (i = 0; i < *num; i++) + { + struct record_cluster *r = reclist_read_record(s->reclist); + if (!r) + { + *num = i; + break; + } + recs[i] = r; + } + reclist_leave(s->reclist); + } +#if USE_TIMING + yaz_timing_stop(t); + yaz_log(YLOG_LOG, "show %6.5f %3.2f %3.2f", + yaz_timing_get_real(t), yaz_timing_get_user(t), + yaz_timing_get_sys(t)); + yaz_timing_destroy(&t); +#endif + return recs; +} + +void show_range_stop(struct session *s, struct record_cluster **recs) +{ + session_leave(s); +} + +void statistics(struct session *se, struct statistics *stat) +{ + struct client *cl; + int count = 0; + + memset(stat, 0, sizeof(*stat)); + for (cl = se->clients; cl; cl = client_next_in_session(cl)) + { + if (!client_get_connection(cl)) + stat->num_no_connection++; + switch (client_get_state(cl)) + { + case Client_Connecting: stat->num_connecting++; break; + case Client_Working: stat->num_working++; break; + case Client_Idle: stat->num_idle++; break; + case Client_Failed: stat->num_failed++; break; + case Client_Error: stat->num_error++; break; + default: break; + } + count++; + } + stat->num_hits = se->total_hits; + stat->num_records = se->total_records; + + stat->num_clients = count; +} + +static struct record_metadata *record_metadata_init( + NMEM nmem, const char *value, enum conf_metadata_type type, + struct _xmlAttr *attr) +{ + struct record_metadata *rec_md = record_metadata_create(nmem); + struct record_metadata_attr **attrp = &rec_md->attributes; + + for (; attr; attr = attr->next) + { + if (attr->children && attr->children->content) + { + if (strcmp((const char *) attr->name, "type")) + { /* skip the "type" attribute.. 
Its value is already part of + the element in output (md-%s) and so repeating it here + is redundant */ + *attrp = nmem_malloc(nmem, sizeof(**attrp)); + (*attrp)->name = + nmem_strdup(nmem, (const char *) attr->name); + (*attrp)->value = + nmem_strdup(nmem, (const char *) attr->children->content); + attrp = &(*attrp)->next; + } + } + } + *attrp = 0; + + if (type == Metadata_type_generic) + { + char *p = nmem_strdup(nmem, value); + + p = normalize7bit_generic(p, " ,/.:(["); + + rec_md->data.text.disp = p; + rec_md->data.text.sort = 0; + } + else if (type == Metadata_type_year || type == Metadata_type_date) + { + int first, last; + int longdate = 0; + + if (type == Metadata_type_date) + longdate = 1; + if (extract7bit_dates((char *) value, &first, &last, longdate) < 0) + return 0; + + rec_md->data.number.min = first; + rec_md->data.number.max = last; + } + else + return 0; + return rec_md; +} + +static int get_mergekey_from_doc(xmlDoc *doc, xmlNode *root, const char *name, + struct conf_service *service, WRBUF norm_wr) +{ + xmlNode *n; + int no_found = 0; + for (n = root->children; n; n = n->next) + { + if (n->type != XML_ELEMENT_NODE) + continue; + if (!strcmp((const char *) n->name, "metadata")) + { + xmlChar *type = xmlGetProp(n, (xmlChar *) "type"); + if (!strcmp(name, (const char *) type)) + { + xmlChar *value = xmlNodeListGetString(doc, n->children, 1); + if (value) + { + const char *norm_str; + pp2_relevance_token_t prt = + pp2_relevance_tokenize( + service->mergekey_pct, + (const char *) value, 0); + + if (wrbuf_len(norm_wr) > 0) + wrbuf_puts(norm_wr, " "); + wrbuf_puts(norm_wr, name); + while ((norm_str = + pp2_relevance_token_next(prt))) + { + if (*norm_str) + { + wrbuf_puts(norm_wr, " "); + wrbuf_puts(norm_wr, norm_str); + } + } + xmlFree(value); + pp2_relevance_token_destroy(prt); + no_found++; + } + } + xmlFree(type); + } + } + return no_found; +} + +static const char *get_mergekey(xmlDoc *doc, struct client *cl, int record_no, + struct conf_service 
*service, NMEM nmem) +{ + char *mergekey_norm = 0; + xmlNode *root = xmlDocGetRootElement(doc); + WRBUF norm_wr = wrbuf_alloc(); + + /* consider mergekey from XSL first */ + xmlChar *mergekey = xmlGetProp(root, (xmlChar *) "mergekey"); + if (mergekey) + { + const char *norm_str; + pp2_relevance_token_t prt = + pp2_relevance_tokenize( + service->mergekey_pct, + (const char *) mergekey, 0); + + while ((norm_str = pp2_relevance_token_next(prt))) + { + if (*norm_str) + { + if (wrbuf_len(norm_wr)) + wrbuf_puts(norm_wr, " "); + wrbuf_puts(norm_wr, norm_str); + } + } + pp2_relevance_token_destroy(prt); + xmlFree(mergekey); + } + else + { + /* no mergekey defined in XSL. Look for mergekey metadata instead */ + int field_id; + for (field_id = 0; field_id < service->num_metadata; field_id++) + { + struct conf_metadata *ser_md = &service->metadata[field_id]; + if (ser_md->mergekey != Metadata_mergekey_no) + { + int r = get_mergekey_from_doc(doc, root, ser_md->name, + service, norm_wr); + if (r == 0 && ser_md->mergekey == Metadata_mergekey_required) + { + /* no mergekey on this one and it is required.. + Generate unique key instead */ + wrbuf_rewind(norm_wr); + break; + } + } + } + } + + /* generate unique key if none is not generated already or is empty */ + if (wrbuf_len(norm_wr) == 0) + { + wrbuf_printf(norm_wr, "%s-%d", + client_get_database(cl)->database->url, record_no); + } + if (wrbuf_len(norm_wr) > 0) + mergekey_norm = nmem_strdup(nmem, wrbuf_cstr(norm_wr)); + wrbuf_destroy(norm_wr); + return mergekey_norm; +} + +/** \brief see if metadata for pz:recordfilter exists + \param root xml root element of normalized record + \param sdb session database for client + \retval 0 if there is no metadata for pz:recordfilter + \retval 1 if there is metadata for pz:recordfilter + + If there is no pz:recordfilter defined, this function returns 1 + as well. 
+*/ + +static int check_record_filter(xmlNode *root, struct session_database *sdb) +{ + int match = 0; + xmlNode *n; + const char *s; + s = session_setting_oneval(sdb, PZ_RECORDFILTER); + + if (!s || !*s) + return 1; + + for (n = root->children; n; n = n->next) + { + if (n->type != XML_ELEMENT_NODE) + continue; + if (!strcmp((const char *) n->name, "metadata")) + { + xmlChar *type = xmlGetProp(n, (xmlChar *) "type"); + if (type) + { + size_t len; + const char *eq = strchr(s, '~'); + if (eq) + len = eq - s; + else + len = strlen(s); + if (len == strlen((const char *)type) && + !memcmp((const char *) type, s, len)) + { + xmlChar *value = xmlNodeGetContent(n); + if (value && *value) + { + if (!eq || strstr((const char *) value, eq+1)) + match = 1; + } + xmlFree(value); + } + xmlFree(type); + } + } + } + return match; +} + + +static int ingest_to_cluster(struct client *cl, + xmlDoc *xdoc, + xmlNode *root, + int record_no, + const char *mergekey_norm); + +/** \brief ingest XML record + \param cl client holds the result set for record + \param rec record buffer (0 terminated) + \param record_no record position (1, 2, ..) 
/** \brief ingest XML record
    \param cl client holds the result set for record
    \param rec record buffer (0 terminated)
    \param record_no record position (1, 2, ..)
    \param nmem memory handle passed on to normalize_record and
                get_mergekey
    \retval 0 OK
    \retval -1 failure (record could not be normalized, was rejected by
               the record filter, produced no merge key, or could not be
               inserted into the record list)
*/
int ingest_record(struct client *cl, const char *rec,
                  int record_no, NMEM nmem)
{
    struct session_database *sdb = client_get_database(cl);
    struct session *se = client_get_session(cl);
    struct conf_service *service = se->service;
    xmlDoc *xdoc = normalize_record(sdb, service, rec, nmem);
    xmlNode *root;
    const char *mergekey_norm;
    int ret;

    if (!xdoc)
        return -1;

    root = xmlDocGetRootElement(xdoc);

    // a filtered-out record is reported to the caller as a failure (-1)
    if (!check_record_filter(root, sdb))
    {
        yaz_log(YLOG_WARN, "Filtered out record no %d from %s", record_no,
                sdb->database->url);
        xmlFreeDoc(xdoc);
        return -1;
    }

    mergekey_norm = get_mergekey(xdoc, cl, record_no, service, nmem);
    if (!mergekey_norm)
    {
        yaz_log(YLOG_WARN, "Got no mergekey");
        xmlFreeDoc(xdoc);
        return -1;
    }
    // only the part that modifies session state is bracketed by
    // session_enter/session_leave (presumably the session lock -- confirm);
    // normalization and merge key computation above run outside it
    session_enter(se);
    ret = ingest_to_cluster(cl, xdoc, root, record_no, mergekey_norm);
    session_leave(se);

    xmlFreeDoc(xdoc);

    return ret;
}

/** \brief add a normalized record to the session's record list
    \param cl client the record came from
    \param xdoc normalized record document
    \param root root element of xdoc
    \param record_no record position
    \param mergekey_norm merge key used to pick or create the cluster
    \retval 0 OK
    \retval -1 reclist_insert returned no cluster

    Creates a record, inserts it into se->reclist under mergekey_norm and
    then walks the metadata children of root. Each known metadata value
    is stored twice: once on the record itself and once merged into the
    cluster according to the field's merge rule. Relevance counts and
    facets are updated along the way. Called by ingest_record between
    session_enter and session_leave.
*/
static int ingest_to_cluster(struct client *cl,
                             xmlDoc *xdoc,
                             xmlNode *root,
                             int record_no,
                             const char *mergekey_norm)
{
    xmlNode *n;
    // type/value are owned by the loop below: freed at the top of each
    // iteration (covers every "continue") and once more after the loop
    xmlChar *type = 0;
    xmlChar *value = 0;
    struct session_database *sdb = client_get_database(cl);
    struct session *se = client_get_session(cl);
    struct conf_service *service = se->service;
    struct record *record = record_create(se->nmem,
                                          service->num_metadata,
                                          service->num_sortkeys, cl,
                                          record_no);
    struct record_cluster *cluster = reclist_insert(se->reclist,
                                                    service,
                                                    record,
                                                    mergekey_norm,
                                                    &se->total_merged);
    if (!cluster)
        return -1;
    if (global_parameters.dump_records)
        yaz_log(YLOG_LOG, "Cluster id %s from %s (#%d)", cluster->recid,
                sdb->database->url, record_no);
    relevance_newrec(se->relevance, cluster);

    // now parsing XML record and adding data to cluster or record metadata
    for (n = root->children; n; n = n->next)
    {
        pp2_relevance_token_t prt;
        // release whatever the previous iteration left behind
        if (type)
            xmlFree(type);
        if (value)
            xmlFree(value);
        type = value = 0;

        if (n->type != XML_ELEMENT_NODE)
            continue;
        if (!strcmp((const char *) n->name, "metadata"))
        {
            struct conf_metadata *ser_md = 0;
            struct conf_sortkey *ser_sk = 0;
            struct record_metadata **wheretoput = 0;
            struct record_metadata *rec_md = 0;
            int md_field_id = -1;
            int sk_field_id = -1;

            type = xmlGetProp(n, (xmlChar *) "type");
            value = xmlNodeListGetString(xdoc, n->children, 1);

            // skip elements without a type or with missing/empty content
            if (!type || !value || !*value)
                continue;

            md_field_id
                = conf_service_metadata_field_id(service, (const char *) type);
            if (md_field_id < 0)
            {
                // warn only for the first unknown field seen in this
                // session; later ones are just counted
                if (se->number_of_warnings_unknown_metadata == 0)
                {
                    yaz_log(YLOG_WARN,
                            "Ignoring unknown metadata element: %s", type);
                }
                se->number_of_warnings_unknown_metadata++;
                continue;
            }

            ser_md = &service->metadata[md_field_id];

            // field doubles as a sort key when sortkey_offset is >= 0
            if (ser_md->sortkey_offset >= 0){
                sk_field_id = ser_md->sortkey_offset;
                ser_sk = &service->sortkeys[sk_field_id];
            }

            // non-merged metadata
            rec_md = record_metadata_init(se->nmem, (const char *) value,
                                          ser_md->type, n->properties);
            if (!rec_md)
            {
                yaz_log(YLOG_WARN, "bad metadata data '%s' for element '%s'",
                        value, type);
                continue;
            }
            // append at the tail so the record keeps document order
            wheretoput = &record->metadata[md_field_id];
            while (*wheretoput)
                wheretoput = &(*wheretoput)->next;
            *wheretoput = rec_md;

            // merged metadata
            // NOTE(review): this second result is not checked for NULL;
            // presumably it cannot fail when the call above succeeded with
            // the same value and type -- confirm
            rec_md = record_metadata_init(se->nmem, (const char *) value,
                                          ser_md->type, 0);
            wheretoput = &cluster->metadata[md_field_id];

            // and populate with data:
            // assign cluster or record based on merge action
            if (ser_md->merge == Metadata_merge_unique)
            {
                // add the value only if no node with equal display text
                // is already in the cluster list
                // NOTE(review): compares data.text.disp, so this branch
                // assumes a text-typed field -- confirm the configuration
                // cannot combine it with year/date types
                struct record_metadata *mnode;
                for (mnode = *wheretoput; mnode; mnode = mnode->next)
                    if (!strcmp((const char *) mnode->data.text.disp,
                                rec_md->data.text.disp))
                        break;
                if (!mnode)
                {
                    rec_md->next = *wheretoput;
                    *wheretoput = rec_md;
                }
            }
            else if (ser_md->merge == Metadata_merge_longest)
            {
                // keep the value with the longest display text; the cluster
                // sort key is recomputed whenever the kept value changes
                if (!*wheretoput
                    || strlen(rec_md->data.text.disp)
                    > strlen((*wheretoput)->data.text.disp))
                {
                    *wheretoput = rec_md;
                    if (ser_sk)
                    {
                        const char *sort_str = 0;
                        int skip_article =
                            ser_sk->type == Metadata_sortkey_skiparticle;

                        if (!cluster->sortkeys[sk_field_id])
                            cluster->sortkeys[sk_field_id] =
                                nmem_malloc(se->nmem,
                                            sizeof(union data_types));

                        prt = pp2_relevance_tokenize(
                            service->sort_pct,
                            rec_md->data.text.disp, skip_article);

                        // result discarded; the sort form is fetched with
                        // pp2_get_sort right after (presumably it needs one
                        // token step first -- confirm)
                        pp2_relevance_token_next(prt);

                        sort_str = pp2_get_sort(prt);

                        cluster->sortkeys[sk_field_id]->text.disp =
                            rec_md->data.text.disp;
                        if (!sort_str)
                        {
                            // fall back to the display text as sort key
                            sort_str = rec_md->data.text.disp;
                            yaz_log(YLOG_WARN,
                                    "Could not make sortkey. Bug #1858");
                        }
                        cluster->sortkeys[sk_field_id]->text.sort =
                            nmem_strdup(se->nmem, sort_str);
#if 0
                        yaz_log(YLOG_LOG, "text disp=%s",
                                cluster->sortkeys[sk_field_id]->text.disp);
                        yaz_log(YLOG_LOG, "text sort=%s",
                                cluster->sortkeys[sk_field_id]->text.sort);
#endif
                        pp2_relevance_token_destroy(prt);
                    }
                }
            }
            else if (ser_md->merge == Metadata_merge_all)
            {
                // keep every value; new values go to the head of the list
                rec_md->next = *wheretoput;
                *wheretoput = rec_md;
            }
            else if (ser_md->merge == Metadata_merge_range)
            {
                // numeric range: first value seeds the cluster entry (and
                // the sort key points into it), later values widen min/max
                if (!*wheretoput)
                {
                    *wheretoput = rec_md;
                    if (ser_sk)
                        cluster->sortkeys[sk_field_id]
                            = &rec_md->data;
                }
                else
                {
                    int this_min = rec_md->data.number.min;
                    int this_max = rec_md->data.number.max;
                    if (this_min < (*wheretoput)->data.number.min)
                        (*wheretoput)->data.number.min = this_min;
                    if (this_max > (*wheretoput)->data.number.max)
                        (*wheretoput)->data.number.max = this_max;
                }
            }
            // any other merge setting: the merged copy is not attached to
            // the cluster


            // ranking of _all_ fields enabled ...
            if (ser_md->rank)
                relevance_countwords(se->relevance, cluster,
                                     (char *) value, ser_md->rank,
                                     ser_md->name);

            // construct facets ...
            if (ser_md->termlist)
            {
                if (ser_md->type == Metadata_type_year)
                {
                    // year fields contribute their max and, when it
                    // differs, their min as separate facet terms
                    char year[64];
                    sprintf(year, "%d", rec_md->data.number.max);
                    add_facet(se, (char *) type, year);
                    if (rec_md->data.number.max != rec_md->data.number.min)
                    {
                        sprintf(year, "%d", rec_md->data.number.min);
                        add_facet(se, (char *) type, year);
                    }
                }
                else
                    add_facet(se, (char *) type, (char *) value);
            }

            // cleaning up
            xmlFree(type);
            xmlFree(value);
            type = value = 0;
        }
        else
        {
            // as for unknown metadata: log the first occurrence only
            if (se->number_of_warnings_unknown_elements == 0)
                yaz_log(YLOG_WARN,
                        "Unexpected element in internal record: %s", n->name);
            se->number_of_warnings_unknown_elements++;
        }
    }
    // a "continue" on the last child leaves these set
    if (type)
        xmlFree(type);
    if (value)
        xmlFree(value);

    relevance_donerecord(se->relevance, cluster);
    se->total_records++;

    return 0;
}
#ifndef PAZPAR2_SESSION_H
#define PAZPAR2_SESSION_H

// NOTE(review): the four system includes below have no operand in this
// copy of the patch (the <...> part appears to have been stripped);
// restore them from the original commit before applying.
#include
#include
#include
#include

#include "termlists.h"
#include "reclists.h"
#include "http.h"

struct record;
struct client;


// Error codes returned by session operations such as search().
// PAZPAR2_LAST_ERROR is the final enumerator (presumably a count/sentinel,
// not a real error -- confirm against its users).
enum pazpar2_error_code {
    PAZPAR2_NO_ERROR = 0,

    PAZPAR2_NO_SESSION,
    PAZPAR2_MISSING_PARAMETER,
    PAZPAR2_MALFORMED_PARAMETER_VALUE,
    PAZPAR2_MALFORMED_PARAMETER_ENCODING,
    PAZPAR2_MALFORMED_SETTING,
    PAZPAR2_HITCOUNTS_FAILED,
    PAZPAR2_RECORD_MISSING,
    PAZPAR2_NO_TARGETS,
    PAZPAR2_CONFIG_TARGET,
    PAZPAR2_RECORD_FAIL,
    PAZPAR2_NOT_IMPLEMENTED,
    PAZPAR2_NO_SERVICE,

    PAZPAR2_LAST_ERROR
};

struct host;
// Represents a (virtual) database on a host
struct database {
    struct host *host;
    char *url;                  // also used in log messages and unique merge keys
    char **databases;
    int errors;
    struct zr_explain *explain;
    int num_settings;
    struct setting **settings;
    struct database *next;      // singly linked list
};


// Represents a database as viewed from one session, possibly with settings overridden
// for that session
struct session_database
{
    struct database *database;
    int num_settings;
    struct setting **settings;
    normalize_record_t map;
    struct session_database *next;  // singly linked list
};

// Indexes into session.watchlist; SESSION_WATCH_MAX is the highest index
#define SESSION_WATCH_SHOW      0
#define SESSION_WATCH_RECORD    1
#define SESSION_WATCH_MAX       1

// Capacity of session.termlists
#define SESSION_MAX_TERMLISTS 10

// Callback type stored in a watch entry; receives the entry's data pointer
typedef void (*session_watchfun)(void *data);

// A termlist together with the name it is stored under
struct named_termlist
{
    char *name;
    struct termlist *termlist;
};

// One slot of session.watchlist
struct session_watchentry {
    void *data;
    http_channel_observer_t obs;
    session_watchfun fun;
};

// End-user session
struct session {
    struct conf_service *service; /* service in use for this session */
    struct session_database *databases;  // All databases, settings overridden
    struct client *clients;   // Clients connected for current search
    NMEM session_nmem;  // Nmem for session-permanent storage
    NMEM nmem;     // Nmem for each operation (i.e. search, result set, etc)
    WRBUF wrbuf;   // Wrbuf for scratch(i.e. search)
    int num_termlists;
    struct named_termlist termlists[SESSION_MAX_TERMLISTS];
    struct relevance *relevance;
    struct reclist *reclist;
    struct session_watchentry watchlist[SESSION_WATCH_MAX + 1];
    Odr_int total_hits;
    int total_records;  // incremented once per successfully ingested record
    int total_merged;   // updated by reclist_insert during ingest
    // counters used to log the corresponding warning only once per session
    int number_of_warnings_unknown_elements;
    int number_of_warnings_unknown_metadata;
    normalize_cache_t normalize_cache;
    YAZ_MUTEX mutex;
};

// Counters filled in by statistics()
struct statistics {
    int num_clients;
    int num_no_connection;
    int num_connecting;
    int num_working;
    int num_idle;
    int num_failed;
    int num_error;
    Odr_int num_hits;
    int num_records;
};

// Per-target result summary, as returned by hitsbytarget()
struct hitsbytarget {
    char *id;
    const char *name;
    Odr_int hits;
    int diagnostic;
    int records;
    const char *state;
    int connected;
    WRBUF settings_xml;
};

struct hitsbytarget *hitsbytarget(struct session *s, int *count, NMEM nmem);
struct session *new_session(NMEM nmem, struct conf_service *service);
void destroy_session(struct session *s);
void session_init_databases(struct session *s);
int load_targets(struct session *s, const char *fn);
void statistics(struct session *s, struct statistics *stat);
enum pazpar2_error_code search(struct session *s, const char *query,
                               const char *startrecs, const char *maxrecs,
                               const char *filter, const char **addinfo);
// show_range_start/show_range_stop and show_single_start/show_single_stop
// come in pairs; presumably the _stop call releases what _start acquired --
// confirm in session.c
struct record_cluster **show_range_start(struct session *s,
                                         struct reclist_sortparms *sp,
                                         int start,
                                         int *num, int *total, Odr_int *sumhits);
void show_range_stop(struct session *s, struct record_cluster **recs);

struct record_cluster *show_single_start(struct session *s, const char *id,
                                         struct record_cluster **prev_r,
                                         struct record_cluster **next_r);
void show_single_stop(struct session *s, struct record_cluster *rec);
struct termlist_score **termlist(struct session *s, const char *name, int *num);
int session_set_watch(struct session *s, int what, session_watchfun fun, void *data, struct http_channel *c);
int session_active_clients(struct session *s);
void session_apply_setting(struct session *se, char *dbname, char *setting, char *value);
const char *session_setting_oneval(struct session_database *db, int offset);

int host_getaddrinfo(struct host *host, iochan_man_t iochan_man);

// Normalizes, filters and merges one record; returns 0 on success, -1 on failure
int ingest_record(struct client *cl, const char *rec, int record_no, NMEM nmem);
void session_alert_watch(struct session *s, int what);
void pull_terms(NMEM nmem, struct ccl_rpn_node *n, char **termlist, int *num);

#endif