X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Fsession.c;h=3dcf40f28d3bc19138705edc666fcf94fbe28fdb;hb=85b1f355522cc620452552d76fd517f089c98ab2;hp=9e52d59f4492b128d9061973c747fbdf12e6fb6f;hpb=7db45d2bc967511916800d163b2c3e290c62da40;p=pazpar2-moved-to-github.git diff --git a/src/session.c b/src/session.c index 9e52d59..3dcf40f 100644 --- a/src/session.c +++ b/src/session.c @@ -1,5 +1,5 @@ /* This file is part of Pazpar2. - Copyright (C) 2006-2013 Index Data + Copyright (C) Index Data Pazpar2 is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -56,8 +56,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include #include +#include -#define USE_TIMING 1 +#define USE_TIMING 0 #if USE_TIMING #include #endif @@ -149,30 +150,19 @@ static void session_leave(struct session *s, const char *caller) session_log(s, YLOG_DEBUG, "Session unlock by %s", caller); } -static void session_normalize_facet(struct session *s, - const char *type, const char *value, - WRBUF display_wrbuf, WRBUF facet_wrbuf) +static int run_icu(struct session *s, const char *icu_chain_id, + const char *value, + WRBUF norm_wr, WRBUF disp_wr) { - struct conf_service *service = s->service; - pp2_charset_token_t prt; const char *facet_component; - int i; - const char *icu_chain_id = 0; - - for (i = 0; i < service->num_metadata; i++) - if (!strcmp((service->metadata + i)->name, type)) - icu_chain_id = (service->metadata + i)->facetrule; - if (!icu_chain_id) - icu_chain_id = "facet"; - prt = pp2_charset_token_create(service->charsets, icu_chain_id); + struct conf_service *service = s->service; + pp2_charset_token_t prt = + pp2_charset_token_create(service->charsets, icu_chain_id); if (!prt) { session_log(s, YLOG_FATAL, - "Unknown ICU chain '%s' for facet of type '%s'", - icu_chain_id, type); - wrbuf_destroy(facet_wrbuf); - wrbuf_destroy(display_wrbuf); - return; + "Unknown ICU chain '%s'", icu_chain_id); + return 0; } pp2_charset_token_first(prt, value, 0); while ((facet_component = pp2_charset_token_next(prt))) @@ -180,19 +170,37 @@ static void session_normalize_facet(struct session *s, const char *display_component; if (*facet_component) { - if (wrbuf_len(facet_wrbuf)) - wrbuf_puts(facet_wrbuf, " "); - wrbuf_puts(facet_wrbuf, facet_component); + if (wrbuf_len(norm_wr)) + wrbuf_puts(norm_wr, " "); + wrbuf_puts(norm_wr, facet_component); } display_component = pp2_get_display(prt); if (display_component) { - if (wrbuf_len(display_wrbuf)) - wrbuf_puts(display_wrbuf, " "); - wrbuf_puts(display_wrbuf, display_component); + if (wrbuf_len(disp_wr)) + wrbuf_puts(disp_wr, " "); + wrbuf_puts(disp_wr, display_component); } } pp2_charset_token_destroy(prt); + return 1; +} + +static void session_normalize_facet(struct session *s, + const char *type, const char *value, + WRBUF display_wrbuf, WRBUF facet_wrbuf) +{ + struct conf_service *service = s->service; + int i; + const char *icu_chain_id = 0; + + for (i = 0; i < service->num_metadata; i++) + if (!strcmp((service->metadata + i)->name, type)) + icu_chain_id = (service->metadata + i)->facetrule; + if (!icu_chain_id) + icu_chain_id = "facet"; + + run_icu(s, icu_chain_id, value, facet_wrbuf, display_wrbuf); } void add_facet(struct session *s, const char *type, const char *value, int count) @@ -204,29 +212,18 @@ void add_facet(struct session *s, const char *type, const char *value, int count if (wrbuf_len(facet_wrbuf)) { - int i; - for (i = 0; i < s->num_termlists; i++) - if (!strcmp(s->termlists[i].name, type)) + struct named_termlist **tp = &s->termlists; + for (; (*tp); tp = &(*tp)->next) + if (!strcmp((*tp)->name, type)) break; - if (i == s->num_termlists) + if (!*tp) { - if (i == SESSION_MAX_TERMLISTS) - { - session_log(s, YLOG_FATAL, "Too many termlists"); - wrbuf_destroy(facet_wrbuf); - wrbuf_destroy(display_wrbuf); - return; - } - - s->termlists[i].name = nmem_strdup(s->nmem, type); - s->termlists[i].termlist = termlist_create(s->nmem); - s->num_termlists = i + 1; + *tp = nmem_malloc(s->nmem, sizeof(**tp)); + (*tp)->name = nmem_strdup(s->nmem, type); + (*tp)->termlist = termlist_create(s->nmem); + (*tp)->next = 0; } - -#if 0 - session_log(s, YLOG_LOG, "Facets for %s: %s norm:%s (%d)", type, value, wrbuf_cstr(facet_wrbuf), count); -#endif - termlist_insert(s->termlists[i].termlist, wrbuf_cstr(display_wrbuf), + termlist_insert((*tp)->termlist, wrbuf_cstr(display_wrbuf), wrbuf_cstr(facet_wrbuf), count); } wrbuf_destroy(facet_wrbuf); @@ -604,8 +601,7 @@ static void session_clear_set(struct session *se, struct reclist_sortparms *sp) nmem_total(se->nmem)); nmem_reset(se->nmem); se->total_records = se->total_merged = 0; - se->num_termlists = 0; - + se->termlists = 0; relevance_clear(se->relevance); /* reset list of sorted results and clear to relevance search */ @@ -621,10 +617,8 @@ static void session_clear_set(struct session *se, struct reclist_sortparms *sp) se->reclist = reclist_create(se->nmem); } -static void session_sort_unlocked(struct session *se, - struct reclist_sortparms *sp, - const char *mergekey, - const char *rank) +void session_sort(struct session *se, struct reclist_sortparms *sp, + const char *mergekey, const char *rank) { struct client_list *l; const char *field = sp->name; @@ -632,6 +626,7 @@ static void session_sort_unlocked(struct session *se, int type = sp->type; int clients_research = 0; + session_enter(se, "session_sort"); session_log(se, YLOG_DEBUG, "session_sort field=%s increasing=%d type=%d", field, increasing, type); @@ -665,6 +660,7 @@ static void session_sort_unlocked(struct session *se, { session_log(se, YLOG_DEBUG, "session_sort: field=%s increasing=%d type=%d already fetched", field, increasing, type); + session_leave(se, "session_sort"); return; } } @@ -682,14 +678,7 @@ static void session_sort_unlocked(struct session *se, client_parse_init(cl, 1); clients_research += client_parse_sort(cl, sp); } - if (clients_research) - { - session_log(se, YLOG_DEBUG, - "session_sort: reset results due to %d clients researching", - clients_research); - session_clear_set(se, sp); - } - else + if (!clients_research || se->clients_starting) { // A new sorting based on same record set struct reclist_sortparms *sr = nmem_malloc(se->nmem, sizeof(*sr)); @@ -699,36 +688,66 @@ static void session_sort_unlocked(struct session *se, sr->next = se->sorted_results; se->sorted_results = sr; session_log(se, YLOG_DEBUG, "session_sort: no research/ingesting done"); - return ; + session_leave(se, "session_sort"); } - session_log(se, YLOG_DEBUG, "Re- search/ingesting for clients due to change in sort order"); - - for (l = se->clients_active; l; l = l->next) + else { - struct client *cl = l->client; - if (client_get_state(cl) == Client_Connecting || - client_get_state(cl) == Client_Idle || - client_get_state(cl) == Client_Working) { - client_start_search(cl); - } - else + se->clients_starting = 1; + session_log(se, YLOG_DEBUG, + "session_sort: reset results due to %d clients researching", + clients_research); + session_clear_set(se, sp); + session_log(se, YLOG_DEBUG, "Re- search/ingesting for clients due to change in sort order"); + + session_leave(se, "session_sort"); + for (l = se->clients_active; l; l = l->next) { - session_log(se, YLOG_DEBUG, - "session_sort: %s: No re-start/ingest in show. " - "Wrong client state: %d", - client_get_id(cl), client_get_state(cl)); + struct client *cl = l->client; + if (client_get_state(cl) == Client_Connecting || + client_get_state(cl) == Client_Idle || + client_get_state(cl) == Client_Working) { + client_start_search(cl); + } + else + { + session_log(se, YLOG_DEBUG, + "session_sort: %s: No re-start/ingest in show. " + "Wrong client state: %d", + client_get_id(cl), client_get_state(cl)); + } } + session_enter(se, "session_sort"); + se->clients_starting = 0; + session_leave(se, "session_sort"); } } -void session_sort(struct session *se, struct reclist_sortparms *sp, - const char *mergekey, const char *rank) +void session_stop(struct session *se) { - //session_enter(se, "session_sort"); - session_sort_unlocked(se, sp, mergekey, rank); - //session_leave(se, "session_sort"); -} + struct client_list *l; + session_enter(se, "session_stop1"); + if (se->clients_starting) + { + session_leave(se, "session_stop1"); + return; + } + se->clients_starting = 1; + session_leave(se, "session_stop1"); + + session_alert_watch(se, SESSION_WATCH_SHOW); + session_alert_watch(se, SESSION_WATCH_BYTARGET); + session_alert_watch(se, SESSION_WATCH_TERMLIST); + session_alert_watch(se, SESSION_WATCH_SHOW_PREF); + for (l = se->clients_active; l; l = l->next) + { + struct client *cl = l->client; + client_stop(cl); + } + session_enter(se, "session_stop2"); + se->clients_starting = 0; + session_leave(se, "session_stop2"); +} enum pazpar2_error_code session_search(struct session *se, const char *query, @@ -746,17 +765,21 @@ enum pazpar2_error_code session_search(struct session *se, int no_working = 0; int no_failed_query = 0; int no_failed_limit = 0; - struct client_list *l, *l0; - - session_alert_watch(se, SESSION_WATCH_SHOW); - session_alert_watch(se, SESSION_WATCH_BYTARGET); - session_alert_watch(se, SESSION_WATCH_TERMLIST); - session_alert_watch(se, SESSION_WATCH_SHOW_PREF); + struct client_list *l; session_log(se, YLOG_DEBUG, "Search"); *addinfo = 0; + session_enter(se, "session_search0"); + if (se->clients_starting) + { + session_leave(se, "session_search0"); + return PAZPAR2_NO_ERROR; + } + se->clients_starting = 1; + session_leave(se, "session_search0"); + if (se->settings_modified) { session_remove_cached_clients(se); } @@ -784,6 +807,7 @@ enum pazpar2_error_code session_search(struct session *se, if (!live_channels) { session_leave(se, "session_search"); + se->clients_starting = 0; return PAZPAR2_NO_TARGETS; } @@ -793,14 +817,18 @@ enum pazpar2_error_code session_search(struct session *se, { *addinfo = "limit"; session_leave(se, "session_search"); + se->clients_starting = 0; return PAZPAR2_MALFORMED_PARAMETER_VALUE; } - l0 = se->clients_active; - se->clients_active = 0; session_leave(se, "session_search"); - for (l = l0; l; l = l->next) + session_alert_watch(se, SESSION_WATCH_SHOW); + session_alert_watch(se, SESSION_WATCH_BYTARGET); + session_alert_watch(se, SESSION_WATCH_TERMLIST); + session_alert_watch(se, SESSION_WATCH_SHOW_PREF); + + for (l = se->clients_active; l; l = l->next) { int parse_ret; struct client *cl = l->client; @@ -823,8 +851,9 @@ enum pazpar2_error_code session_search(struct session *se, no_working++; } } - session_reset_active_clients(se, l0); - + session_enter(se, "session_search2"); + se->clients_starting = 0; + session_leave(se, "session_search2"); if (no_working == 0) { if (no_failed_query > 0) @@ -883,7 +912,7 @@ void session_init_databases(struct session *se) // Probably session_init_databases_fun should be refactored instead of // called here. static struct session_database *load_session_database(struct session *se, - char *id) + const char *id) { struct database *db = new_database_inherit_settings(id, se->session_nmem, se->service->settings); session_init_databases_fun((void*) se, db); @@ -894,7 +923,7 @@ static struct session_database *load_session_database(struct session *se, // Find an existing session database. If not found, load it static struct session_database *find_session_database(struct session *se, - char *id) + const char *id) { struct session_database *sdb; @@ -905,36 +934,39 @@ static struct session_database *find_session_database(struct session *se, } // Apply a session override to a database -void session_apply_setting(struct session *se, char *dbname, char *setting, - char *value) +void session_apply_setting(struct session *se, const char *dbname, + const char *name, const char *value) { - struct session_database *sdb = find_session_database(se, dbname); - struct conf_service *service = se->service; - struct setting *new = nmem_malloc(se->session_nmem, sizeof(*new)); - int offset = settings_create_offset(service, setting); - - expand_settings_array(&sdb->settings, &sdb->num_settings, offset, - se->session_nmem); - new->precedence = 0; - new->target = dbname; - new->name = setting; - new->value = value; - new->next = sdb->settings[offset]; - sdb->settings[offset] = new; - - se->settings_modified = 1; - - // Force later recompute of settings-driven data structures - // (happens when a search starts and client connections are prepared) - switch (offset) - { - case PZ_XSLT: - if (sdb->map) - { + session_enter(se, "session_apply_setting"); + { + struct session_database *sdb = find_session_database(se, dbname); + struct conf_service *service = se->service; + struct setting *s; + int offset = settings_create_offset(service, name); + + expand_settings_array(&sdb->settings, &sdb->num_settings, offset, + se->session_nmem); + // Force later recompute of settings-driven data structures + // (happens when a search starts and client connections are prepared) + if (offset == PZ_XSLT) sdb->map = 0; + se->settings_modified = 1; + for (s = sdb->settings[offset]; s; s = s->next) + if (!strcmp(s->name, name) && + dbname && s->target && !strcmp(dbname, s->target)) + break; + if (!s) + { + s = nmem_malloc(se->session_nmem, sizeof(*s)); + s->precedence = 0; + s->target = nmem_strdup(se->session_nmem, dbname); + s->name = nmem_strdup(se->session_nmem, name); + s->next = sdb->settings[offset]; + sdb->settings[offset] = s; } - break; + s->value = nmem_strdup(se->session_nmem, value); } + session_leave(se, "session_apply_setting"); } void session_destroy(struct session *se) @@ -989,7 +1021,7 @@ struct session *new_session(NMEM nmem, struct conf_service *service, session->total_records = 0; session->number_of_warnings_unknown_elements = 0; session->number_of_warnings_unknown_metadata = 0; - session->num_termlists = 0; + session->termlists = 0; session->reclist = reclist_create(nmem); session->clients_active = 0; session->clients_cached = 0; @@ -1001,6 +1033,7 @@ struct session *new_session(NMEM nmem, struct conf_service *service, session->facet_limits = 0; session->mergekey = 0; session->rank = 0; + session->clients_starting = 0; for (i = 0; i <= SESSION_WATCH_MAX; i++) { @@ -1036,13 +1069,13 @@ static struct hitsbytarget *hitsbytarget_nb(struct session *se, WRBUF w = wrbuf_alloc(); const char *name = session_setting_oneval(client_get_database(cl), PZ_NAME); - res[*count].id = client_get_id(cl); res[*count].name = *name ? name : "Unknown"; res[*count].hits = client_get_hits(cl); res[*count].approximation = client_get_approximation(cl); - res[*count].records = client_get_num_records(cl); - res[*count].filtered = client_get_num_records_filtered(cl); + res[*count].records = client_get_num_records(cl, + &res[*count].filtered, + 0, 0); res[*count].diagnostic = client_get_diagnostic(cl, &res[*count].message, &res[*count].addinfo); @@ -1137,7 +1170,7 @@ static int targets_termlist_nb(WRBUF wrbuf, struct session *se, int num, void perform_termlist(struct http_channel *c, struct session *se, const char *name, int num, int version) { - int i, j; + int j; NMEM nmem_tmp = nmem_create(); char **names; int num_names = 0; @@ -1154,9 +1187,10 @@ void perform_termlist(struct http_channel *c, struct session *se, const char *tname; int must_generate_empty = 1; /* bug 5350 */ - for (i = 0; i < se->num_termlists; i++) + struct named_termlist *t = se->termlists; + for (; t; t = t->next) { - tname = se->termlists[i].name; + tname = t->name; if (!strcmp(names[j], tname) || !strcmp(names[j], "*")) { struct termlist_score **p = 0; @@ -1167,8 +1201,7 @@ void perform_termlist(struct http_channel *c, struct session *se, wrbuf_puts(c->wrbuf, "\">\n"); must_generate_empty = 0; - p = termlist_highscore(se->termlists[i].termlist, &len, - nmem_tmp); + p = termlist_highscore(t->termlist, &len, nmem_tmp); if (p) { int i; @@ -1278,8 +1311,24 @@ int session_fetch_more(struct session *se) } else { - session_log(se, YLOG_LOG, "%s: no more to fetch", - client_get_id(cl)); + int filtered; + int ingest_failures; + int record_failures; + int num = client_get_num_records( + cl, &filtered, &ingest_failures, &record_failures); + + session_log(se, YLOG_LOG, "%s: hits=" ODR_INT_PRINTF + " fetched=%d filtered=%d", + client_get_id(cl), + client_get_hits(cl), + num, filtered); + if (ingest_failures || record_failures) + { + session_log(se, YLOG_WARN, "%s:" + " ingest failures=%d record failures=%d", + client_get_id(cl), + ingest_failures, record_failures); + } } } else @@ -1314,7 +1363,7 @@ struct record_cluster **show_range_start(struct session *se, if (se->relevance) { for (spp = sp; spp; spp = spp->next) - if (spp->type == Metadata_sortkey_relevance) + if (spp->type == Metadata_type_relevance) { relevance_prepare_read(se->relevance, se->reclist); break; @@ -1429,7 +1478,8 @@ void statistics(struct session *se, struct statistics *stat) } static struct record_metadata *record_metadata_init( - NMEM nmem, const char *value, enum conf_metadata_type type, + NMEM nmem, const char *value, const char *norm, + enum conf_metadata_type type, struct _xmlAttr *attr) { struct record_metadata *rec_md = record_metadata_create(nmem); @@ -1455,17 +1505,29 @@ static struct record_metadata *record_metadata_init( } *attrp = 0; - if (type == Metadata_type_generic) + switch (type) { - char *p = nmem_strdup(nmem, value); - - p = normalize7bit_generic(p, " ,/.:(["); - - rec_md->data.text.disp = p; + case Metadata_type_generic: + case Metadata_type_skiparticle: + if (norm) + { + rec_md->data.text.disp = nmem_strdup(nmem, value); + rec_md->data.text.norm = nmem_strdup(nmem, norm); + } + else + { + if (strstr(value, "://")) /* looks like a URL */ + rec_md->data.text.disp = nmem_strdup(nmem, value); + else + rec_md->data.text.disp = + normalize7bit_generic(nmem_strdup(nmem, value), " ,/.:(["); + rec_md->data.text.norm = rec_md->data.text.disp; + } rec_md->data.text.sort = 0; rec_md->data.text.snippet = 0; - } - else if (type == Metadata_type_year || type == Metadata_type_date) + break; + case Metadata_type_year: + case Metadata_type_date: { int first, last; int longdate = 0; @@ -1478,8 +1540,14 @@ static struct record_metadata *record_metadata_init( rec_md->data.number.min = first; rec_md->data.number.max = last; } - else + break; + case Metadata_type_float: + rec_md->data.fnumber = atof(value); + break; + case Metadata_type_relevance: + case Metadata_type_position: return 0; + } return rec_md; } @@ -1514,7 +1582,7 @@ static int get_mergekey_from_doc(xmlDoc *doc, xmlNode *root, const char *name, continue; if (!strcmp((const char *) n->name, "metadata")) { - xmlChar *type = xmlGetProp(n, (xmlChar *) "type"); + const char *type = yaz_xml_get_prop(n, "type"); if (type == NULL) { yaz_log(YLOG_FATAL, "Missing type attribute on metadata element. Skipping!"); } @@ -1533,7 +1601,6 @@ static int get_mergekey_from_doc(xmlDoc *doc, xmlNode *root, const char *name, if (value) xmlFree(value); } - xmlFree(type); } } return no_found; @@ -1546,7 +1613,7 @@ static const char *get_mergekey(xmlDoc *doc, xmlNode *root, { char *mergekey_norm = 0; WRBUF norm_wr = wrbuf_alloc(); - xmlChar *mergekey; + const char *mergekey; if (session_mergekey) { @@ -1558,10 +1625,9 @@ static const char *get_mergekey(xmlDoc *doc, xmlNode *root, for (i = 0; i < num; i++) get_mergekey_from_doc(doc, root, values[i], service, norm_wr); } - else if ((mergekey = xmlGetProp(root, (xmlChar *) "mergekey"))) + else if ((mergekey = yaz_xml_get_prop(root, "mergekey"))) { - mergekey_norm_wr(service->charsets, norm_wr, (const char *) mergekey); - xmlFree(mergekey); + mergekey_norm_wr(service->charsets, norm_wr, mergekey); } else { @@ -1628,7 +1694,7 @@ static int check_record_filter(xmlNode *root, struct session_database *sdb) continue; if (!strcmp((const char *) n->name, "metadata")) { - xmlChar *type = xmlGetProp(n, (xmlChar *) "type"); + const char *type = yaz_xml_get_prop(n, "type"); if (type) { size_t len; @@ -1656,7 +1722,6 @@ static int check_record_filter(xmlNode *root, struct session_database *sdb) } xmlFree(value); } - xmlFree(type); } } } @@ -1664,6 +1729,8 @@ static int check_record_filter(xmlNode *root, struct session_database *sdb) } static int ingest_to_cluster(struct client *cl, + WRBUF wrbuf_disp, + WRBUF wrbuf_norm, xmlDoc *xdoc, xmlNode *root, int record_no, @@ -1676,9 +1743,7 @@ static int ingest_sub_record(struct client *cl, xmlDoc *xdoc, xmlNode *root, { int ret = 0; struct session *se = client_get_session(cl); - struct conf_service *service = se->service; - - insert_settings_values(sdb, xdoc, root, service); + WRBUF wrbuf_disp, wrbuf_norm; if (!check_record_filter(root, sdb)) { @@ -1687,11 +1752,15 @@ static int ingest_sub_record(struct client *cl, xmlDoc *xdoc, xmlNode *root, record_no, sdb->database->id); return 0; } + wrbuf_disp = wrbuf_alloc(); + wrbuf_norm = wrbuf_alloc(); session_enter(se, "ingest_sub_record"); if (client_get_session(cl) == se && se->relevance) - ret = ingest_to_cluster(cl, xdoc, root, record_no, mergekeys); + ret = ingest_to_cluster(cl, wrbuf_disp, wrbuf_norm, + xdoc, root, record_no, mergekeys); session_leave(se, "ingest_sub_record"); - + wrbuf_destroy(wrbuf_norm); + wrbuf_destroy(wrbuf_disp); return ret; } @@ -1711,9 +1780,19 @@ int ingest_record(struct client *cl, const char *rec, struct session_database *sdb = client_get_database(cl); struct conf_service *service = se->service; xmlDoc *xdoc = normalize_record(se, sdb, service, rec, nmem); - int r = 0; - xmlNode *root; + int r = ingest_xml_record(cl, xdoc, record_no, nmem, 0); + client_store_xdoc(cl, record_no, xdoc); + return r; +} +int ingest_xml_record(struct client *cl, xmlDoc *xdoc, + int record_no, NMEM nmem, int cached_copy) +{ + struct session *se = client_get_session(cl); + struct session_database *sdb = client_get_database(cl); + struct conf_service *service = se->service; + xmlNode *root; + int r = 0; if (!xdoc) return -1; @@ -1728,24 +1807,52 @@ int ingest_record(struct client *cl, const char *rec, if (!strcmp((const char *) root->name, "cluster")) { + int no_merge_keys = 0; + int no_merge_dups = 0; xmlNode *sroot; + struct record_metadata_attr *mk = 0; + for (sroot = root->children; sroot; sroot = sroot->next) - if (sroot->type == XML_ELEMENT_NODE) + if (sroot->type == XML_ELEMENT_NODE && + !strcmp((const char *) sroot->name, "record")) { + struct record_metadata_attr **mkp; const char *mergekey_norm = get_mergekey(xdoc, sroot, cl, record_no, service, nmem, - se->mergekey); - - struct record_metadata_attr *mk = (struct record_metadata_attr*) - nmem_malloc(nmem, sizeof(*mk)); - mk->name = 0; - mk->value = nmem_strdup(nmem, mergekey_norm); - mk->next = 0; - + se->mergekey); + if (!mergekey_norm) + { + r = -1; + break; + } + for (mkp = &mk; *mkp; mkp = &(*mkp)->next) + if (!strcmp((*mkp)->value, mergekey_norm)) + break; + if (!*mkp) + { + *mkp = (struct record_metadata_attr*) + nmem_malloc(nmem, sizeof(**mkp)); + (*mkp)->name = 0; + (*mkp)->value = nmem_strdup(nmem, mergekey_norm); + (*mkp)->next = 0; + no_merge_keys++; + } + else + no_merge_dups++; + } + if (no_merge_keys > 1 || no_merge_dups > 0) + { + yaz_log(YLOG_LOG, "Got %d mergekeys, %d dups for position %d", + no_merge_keys, no_merge_dups, record_no); + } + for (sroot = root->children; !r && sroot; sroot = sroot->next) + if (sroot->type == XML_ELEMENT_NODE && + !strcmp((const char *) sroot->name, "record")) + { + if (!cached_copy) + insert_settings_values(sdb, xdoc, root, service); r = ingest_sub_record(cl, xdoc, sroot, record_no, nmem, sdb, mk); - if (r) - break; } } else if (!strcmp((const char *) root->name, "record")) @@ -1761,6 +1868,8 @@ int ingest_record(struct client *cl, const char *rec, mk->value = nmem_strdup(nmem, mergekey_norm); mk->next = 0; + if (!cached_copy) + insert_settings_values(sdb, xdoc, root, service); r = ingest_sub_record(cl, xdoc, root, record_no, nmem, sdb, mk); } } @@ -1770,7 +1879,6 @@ int ingest_record(struct client *cl, const char *rec, (const char *) root->name); r = -1; } - xmlFreeDoc(xdoc); return r; } @@ -1933,14 +2041,14 @@ static int check_limit_local(struct client *cl, } static int ingest_to_cluster(struct client *cl, + WRBUF wrbuf_disp, + WRBUF wrbuf_norm, xmlDoc *xdoc, xmlNode *root, int record_no, struct record_metadata_attr *merge_keys) { xmlNode *n; - xmlChar *type = 0; - xmlChar *value = 0; struct session *se = client_get_session(cl); struct conf_service *service = se->service; int term_factor = 1; @@ -1957,12 +2065,6 @@ static int ingest_to_cluster(struct client *cl, for (n = root->children; n; n = n->next) { - if (type) - xmlFree(type); - if (value) - xmlFree(value); - type = value = 0; - if (n->type != XML_ELEMENT_NODE) continue; if (!strcmp((const char *) n->name, "metadata")) @@ -1971,20 +2073,12 @@ static int ingest_to_cluster(struct client *cl, struct record_metadata **wheretoput = 0; struct record_metadata *rec_md = 0; int md_field_id = -1; + xmlChar *value0; + const char *type = yaz_xml_get_prop(n, "type"); - type = xmlGetProp(n, (xmlChar *) "type"); - value = xmlNodeListGetString(xdoc, n->children, 1); if (!type) continue; - if (!value || !*value) - { - xmlChar *empty = xmlGetProp(n, (xmlChar *) "empty"); - if (!empty) - continue; - if (value) - xmlFree(value); - value = empty; - } + md_field_id = conf_service_metadata_field_id(service, (const char *) type); if (md_field_id < 0) @@ -1998,15 +2092,30 @@ static int ingest_to_cluster(struct client *cl, continue; } + wrbuf_rewind(wrbuf_disp); + value0 = xmlNodeListGetString(xdoc, n->children, 1); + if (!value0 || !*value0) + { + const char *empty = yaz_xml_get_prop(n, "empty"); + if (!empty) + continue; + wrbuf_puts(wrbuf_disp, (const char *) empty); + } + else + { + wrbuf_puts(wrbuf_disp, (const char *) value0); + } + if (value0) + xmlFree(value0); ser_md = &service->metadata[md_field_id]; // non-merged metadata - rec_md = record_metadata_init(se->nmem, (const char *) value, + rec_md = record_metadata_init(se->nmem, wrbuf_cstr(wrbuf_disp), 0, ser_md->type, n->properties); if (!rec_md) { session_log(se, YLOG_WARN, "bad metadata data '%s' " - "for element '%s'", value, type); + "for element '%s'", wrbuf_cstr(wrbuf_disp), type); continue; } @@ -2014,7 +2123,7 @@ static int ingest_to_cluster(struct client *cl, { WRBUF w = wrbuf_alloc(); if (relevance_snippet(se->relevance, - (char*) value, ser_md->name, w)) + wrbuf_cstr(wrbuf_disp), ser_md->name, w)) rec_md->data.text.snippet = nmem_strdup(se->nmem, wrbuf_cstr(w)); wrbuf_destroy(w); @@ -2030,18 +2139,14 @@ static int ingest_to_cluster(struct client *cl, if (check_limit_local(cl, record, record_no)) { - session_log(se, YLOG_LOG, "Facet filtered out record no %d from %s", - record_no, sdb->database->id); - if (type) - xmlFree(type); - if (value) - xmlFree(value); return -2; } - cluster = reclist_insert(se->reclist, service, record, + cluster = reclist_insert(se->reclist, se->relevance, service, record, merge_keys, &se->total_merged); if (!cluster) + { return 0; // complete match with existing record + } { const char *use_term_factor_str = @@ -2061,9 +2166,6 @@ static int ingest_to_cluster(struct client *cl, session_log(se, YLOG_LOG, "Cluster id %s from %s (#%d)", cluster->recid, sdb->database->id, record_no); - - relevance_newrec(se->relevance, cluster); - // original metadata, to check if first existence of a field metadata0 = xmalloc(sizeof(*metadata0) * service->num_metadata); memcpy(metadata0, cluster->metadata, @@ -2080,13 +2182,6 @@ static int ingest_to_cluster(struct client *cl, // now parsing XML record and adding data to cluster or record metadata for (n = root->children; n; n = n->next) { - pp2_charset_token_t prt; - if (type) - xmlFree(type); - if (value) - xmlFree(value); - type = value = 0; - if (n->type != XML_ELEMENT_NODE) continue; if (!strcmp((const char *) n->name, "metadata")) @@ -2098,12 +2193,12 @@ static int ingest_to_cluster(struct client *cl, int md_field_id = -1; int sk_field_id = -1; const char *rank = 0; - xmlChar *xml_rank = 0; - - type = xmlGetProp(n, (xmlChar *) "type"); - value = xmlNodeListGetString(xdoc, n->children, 1); + const char *xml_rank = 0; + const char *type = 0; + xmlChar *value0; - if (!type || !value || !*value) + type = yaz_xml_get_prop(n, "type"); + if (!type) continue; md_field_id @@ -2119,12 +2214,39 @@ static int ingest_to_cluster(struct client *cl, ser_sk = &service->sortkeys[sk_field_id]; } - // merged metadata - rec_md = record_metadata_init(se->nmem, (const char *) value, - ser_md->type, 0); + wrbuf_rewind(wrbuf_disp); + wrbuf_rewind(wrbuf_norm); - // see if the field was not in cluster already (from beginning) + value0 = xmlNodeListGetString(xdoc, n->children, 1); + if (!value0 || !*value0) + { + if (value0) + xmlFree(value0); + continue; + } + if (ser_md->icurule) + { + run_icu(se, ser_md->icurule, (const char *) value0, + wrbuf_norm, wrbuf_disp); + yaz_log(YLOG_LOG, "run_icu input=%s norm=%s disp=%s", + (const char *) value0, + wrbuf_cstr(wrbuf_norm), wrbuf_cstr(wrbuf_disp)); + rec_md = record_metadata_init(se->nmem, wrbuf_cstr(wrbuf_disp), + wrbuf_cstr(wrbuf_norm), + ser_md->type, 0); + } + else + { + wrbuf_puts(wrbuf_disp, (const char *) value0); + rec_md = record_metadata_init(se->nmem, wrbuf_cstr(wrbuf_disp), + 0, + ser_md->type, 0); + } + + xmlFree(value0); + + // see if the field was not in cluster already (from beginning) if (!rec_md) continue; @@ -2147,7 +2269,7 @@ static int ingest_to_cluster(struct client *cl, } else { - xml_rank = xmlGetProp(n, (xmlChar *) "rank"); + xml_rank = yaz_xml_get_prop(n, "rank"); rank = xml_rank ? (const char *) xml_rank : ser_md->rank; } @@ -2166,8 +2288,8 @@ static int ingest_to_cluster(struct client *cl, { while (*wheretoput) { - if (!strcmp((const char *) (*wheretoput)->data.text.disp, - rec_md->data.text.disp)) + if (!strcmp((const char *) (*wheretoput)->data.text.norm, + rec_md->data.text.norm)) break; wheretoput = &(*wheretoput)->next; } @@ -2177,15 +2299,16 @@ static int ingest_to_cluster(struct client *cl, else if (ser_md->merge == Metadata_merge_longest) { if (!*wheretoput - || strlen(rec_md->data.text.disp) - > strlen((*wheretoput)->data.text.disp)) + || strlen(rec_md->data.text.norm) + > strlen((*wheretoput)->data.text.norm)) { *wheretoput = rec_md; if (ser_sk) { + pp2_charset_token_t prt; const char *sort_str = 0; int skip_article = - ser_sk->type == Metadata_sortkey_skiparticle; + ser_sk->type == Metadata_type_skiparticle; if (!cluster->sortkeys[sk_field_id]) cluster->sortkeys[sk_field_id] = @@ -2246,7 +2369,8 @@ static int ingest_to_cluster(struct client *cl, if (rank) { relevance_countwords(se->relevance, cluster, - (char *) value, rank, ser_md->name); + wrbuf_cstr(wrbuf_disp), + rank, ser_md->name); } // construct facets ... unless the client already has reported them if (ser_md->termlist && !client_has_facet(cl, (char *) type)) @@ -2264,15 +2388,8 @@ static int ingest_to_cluster(struct client *cl, } } else - add_facet(se, (char *) type, (char *) value, term_factor); + add_facet(se, type, wrbuf_cstr(wrbuf_disp), term_factor); } - - // cleaning up - if (xml_rank) - xmlFree(xml_rank); - xmlFree(type); - xmlFree(value); - type = value = 0; } else { @@ -2282,11 +2399,6 @@ static int ingest_to_cluster(struct client *cl, se->number_of_warnings_unknown_elements++; } } - if (type) - xmlFree(type); - if (value) - xmlFree(value); - nmem_destroy(ingest_nmem); xfree(metadata0); relevance_donerecord(se->relevance, cluster);