X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Fsession.c;h=41447c64145b24c5d9901d0b99a9c0455b4c5cfc;hb=0d45377ca625dc66907c18bb3f524f9d6c354236;hp=9e238a68793c90ba6014f9739b509555fabe6160;hpb=c1de9c944c1383e173558b15bc693bd0eae2fba5;p=pazpar2-moved-to-github.git diff --git a/src/session.c b/src/session.c index 9e238a6..41447c6 100644 --- a/src/session.c +++ b/src/session.c @@ -203,13 +203,75 @@ static void session_normalize_facet(struct session *s, run_icu(s, icu_chain_id, value, facet_wrbuf, display_wrbuf); } -void add_facet(struct session *s, const char *type, const char *value, int count) +struct facet_id { + char *client_id; + char *type; + char *id; + char *term; + struct facet_id *next; +}; + +static void session_add_id_facet(struct session *s, struct client *cl, + const char *type, + const char *id, + size_t id_len, + const char *term) +{ + struct facet_id *t = nmem_malloc(s->session_nmem, sizeof(*t)); + + t->client_id = nmem_strdup(s->session_nmem, client_get_id(cl)); + t->type = nmem_strdup(s->session_nmem, type); + t->id = nmem_strdupn(s->session_nmem, id, id_len); + t->term = nmem_strdup(s->session_nmem, term); + t->next = s->facet_id_list; + s->facet_id_list = t; +} + + +// Look up a facet term, and return matching id +// If facet type not found, returns 0 +// If facet type found, but no matching term, returns "" +const char *session_lookup_id_facet(struct session *s, struct client *cl, + const char *type, + const char *term) +{ + char *retval = 0; + struct facet_id *t = s->facet_id_list; + for (; t; t = t->next) + { + if (!strcmp(client_get_id(cl), t->client_id) && !strcmp(t->type, type) ) + { + retval = ""; + if ( !strcmp(t->term, term)) + { + return t->id; + } + } + } + return retval; +} + +void add_facet(struct session *s, const char *type, const char *value, int count, struct client *cl) { WRBUF facet_wrbuf = wrbuf_alloc(); WRBUF display_wrbuf = wrbuf_alloc(); + const char *id = 0; + size_t id_len = 0; - session_normalize_facet(s, type, value, display_wrbuf, facet_wrbuf); + /* inspect pz:facetmap:split:name ?? */ + if (!strncmp(type, "split:", 6)) + { + const char *cp = strchr(value, ':'); + if (cp) + { + id = value; + id_len = cp - value; + value = cp + 1; + } + type += 6; + } + session_normalize_facet(s, type, value, display_wrbuf, facet_wrbuf); if (wrbuf_len(facet_wrbuf)) { struct named_termlist **tp = &s->termlists; @@ -224,7 +286,10 @@ void add_facet(struct session *s, const char *type, const char *value, int count (*tp)->next = 0; } termlist_insert((*tp)->termlist, wrbuf_cstr(display_wrbuf), - wrbuf_cstr(facet_wrbuf), count); + wrbuf_cstr(facet_wrbuf), id, id_len, count); + if (id) + session_add_id_facet(s, cl, type, id, id_len, + wrbuf_cstr(display_wrbuf)); } wrbuf_destroy(facet_wrbuf); wrbuf_destroy(display_wrbuf); @@ -505,7 +570,6 @@ static void select_targets_callback(struct session *se, l->next = se->clients_cached; se->clients_cached = l; } - /* set session always. If may be 0 if client is not active */ client_set_session(cl, se); l = xmalloc(sizeof(*l)); @@ -554,6 +618,7 @@ static void session_remove_cached_clients(struct session *se) client_lock(l->client); client_set_session(l->client, 0); client_set_database(l->client, 0); + client_mark_dead(l->client); client_unlock(l->client); client_destroy(l->client); xfree(l); @@ -658,7 +723,7 @@ void session_sort(struct session *se, struct reclist_sortparms *sp, break; if (sr) { - session_log(se, YLOG_DEBUG, "session_sort: field=%s increasing=%d type=%d already fetched", + session_log(se, YLOG_LOG, "session_sort: field=%s increasing=%d type=%d already fetched", field, increasing, type); session_leave(se, "session_sort"); return; @@ -676,7 +741,7 @@ void session_sort(struct session *se, struct reclist_sortparms *sp, struct client *cl = l->client; // Assume no re-search is required. client_parse_init(cl, 1); - clients_research += client_parse_sort(cl, sp); + clients_research += client_parse_sort(cl, sp, 0); } if (!clients_research || se->clients_starting) { @@ -718,6 +783,7 @@ void session_sort(struct session *se, struct reclist_sortparms *sp, } session_enter(se, "session_sort"); se->clients_starting = 0; + se->force_position = 0; session_leave(se, "session_sort"); } } @@ -765,6 +831,7 @@ enum pazpar2_error_code session_search(struct session *se, int no_working = 0; int no_failed_query = 0; int no_failed_limit = 0; + int no_sortmap = 0; struct client_list *l; session_log(se, YLOG_DEBUG, "Search"); @@ -778,6 +845,7 @@ enum pazpar2_error_code session_search(struct session *se, return PAZPAR2_NO_ERROR; } se->clients_starting = 1; + se->force_position = 0; session_leave(se, "session_search0"); if (se->settings_modified) { @@ -818,6 +886,7 @@ enum pazpar2_error_code session_search(struct session *se, *addinfo = "limit"; session_leave(se, "session_search"); se->clients_starting = 0; + session_reset_active_clients(se, 0); return PAZPAR2_MALFORMED_PARAMETER_VALUE; } @@ -846,12 +915,19 @@ enum pazpar2_error_code session_search(struct session *se, else { client_parse_range(cl, startrecs, maxrecs); - client_parse_sort(cl, sp); + client_parse_sort(cl, sp, &no_sortmap); client_start_search(cl); no_working++; } } + yaz_log(YLOG_LOG, "session_search: no_working=%d no_sortmap=%d", + no_working, no_sortmap); session_enter(se, "session_search2"); + if (no_working == 1 && no_sortmap == 1) + { + se->force_position = 1; + yaz_log(YLOG_LOG, "force_position=1"); + } se->clients_starting = 0; session_leave(se, "session_search2"); if (no_working == 0) @@ -1027,6 +1103,7 @@ struct session *new_session(NMEM nmem, struct conf_service *service, session->clients_cached = 0; session->settings_modified = 0; session->session_nmem = nmem; + session->facet_id_list = 0; session->nmem = nmem_create(); session->databases = 0; session->sorted_results = 0; @@ -1034,6 +1111,7 @@ struct session *new_session(NMEM nmem, struct conf_service *service, session->mergekey = 0; session->rank = 0; session->clients_starting = 0; + session->force_position = 0; for (i = 0; i <= SESSION_WATCH_MAX; i++) { @@ -1049,8 +1127,6 @@ struct session *new_session(NMEM nmem, struct conf_service *service, return session; } -const char * client_get_suggestions_xml(struct client *cl, WRBUF wrbuf); - static struct hitsbytarget *hitsbytarget_nb(struct session *se, int *count, NMEM nmem) { @@ -1084,8 +1160,11 @@ static struct hitsbytarget *hitsbytarget_nb(struct session *se, session_settings_dump(se, client_get_database(cl), w); res[*count].settings_xml = nmem_strdup(nmem, wrbuf_cstr(w)); wrbuf_rewind(w); - wrbuf_puts(w, ""); - res[*count].suggestions_xml = nmem_strdup(nmem, client_get_suggestions_xml(cl, w)); + res[*count].suggestions_xml = + nmem_strdup(nmem, client_get_suggestions_xml(cl, w)); + + res[*count].query_data = + client_get_query(cl, &res[*count].query_type, nmem); wrbuf_destroy(w); (*count)++; } @@ -1215,7 +1294,6 @@ void perform_termlist(struct http_channel *c, struct session *se, wrbuf_puts(c->wrbuf, ""); wrbuf_xmlputs(c->wrbuf, p[i]->display_term); wrbuf_puts(c->wrbuf, ""); - wrbuf_printf(c->wrbuf, "%d", p[i]->frequency); @@ -1352,6 +1430,7 @@ struct record_cluster **show_range_start(struct session *se, struct reclist_sortparms *spp; struct client_list *l; int i; + NMEM nmem_tmp = 0; #if USE_TIMING yaz_timing_t t = yaz_timing_create(); #endif @@ -1373,7 +1452,15 @@ struct record_cluster **show_range_start(struct session *se, *approx_hits += client_get_approximation(l->client); } } + if (se->force_position) + { + nmem_tmp = nmem_create(); + sp = reclist_parse_sortparms(nmem_tmp, "position:1", 0); + assert(sp); + } reclist_sort(se->reclist, sp); + if (nmem_tmp) + nmem_destroy(nmem_tmp); reclist_enter(se->reclist); *total = reclist_get_num_records(se->reclist); @@ -1478,7 +1565,8 @@ void statistics(struct session *se, struct statistics *stat) } static struct record_metadata *record_metadata_init( - NMEM nmem, const char *value, enum conf_metadata_type type, + NMEM nmem, const char *value, const char *norm, + enum conf_metadata_type type, struct _xmlAttr *attr) { struct record_metadata *rec_md = record_metadata_create(nmem); @@ -1508,11 +1596,20 @@ static struct record_metadata *record_metadata_init( { case Metadata_type_generic: case Metadata_type_skiparticle: - if (strstr(value, "://")) /* looks like a URL */ + if (norm) + { rec_md->data.text.disp = nmem_strdup(nmem, value); + rec_md->data.text.norm = nmem_strdup(nmem, norm); + } else - rec_md->data.text.disp = - normalize7bit_generic(nmem_strdup(nmem, value), " ,/.:(["); + { + if (strstr(value, "://")) /* looks like a URL */ + rec_md->data.text.disp = nmem_strdup(nmem, value); + else + rec_md->data.text.disp = + normalize7bit_generic(nmem_strdup(nmem, value), " ,/.:(["); + rec_md->data.text.norm = rec_md->data.text.disp; + } rec_md->data.text.sort = 0; rec_md->data.text.snippet = 0; break; @@ -1536,6 +1633,7 @@ static struct record_metadata *record_metadata_init( break; case Metadata_type_relevance: case Metadata_type_position: + case Metadata_type_retrieval: return 0; } return rec_md; @@ -1644,7 +1742,7 @@ static const char *get_mergekey(xmlDoc *doc, xmlNode *root, /* generate unique key if none is not generated already or is empty */ if (wrbuf_len(norm_wr) == 0) { - wrbuf_printf(norm_wr, "position: %s-%d", + wrbuf_printf(norm_wr, "position: %s-%06d", client_get_id(cl), record_no); } else @@ -2068,6 +2166,20 @@ static int ingest_to_cluster(struct client *cl, if (!type) continue; + + md_field_id + = conf_service_metadata_field_id(service, (const char *) type); + if (md_field_id < 0) + { + if (se->number_of_warnings_unknown_metadata == 0) + { + session_log(se, YLOG_WARN, + "Ignoring unknown metadata element: %s", type); + } + se->number_of_warnings_unknown_metadata++; + continue; + } + wrbuf_rewind(wrbuf_disp); value0 = xmlNodeListGetString(xdoc, n->children, 1); if (!value0 || !*value0) @@ -2083,23 +2195,10 @@ static int ingest_to_cluster(struct client *cl, } if (value0) xmlFree(value0); - md_field_id - = conf_service_metadata_field_id(service, (const char *) type); - if (md_field_id < 0) - { - if (se->number_of_warnings_unknown_metadata == 0) - { - session_log(se, YLOG_WARN, - "Ignoring unknown metadata element: %s", type); - } - se->number_of_warnings_unknown_metadata++; - continue; - } - ser_md = &service->metadata[md_field_id]; // non-merged metadata - rec_md = record_metadata_init(se->nmem, wrbuf_cstr(wrbuf_disp), + rec_md = record_metadata_init(se->nmem, wrbuf_cstr(wrbuf_disp), 0, ser_md->type, n->properties); if (!rec_md) { @@ -2186,7 +2285,6 @@ static int ingest_to_cluster(struct client *cl, const char *type = 0; xmlChar *value0; - wrbuf_rewind(wrbuf_disp); type = yaz_xml_get_prop(n, "type"); if (!type) continue; @@ -2204,6 +2302,9 @@ static int ingest_to_cluster(struct client *cl, ser_sk = &service->sortkeys[sk_field_id]; } + wrbuf_rewind(wrbuf_disp); + wrbuf_rewind(wrbuf_norm); + value0 = xmlNodeListGetString(xdoc, n->children, 1); if (!value0 || !*value0) { @@ -2211,16 +2312,29 @@ static int ingest_to_cluster(struct client *cl, xmlFree(value0); continue; } - wrbuf_puts(wrbuf_disp, (const char *) value0); - xmlFree(value0); + if (ser_md->icurule) + { + run_icu(se, ser_md->icurule, (const char *) value0, + wrbuf_norm, wrbuf_disp); + yaz_log(YLOG_LOG, "run_icu input=%s norm=%s disp=%s", + (const char *) value0, + wrbuf_cstr(wrbuf_norm), wrbuf_cstr(wrbuf_disp)); + rec_md = record_metadata_init(se->nmem, wrbuf_cstr(wrbuf_disp), + wrbuf_cstr(wrbuf_norm), + ser_md->type, 0); + } + else + { + wrbuf_puts(wrbuf_disp, (const char *) value0); + rec_md = record_metadata_init(se->nmem, wrbuf_cstr(wrbuf_disp), + 0, + ser_md->type, 0); + } - // merged metadata - rec_md = record_metadata_init(se->nmem, wrbuf_cstr(wrbuf_disp), - ser_md->type, 0); + xmlFree(value0); // see if the field was not in cluster already (from beginning) - if (!rec_md) continue; @@ -2262,8 +2376,8 @@ static int ingest_to_cluster(struct client *cl, { while (*wheretoput) { - if (!strcmp((const char *) (*wheretoput)->data.text.disp, - rec_md->data.text.disp)) + if (!strcmp((const char *) (*wheretoput)->data.text.norm, + rec_md->data.text.norm)) break; wheretoput = &(*wheretoput)->next; } @@ -2273,8 +2387,8 @@ static int ingest_to_cluster(struct client *cl, else if (ser_md->merge == Metadata_merge_longest) { if (!*wheretoput - || strlen(rec_md->data.text.disp) - > strlen((*wheretoput)->data.text.disp)) + || strlen(rec_md->data.text.norm) + > strlen((*wheretoput)->data.text.norm)) { *wheretoput = rec_md; if (ser_sk) @@ -2354,15 +2468,15 @@ static int ingest_to_cluster(struct client *cl, char year[64]; sprintf(year, "%d", rec_md->data.number.max); - add_facet(se, (char *) type, year, term_factor); + add_facet(se, (char *) type, year, term_factor, cl); if (rec_md->data.number.max != rec_md->data.number.min) { sprintf(year, "%d", rec_md->data.number.min); - add_facet(se, (char *) type, year, term_factor); + add_facet(se, (char *) type, year, term_factor, cl); } } else - add_facet(se, type, wrbuf_cstr(wrbuf_disp), term_factor); + add_facet(se, type, wrbuf_cstr(wrbuf_disp), term_factor, cl); } } else