X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Frelevance.c;h=08527aec06810451acf631fd1210ed81c791833d;hb=8cc486015ba87bc3daf483d18d31d163c3670a21;hp=9766a5ba5d77ecb3781c80387001d9f8a3d770b5;hpb=7184974aff1c737312a05376845b9a83c03c28ab;p=pazpar2-moved-to-github.git diff --git a/src/relevance.c b/src/relevance.c index 9766a5b..08527ae 100644 --- a/src/relevance.c +++ b/src/relevance.c @@ -1,5 +1,5 @@ /* This file is part of Pazpar2. - Copyright (C) 2006-2012 Index Data + Copyright (C) 2006-2013 Index Data Pazpar2 is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -28,6 +28,10 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include "relevance.h" #include "session.h" +#ifdef WIN32 +#define log2(x) (log(x)/log(2)) +#endif + struct relevance { int *doc_frequency_vec; @@ -79,6 +83,55 @@ static struct word_entry *word_entry_match(struct relevance *r, return 0; } +int relevance_snippet(struct relevance *r, + const char *words, const char *name, + WRBUF w_snippet) +{ + int no = 0; + const char *norm_str; + int highlight = 0; + + pp2_charset_token_first(r->prt, words, 0); + while ((norm_str = pp2_charset_token_next(r->prt))) + { + size_t org_start, org_len; + struct word_entry *entries = r->entries; + int i; + + pp2_get_org(r->prt, &org_start, &org_len); + for (; entries; entries = entries->next, i++) + { + if (*norm_str && !strcmp(norm_str, entries->norm_str)) + break; + } + if (entries) + { + if (!highlight) + { + highlight = 1; + wrbuf_puts(w_snippet, ""); + no++; + } + } + else + { + if (highlight) + { + highlight = 0; + wrbuf_puts(w_snippet, ""); + } + } + wrbuf_xmlputs_n(w_snippet, words + org_start, org_len); + } + if (highlight) + wrbuf_puts(w_snippet, ""); + if (no) + { + yaz_log(YLOG_DEBUG, "SNIPPET match: %s", wrbuf_cstr(w_snippet)); + } + return no; +} + void relevance_countwords(struct relevance *r, struct record_cluster *cluster, const char *words, const char *rank, const char *name) @@ -211,6 +264,15 @@ static void pull_terms(struct relevance *res, struct ccl_rpn_node *n) break; } } +void relevance_clear(struct relevance *r) +{ + if (r) + { + int i; + for (i = 0; i < r->vec_len; i++) + r->doc_frequency_vec[i] = 0; + } +} struct relevance *relevance_create_ccl(pp2_charset_fact_t pft, struct ccl_rpn_node *query, @@ -220,7 +282,6 @@ struct relevance *relevance_create_ccl(pp2_charset_fact_t pft, { NMEM nmem = nmem_create(); struct relevance *res = nmem_malloc(nmem, sizeof(*res)); - int i; res->nmem = nmem; res->entries = 0; @@ -234,8 +295,6 @@ struct relevance *relevance_create_ccl(pp2_charset_fact_t pft, pull_terms(res, query); res->doc_frequency_vec = nmem_malloc(nmem, res->vec_len * sizeof(int)); - for (i = 0; i < res->vec_len; i++) - res->doc_frequency_vec[i] = 0; // worker array res->term_frequency_vec_tmp = @@ -245,6 +304,7 @@ struct relevance *relevance_create_ccl(pp2_charset_fact_t pft, res->term_pos = nmem_malloc(res->nmem, res->vec_len * sizeof(*res->term_pos)); + relevance_clear(res); return res; }