/* This file is part of Pazpar2.
- Copyright (C) 2006-2012 Index Data
+ Copyright (C) 2006-2013 Index Data
Pazpar2 is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
return 0;
}
+/* Build a highlighted snippet of `words` in w_snippet.
+ *
+ * `words` is tokenized through r->prt. The original text of every token
+ * (words + org_start, org_len bytes) is appended to w_snippet with
+ * wrbuf_xmlputs_n(). A run of consecutive tokens whose normalized form
+ * equals the norm_str of some entry in r->entries is wrapped in a single
+ * <match>...</match> element.
+ *
+ * `name` is accepted but not used by this function.
+ *
+ * Returns the number of <match> elements opened (0 if nothing matched).
+ */
+int relevance_snippet(struct relevance *r,
+                      const char *words, const char *name,
+                      WRBUF w_snippet)
+{
+    int no = 0;
+    const char *norm_str;
+    int highlight = 0; /* non-zero while a <match> element is open */
+
+    pp2_charset_token_first(r->prt, words, 0);
+    while ((norm_str = pp2_charset_token_next(r->prt)))
+    {
+        size_t org_start, org_len;
+        struct word_entry *entries = r->entries;
+
+        pp2_get_org(r->prt, &org_start, &org_len);
+        /* look for an entry equal to this token; an empty normalized
+           token never matches, so `entries` ends up NULL for it */
+        for (; entries; entries = entries->next)
+        {
+            if (*norm_str && !strcmp(norm_str, entries->norm_str))
+                break;
+        }
+        if (entries)
+        {
+            /* matching token: open <match> unless one is already open */
+            if (!highlight)
+            {
+                highlight = 1;
+                wrbuf_puts(w_snippet, "<match>");
+                no++;
+            }
+        }
+        else
+        {
+            /* non-matching token: close any open <match> first */
+            if (highlight)
+            {
+                highlight = 0;
+                wrbuf_puts(w_snippet, "</match>");
+            }
+        }
+        wrbuf_xmlputs_n(w_snippet, words + org_start, org_len);
+    }
+    /* input ended on a matching token: close the dangling element */
+    if (highlight)
+        wrbuf_puts(w_snippet, "</match>");
+    if (no)
+    {
+        yaz_log(YLOG_DEBUG, "SNIPPET match: %s", wrbuf_cstr(w_snippet));
+    }
+    return no;
+}
+
void relevance_countwords(struct relevance *r, struct record_cluster *cluster,
const char *words, const char *rank,
const char *name)
break;
}
}
+// Zero every slot of r->doc_frequency_vec (r->vec_len slots).
+// A NULL r is accepted and ignored.
+void relevance_clear(struct relevance *r)
+{
+    int slot;
+
+    if (!r)
+        return;
+    for (slot = 0; slot < r->vec_len; slot++)
+        r->doc_frequency_vec[slot] = 0;
+}
struct relevance *relevance_create_ccl(pp2_charset_fact_t pft,
struct ccl_rpn_node *query,
{
NMEM nmem = nmem_create();
struct relevance *res = nmem_malloc(nmem, sizeof(*res));
- int i;
res->nmem = nmem;
res->entries = 0;
pull_terms(res, query);
res->doc_frequency_vec = nmem_malloc(nmem, res->vec_len * sizeof(int));
- for (i = 0; i < res->vec_len; i++)
- res->doc_frequency_vec[i] = 0;
// worker array
res->term_frequency_vec_tmp =
res->term_pos =
nmem_malloc(res->nmem, res->vec_len * sizeof(*res->term_pos));
+ relevance_clear(res);
return res;
}
}
}
-void relevance_newrec(struct relevance *r, struct record_cluster *rec)
+// Accumulate the term-frequency vectors of src into dst: for each of the
+// r->vec_len slots, src's term_frequency_vec and term_frequency_vecf
+// values are added to the corresponding dst slots. src is not modified.
+// No NULL checks are made on the vectors; presumably both clusters have
+// already been set up by relevance_newrec() -- confirm against callers.
+void relevance_mergerec(struct relevance *r, struct record_cluster *dst,
+ const struct record_cluster *src)
 {
- if (!rec->term_frequency_vec)
- {
- int i;
+ int i;
- // term frequency [1,..] . [0] is total length of all fields
- rec->term_frequency_vec =
- nmem_malloc(r->nmem,
- r->vec_len * sizeof(*rec->term_frequency_vec));
- for (i = 0; i < r->vec_len; i++)
- rec->term_frequency_vec[i] = 0;
+ // sum the integer term frequencies
+ for (i = 0; i < r->vec_len; i++)
+ dst->term_frequency_vec[i] += src->term_frequency_vec[i];
- // term frequency divided by length of field [1,...]
- rec->term_frequency_vecf =
- nmem_malloc(r->nmem,
- r->vec_len * sizeof(*rec->term_frequency_vecf));
- for (i = 0; i < r->vec_len; i++)
- rec->term_frequency_vecf[i] = 0.0;
- }
+ // sum the term_frequency_vecf values the same way
+ for (i = 0; i < r->vec_len; i++)
+ dst->term_frequency_vecf[i] += src->term_frequency_vecf[i];
+}
+
+// Give rec a fresh pair of zeroed term-frequency vectors, each with
+// r->vec_len slots, allocated from r->nmem. Any vectors rec already
+// pointed to are simply replaced.
+void relevance_newrec(struct relevance *r, struct record_cluster *rec)
+{
+    const int slots = r->vec_len;
+    int k;
+
+    // term frequency [1,..] . [0] is total length of all fields
+    rec->term_frequency_vec =
+        nmem_malloc(r->nmem, slots * sizeof(*rec->term_frequency_vec));
+
+    // term frequency divided by length of field [1,...]
+    rec->term_frequency_vecf =
+        nmem_malloc(r->nmem, slots * sizeof(*rec->term_frequency_vecf));
+
+    // start both vectors at zero
+    for (k = 0; k < slots; k++)
+    {
+        rec->term_frequency_vec[k] = 0;
+        rec->term_frequency_vecf[k] = 0.0;
+    }
}
void relevance_donerecord(struct relevance *r, struct record_cluster *cluster)