X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Frelevance.c;h=08527aec06810451acf631fd1210ed81c791833d;hb=8cc486015ba87bc3daf483d18d31d163c3670a21;hp=9766a5ba5d77ecb3781c80387001d9f8a3d770b5;hpb=7184974aff1c737312a05376845b9a83c03c28ab;p=pazpar2-moved-to-github.git

diff --git a/src/relevance.c b/src/relevance.c
index 9766a5b..08527ae 100644
--- a/src/relevance.c
+++ b/src/relevance.c
@@ -1,5 +1,5 @@
 /* This file is part of Pazpar2.
-   Copyright (C) 2006-2012 Index Data
+   Copyright (C) 2006-2013 Index Data
 
 Pazpar2 is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free
@@ -28,6 +28,10 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include "relevance.h"
 #include "session.h"
 
+#if defined(WIN32) /* NOTE(review): presumably this CRT lacks log2() -- confirm */
+#define log2(x) (log(x) / log(2)) /* change of base: log(x) over log(2) */
+#endif
+
 struct relevance
 {
     int *doc_frequency_vec;
@@ -79,6 +83,55 @@ static struct word_entry *word_entry_match(struct relevance *r,
     return 0;
 }
 
+/*
+ * Append the original text of each token of words to w_snippet, XML-escaped,
+ * wrapping every run of consecutive tokens whose normalized form equals a
+ * term in r->entries in <match>...</match>.
+ * r          relevance state; supplies the tokenizer (r->prt) and term list
+ * words      text to tokenize
+ * name       currently unused
+ * w_snippet  buffer the snippet is appended to
+ *
+ * Returns the number of <match> runs opened (0 if no token matched).
+ */
+int relevance_snippet(struct relevance *r, const char *words,
+                      const char *name, WRBUF w_snippet)
+{
+    int no = 0;        /* number of <match> runs opened so far */
+    int highlight = 0; /* non-zero while a <match> element is open */
+    const char *norm_str;
+
+    pp2_charset_token_first(r->prt, words, 0);
+    while ((norm_str = pp2_charset_token_next(r->prt)))
+    {
+        size_t org_start, org_len;
+        /* An empty normalized token never matches, so skip the search. */
+        struct word_entry *entries = *norm_str ? r->entries : 0;
+
+        pp2_get_org(r->prt, &org_start, &org_len);
+        while (entries && strcmp(norm_str, entries->norm_str))
+            entries = entries->next;
+        if (entries && !highlight)
+        {
+            highlight = 1; /* first matching token of a run */
+            wrbuf_puts(w_snippet, "<match>");
+            no++;
+        }
+        else if (!entries && highlight)
+        {
+            highlight = 0; /* run ended before this token */
+            wrbuf_puts(w_snippet, "</match>");
+        }
+        /* Emit the token's original span of words, not its normalized form. */
+        wrbuf_xmlputs_n(w_snippet, words + org_start, org_len);
+    }
+    if (highlight)
+        wrbuf_puts(w_snippet, "</match>");
+    if (no)
+        yaz_log(YLOG_DEBUG, "SNIPPET match: %s", wrbuf_cstr(w_snippet));
+    return no;
+}
+
 void relevance_countwords(struct relevance *r, struct record_cluster *cluster,
                           const char *words, const char *rank,
                           const char *name)
@@ -211,6 +264,15 @@ static void pull_terms(struct relevance *res, struct ccl_rpn_node *n)
         break;
     }
 }
+/* Set all r->vec_len elements of r->doc_frequency_vec to 0; null r is a no-op. */
+void relevance_clear(struct relevance *r)
+{
+    int idx = 0;
+    if (!r)
+        return;
+    while (idx < r->vec_len)
+        r->doc_frequency_vec[idx++] = 0;
+}
 
 struct relevance *relevance_create_ccl(pp2_charset_fact_t pft,
                                        struct ccl_rpn_node *query,
@@ -220,7 +282,6 @@ struct relevance *relevance_create_ccl(pp2_charset_fact_t pft,
 {
     NMEM nmem = nmem_create();
     struct relevance *res = nmem_malloc(nmem, sizeof(*res));
-    int i;
 
     res->nmem = nmem;
     res->entries = 0;
@@ -234,8 +295,6 @@ struct relevance *relevance_create_ccl(pp2_charset_fact_t pft,
     pull_terms(res, query);
 
     res->doc_frequency_vec = nmem_malloc(nmem, res->vec_len * sizeof(int));
-    for (i = 0; i < res->vec_len; i++)
-        res->doc_frequency_vec[i] = 0;
 
     // worker array
     res->term_frequency_vec_tmp =
@@ -245,6 +304,7 @@ struct relevance *relevance_create_ccl(pp2_charset_fact_t pft,
     res->term_pos =
         nmem_malloc(res->nmem, res->vec_len * sizeof(*res->term_pos));
 
+    relevance_clear(res);
     return res;
 }