+
+// Count the words found in `words` into cluster->term_frequency_vec.
+//
+// The text is scanned left to right. Characters for which raw_char()
+// yields a negative value are treated as separators and skipped. At each
+// word start the word trie r->wt is consulted:
+//   - on a match (non-zero result) the scan advances by the length that
+//     word_trie_match() reports through `skipped`, and
+//     term_frequency_vec[<result>] is increased by `multiplier`;
+//   - otherwise the run of characters with a non-negative raw_char() value
+//     is skipped.
+// In both cases term_frequency_vec[0] is incremented once per word.
+//
+// FIXME. The definition of a word is crude here.. should support
+// some form of localization mechanism?
+void relevance_countwords(struct relevance *r, struct record_cluster *cluster,
+                          const char *words, int multiplier)
+{
+    while (*words)
+    {
+        int res;
+        int skipped = 0;
+
+        // tolower() requires a value representable as unsigned char (or EOF);
+        // passing a negative plain char is undefined, hence the cast.
+        // raw_char()'s result is tested directly rather than via a plain
+        // char, whose signedness is implementation-defined (with an unsigned
+        // char a "< 0" test could never succeed).
+        // NOTE(review): assumes raw_char() returns a signed integer type and
+        // uses a negative value for non-word characters -- confirm.
+        while (*words && raw_char(tolower((unsigned char) *words)) < 0)
+            words++;
+        if (!*words)
+            break;
+        res = word_trie_match(r->wt, words, &skipped);
+        if (res)
+        {
+            words += skipped;
+            cluster->term_frequency_vec[res] += multiplier;
+        }
+        else
+        {
+            // No known term starts here: step over the rest of this word.
+            while (*words && raw_char(tolower((unsigned char) *words)) >= 0)
+                words++;
+        }
+        // Slot 0 receives one increment per word, matched or not.
+        cluster->term_frequency_vec[0]++;
+    }
+}
+
+#else
+
+/* One node of a singly linked list mapping a normalized token to the
+ * number of the query term it came from. */
+struct word_entry {
+ /* Token string; word_entry_match() compares against it with strcmp(). */
+ const char *norm_str;
+ /* Term number returned by word_entry_match() on a hit. build_word_entries()
+  * numbers terms from 1, so 0 is free to mean "no match". */
+ int termno;
+ /* Next node in the list, or 0 at the end; add_word_entry() prepends. */
+ struct word_entry *next;
+};
+
+/*
+ * Push a new entry for (norm_str, term_no) onto the front of *entries.
+ * The node comes from nmem_malloc() and its string from nmem_strdup(),
+ * both on the given nmem.
+ */
+static void add_word_entry(NMEM nmem,
+                           struct word_entry **entries,
+                           const char *norm_str,
+                           int term_no)
+{
+    struct word_entry *node = nmem_malloc(nmem, sizeof *node);
+
+    node->norm_str = nmem_strdup(nmem, norm_str);
+    node->termno = term_no;
+
+    /* Link in at the head: the newest entry becomes the list start. */
+    node->next = *entries;
+    *entries = node;
+}
+
+
+/*
+ * Search the list starting at `entries` for a node whose norm_str equals
+ * `norm_str` (exact strcmp() comparison).
+ * Returns the termno of the first such node, or 0 if none is found.
+ */
+int word_entry_match(struct word_entry *entries, const char *norm_str)
+{
+    struct word_entry *cur = entries;
+
+    while (cur)
+    {
+        if (strcmp(cur->norm_str, norm_str) == 0)
+            return cur->termno;
+        cur = cur->next;
+    }
+    return 0;
+}
+
+/*
+ * Build the word-entry list for a 0-terminated array of query terms.
+ * Each term is tokenized with pp2_relevance_tokenize(); every token it
+ * yields is added to the list under that term's number. Terms are numbered
+ * from 1 in array order, so that 0 can signal "no entry".
+ * Returns the head of the list (0 when no tokens were produced).
+ */
+static struct word_entry *build_word_entries(pp2_charset_t pct, NMEM nmem,
+                                             const char **terms)
+{
+    struct word_entry *head = 0;
+    int idx;
+
+    for (idx = 0; terms[idx]; idx++)
+    {
+        pp2_relevance_token_t tokens = pp2_relevance_tokenize(pct, terms[idx]);
+        const char *tok = pp2_relevance_token_next(tokens);
+
+        /* All tokens of one term share the same 1-based term number. */
+        while (tok)
+        {
+            add_word_entry(nmem, &head, tok, idx + 1);
+            tok = pp2_relevance_token_next(tokens);
+        }
+
+        pp2_relevance_token_destroy(tokens);
+    }
+    return head;
+}
+
+/*
+ * Tokenize `words` with the charset handle r->pct and update the
+ * term-frequency vector of `cluster`:
+ *   - term_frequency_vec[0] is incremented for every token;
+ *   - when word_entry_match() finds the token in r->entries, the slot for
+ *     the returned term number is increased by `multiplier`.
+ */
+void relevance_countwords(struct relevance *r, struct record_cluster *cluster,
+                          const char *words, int multiplier)
+{
+    pp2_relevance_token_t tokens = pp2_relevance_tokenize(r->pct, words);
+    const char *tok;
+
+    for (tok = pp2_relevance_token_next(tokens); tok;
+         tok = pp2_relevance_token_next(tokens))
+    {
+        int termno = word_entry_match(r->entries, tok);
+
+        /* Slot 0 is the total token count; a hit always has termno != 0,
+         * so the two updates never touch the same slot. */
+        cluster->term_frequency_vec[0]++;
+        if (termno != 0)
+            cluster->term_frequency_vec[termno] += multiplier;
+    }
+
+    pp2_relevance_token_destroy(tokens);
+}
+
+#endif
+
+
+
+struct relevance *relevance_create(pp2_charset_t pct,
+ NMEM nmem, const char **terms, int numrecs)