for (; *p; p++)
{
- pp2_relevance_token_t prt = pp2_relevance_tokenize(pct, *p);
+ pp2_relevance_token_t prt = pp2_relevance_tokenize(pct, *p, 0);
const char *norm_str;
while ((norm_str = pp2_relevance_token_next(prt)))
void relevance_countwords(struct relevance *r, struct record_cluster *cluster,
const char *words, int multiplier, const char *name)
{
- pp2_relevance_token_t prt = pp2_relevance_tokenize(r->pct, words);
+ pp2_relevance_token_t prt = pp2_relevance_tokenize(r->pct, words, 0);
int *mult = cluster->term_frequency_vec_tmp;
const char *norm_str;
int i, length = 0;
for (i = 1; i < r->vec_len; i++)
{
- cluster->term_frequency_vecf[i] += (double) mult[i] / length;
+ if (length > 0) /* only add if non-empty */
+ cluster->term_frequency_vecf[i] += (double) mult[i] / length;
cluster->term_frequency_vec[i] += mult[i];
}