2 * $Id: relevance.c,v 1.1 2006-11-24 20:29:07 quinn Exp $
12 struct relevance_record *records;
14 int *doc_frequency_vec;
// Per-record relevance bookkeeping. Only the tag line and two members are
// visible in this listing; the braces and any further members are not shown.
20 struct relevance_record
// Pointer to a struct record (declared elsewhere); presumably the record
// being scored -- TODO confirm.
22 struct record *record;
// Array of int counters. relevance_newrec allocates it with vec_len entries
// (taken from the owning struct relevance) and zeroes it.
23 int *term_frequency_vec;
26 // We use this data structure to recognize terms in input records,
27 // and map them to record term vectors for counting.
// Each trie node carries a list[] that create_word_trie_node initialises for
// 26 entries; every entry has a child pointer and a termno.
// NOTE(review): the struct header and the termno member are not visible in
// this listing.
// Link to the next-level node for this entry; 0 means no child yet.
32 struct word_trie *child;
// Allocate one trie node from nmem and reset each of its 26 entries to the
// empty state: no child (0) and term number -1.
// NOTE(review): the declaration of i and the return statement are not visible
// here; the function presumably returns res -- confirm.
37 static struct word_trie *create_word_trie_node(NMEM nmem)
39 struct word_trie *res = nmem_malloc(nmem, sizeof(struct word_trie));
// 26 slots, matching the 'a'..'z' range check in word_trie_addterm.
41 for (i = 0; i < 26; i++)
43 res->list[i].child = 0;
// -1 is the initial value; build_word_trie hands out term numbers from 1 up.
44 res->list[i].termno = -1;
// Insert term into the trie rooted at n and tag an entry with term number num.
// The function calls itself on the child node to descend one level.
// NOTE(review): several lines are not visible in this listing: what happens
// for characters outside 'a'..'z', how term is advanced, and the condition
// that chooses between storing termno and recursing.
49 static void word_trie_addterm(NMEM nmem, struct word_trie *n, const char *term, int num)
// Case-fold the current character of term.
// NOTE(review): *term is a plain char; passing a negative value (other than
// EOF) to tolower() is undefined behaviour -- cast to unsigned char first.
52 int c = tolower(*term);
// Reject anything that is not a lowercase letter after folding; the action
// taken in that case is not visible here.
53 if (c < 'a' || c > 'z')
// NOTE(review): list[] is initialised for 26 entries, so c must be rebased to
// 0..25 before it is used as an index -- presumably on a line not shown;
// confirm, otherwise this indexes out of bounds.
// Create the child node on demand.
58 if (!n->list[c].child)
60 struct word_trie *new = create_word_trie_node(nmem);
61 n->list[c].child = new;
// Record num as the term number on this entry.
64 n->list[c].termno = num;
// Continue the insertion in the child node.
66 word_trie_addterm(nmem, n->list[c].child, term, num);
// Build a trie from terms, an array of strings terminated by a null pointer
// (the loop stops at the first *p that is 0). All nodes come from nmem.
// NOTE(review): the declarations of i and p and the return statement are not
// visible; the function presumably returns res -- confirm.
73 static struct word_trie *build_word_trie(NMEM nmem, const char **terms)
// Root node of the trie.
75 struct word_trie *res = create_word_trie_node(nmem);
// Terms are numbered in array order, starting at 1 (not 0).
// NOTE(review): verify that vec_len leaves room for index == number of terms.
79 for (i = 1, p = terms; *p; p++, i++)
80 word_trie_addterm(nmem, res, *p, i);
// Create a relevance object from nmem for the given terms array (terminated
// by a null pointer), sized for numrecs record entries.
// NOTE(review): the lines that declare p and i, assign vec_len, and return
// the result are not visible in this listing.
84 struct relevance *relevance_create(NMEM nmem, const char **terms, int numrecs)
86 struct relevance *res = nmem_malloc(nmem, sizeof(struct relevance));
// i advances once per entry in terms; the loop body is not visible here.
90 for (p = terms, i = 0; *p; p++, i++)
// Int array of vec_len entries, cleared to zero.
// NOTE(review): bzero is a legacy BSD call; memset(ptr, 0, n) is the portable
// equivalent.
93 res->doc_frequency_vec = nmem_malloc(nmem, res->vec_len * sizeof(int));
94 bzero(res->doc_frequency_vec, res->vec_len * sizeof(int));
// NOTE(review): records is declared as struct relevance_record *, yet the
// element size used here is that of a pointer. If the array is meant to hold
// numrecs structs this under-allocates; if it is meant to hold pointers the
// declaration should be struct relevance_record ** -- confirm the intent.
97 res->records = nmem_malloc(nmem, numrecs * sizeof(struct relevance_record *));
// Trie built from the same terms array.
98 res->wt = build_word_trie(nmem, terms);
// Allocate a relevance_record from r->nmem and give it a zeroed
// term-frequency vector of r->vec_len ints.
// NOTE(review): how rec is used and what is returned are not visible in this
// listing; presumably rec is stored in the new entry and res is returned --
// confirm.
102 struct relevance_record *relevance_newrec(struct relevance *r, struct record *rec)
104 struct relevance_record *res = nmem_malloc(r->nmem,
105 sizeof(struct relevance_record));
// One int counter per slot, all starting at 0.
107 res->term_frequency_vec = nmem_malloc(r->nmem, r->vec_len * sizeof(int));
108 bzero(res->term_frequency_vec, r->vec_len * sizeof(int));
// NOTE(review): only the signature is visible in this listing. Presumably
// scans len bytes of words and updates the term counts held in rec -- confirm
// against the body.
112 void relevance_countwords(struct relevance_record *rec, const char *words, int len)
// NOTE(review): only the signature is visible in this listing. Presumably
// called once counting for rec is finished -- confirm what it finalises.
116 void relevance_donerecord(struct relevance_record *rec)
120 // Prepare for a relevance-sorted read of up to num entries
// NOTE(review): the body is not visible in this listing; how the ordering is
// computed cannot be confirmed from here.
121 void relevance_prepare_read(struct relevance *r, int num)
// Returns a struct record pointer for the relevance object r.
// NOTE(review): only the signature is visible. Presumably yields the next
// record after relevance_prepare_read, and a null pointer when exhausted --
// confirm against the body.
125 struct record *relevance_read(struct relevance *r)
133 * indent-tabs-mode: nil
135 * vim: shiftwidth=4 tabstop=8 expandtab