From: Adam Dickmeiss Date: Wed, 15 Aug 2012 10:59:05 +0000 (+0200) Subject: Full termlist saved (as before) and also sorted. X-Git-Tag: v1.6.16~10 X-Git-Url: http://git.indexdata.com/?p=pazpar2-moved-to-github.git;a=commitdiff_plain;h=b0e04a1cb9261a088d5c100dbc891f3240fa88ce Full termlist saved (as before) and also sorted. No max of termlist(s) any more. The memory usage is the same (in fact less - over time), but we have a full quick sort instead of a smaller quick sort, But NO insertion sort for each term (which is probably similar to extra quick sort). This makes termlists stable. --- diff --git a/src/session.c b/src/session.c index 1c158e6..f85a03e 100644 --- a/src/session.c +++ b/src/session.c @@ -78,8 +78,6 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include -#define TERMLIST_HIGH_SCORE 100 - #define MAX_CHUNK 15 #define MAX(a,b) ((a)>(b)?(a):(b)) @@ -231,8 +229,7 @@ void add_facet(struct session *s, const char *type, const char *value, int count } s->termlists[i].name = nmem_strdup(s->nmem, type); - s->termlists[i].termlist - = termlist_create(s->nmem, TERMLIST_HIGH_SCORE); + s->termlists[i].termlist = termlist_create(s->nmem); s->num_termlists = i + 1; } @@ -1114,7 +1111,8 @@ void perform_termlist(struct http_channel *c, struct session *se, wrbuf_puts(c->wrbuf, "\">\n"); must_generate_empty = 0; - p = termlist_highscore(se->termlists[i].termlist, &len); + p = termlist_highscore(se->termlists[i].termlist, &len, + nmem_tmp); if (p) { int i; diff --git a/src/termlists.c b/src/termlists.c index 54e5f67..6808a68 100644 --- a/src/termlists.c +++ b/src/termlists.c @@ -21,6 +21,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #endif +#include #include #include #include @@ -44,15 +45,11 @@ struct termlist struct termlist_bucket **hashtable; unsigned hash_size; - struct termlist_score **highscore; - int highscore_size; - int highscore_num; - int highscore_min; - + int no_entries; NMEM nmem; }; -struct termlist *termlist_create(NMEM nmem, int highscore_size) +struct termlist *termlist_create(NMEM nmem) { struct termlist *res = nmem_malloc(nmem, sizeof(struct termlist)); res->hash_size = 399; @@ -60,53 +57,10 @@ struct termlist *termlist_create(NMEM nmem, int highscore_size) nmem_malloc(nmem, res->hash_size * sizeof(struct termlist_bucket*)); memset(res->hashtable, 0, res->hash_size * sizeof(struct termlist_bucket*)); res->nmem = nmem; - - res->highscore = nmem_malloc(nmem, highscore_size * sizeof(struct termlist_score *)); - res->highscore_size = highscore_size; - res->highscore_num = 0; - res->highscore_min = 0; - + res->no_entries = 0; return res; } -static void update_highscore(struct termlist *tl, struct termlist_score *t) -{ - int i; - int smallest; - int me = -1; - - if (tl->highscore_num > tl->highscore_size && t->frequency < tl->highscore_min) - return; - - smallest = 0; - for (i = 0; i < tl->highscore_num; i++) - { - if (tl->highscore[i]->frequency < tl->highscore[smallest]->frequency) - smallest = i; - if (tl->highscore[i] == t) - me = i; - } - if (tl->highscore_num) - tl->highscore_min = tl->highscore[smallest]->frequency; - if (t->frequency < tl->highscore_min) - tl->highscore_min = t->frequency; - if (me >= 0) - return; - if (tl->highscore_num < tl->highscore_size) - { - tl->highscore[tl->highscore_num++] = t; - if (t->frequency < tl->highscore_min) - tl->highscore_min = t->frequency; - } - else - { - if (t->frequency > tl->highscore[smallest]->frequency) - { - tl->highscore[smallest] = t; - } - } -} - void termlist_insert(struct termlist *tl, const char *display_term, const char *norm_term, int freq) { @@ -123,7 +77,6 @@ void termlist_insert(struct termlist *tl, const char *display_term, if (!strcmp(buf, (*p)->term.norm_term)) { (*p)->term.frequency += freq; - update_highscore(tl, &((*p)->term)); break; } } @@ -137,7 +90,7 @@ void termlist_insert(struct termlist *tl, const char *display_term, new->term.frequency = freq; new->next = 0; *p = new; - update_highscore(tl, &new->term); + tl->no_entries++; } } @@ -151,11 +104,25 @@ static int compare(const void *s1, const void *s2) return strcmp((*p1)->display_term, (*p2)->display_term); } -struct termlist_score **termlist_highscore(struct termlist *tl, int *len) +struct termlist_score **termlist_highscore(struct termlist *tl, int *len, + NMEM nmem) { - qsort(tl->highscore, tl->highscore_num, sizeof(struct termlist_score*), compare); - *len = tl->highscore_num; - return tl->highscore; + struct termlist_score **highscore = + (struct termlist_score **) + nmem_malloc(nmem, tl->no_entries * sizeof(*highscore)); + + int no = 0; + unsigned bucket; + for (bucket = 0; bucket < tl->hash_size; bucket++) + { + struct termlist_bucket *p; + for (p = tl->hashtable[bucket]; p; p = p->next) + highscore[no++] = &p->term; + } + assert(no == tl->no_entries); + qsort(highscore, tl->no_entries, sizeof(struct termlist_score*), compare); + *len = tl->no_entries; + return highscore; } /* diff --git a/src/termlists.h b/src/termlists.h index a2f0741..6f7271b 100644 --- a/src/termlists.h +++ b/src/termlists.h @@ -31,10 +31,11 @@ struct termlist_score struct termlist; -struct termlist *termlist_create(NMEM nmem, int highscore_size); +struct termlist *termlist_create(NMEM nmem); void termlist_insert(struct termlist *tl, const char *display_term, const char *norm_term, int freq); -struct termlist_score **termlist_highscore(struct termlist *tl, int *len); +struct termlist_score **termlist_highscore(struct termlist *tl, int *len, + NMEM nmem); #endif