X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fzsets.c;h=f77258b66475da4bd51227507a7f6dcbc003e769;hb=77686142af94172d1887190ebd47aeb53f704057;hp=5fce7d30566958201db415178f706beec28ae4bd;hpb=69da23537c6bb71ab948e079708bf8ea090de73f;p=idzebra-moved-to-github.git diff --git a/index/zsets.c b/index/zsets.c index 5fce7d3..f77258b 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -4,7 +4,16 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zsets.c,v $ - * Revision 1.27 2000-04-05 09:49:36 adam + * Revision 1.30 2001-10-15 19:53:43 adam + * POSIX thread updates. First work on term sets. + * + * Revision 1.29 2001/01/22 10:42:56 adam + * Added numerical sort. + * + * Revision 1.28 2000/07/07 12:49:20 adam + * Optimized resultSetInsert{Rank,Sort}. + * + * Revision 1.27 2000/04/05 09:49:36 adam * On Unix, zebra/z'mbol uses automake. * * Revision 1.26 2000/03/20 19:08:36 adam @@ -113,6 +122,13 @@ #define SORT_IDX_ENTRYSIZE 64 #define ZSET_SORT_MAX_LEVEL 3 +struct zebra_set_term_entry { + int reg_type; + char *db; + int set; + int use; + char *term; +}; struct zebra_set { char *name; RSET rset; @@ -122,7 +138,10 @@ struct zebra_set { char **basenames; Z_RPNQuery *rpn; struct zset_sort_info *sort_info; + struct zebra_set_term_entry *term_entries; + int term_entries_max; struct zebra_set *next; + int locked; }; struct zset_sort_entry { @@ -151,6 +170,7 @@ ZebraSet resultSetAddRPN (ZebraHandle zh, ODR input, ODR output, zebraSet = resultSetAdd (zh, setname, 1); if (!zebraSet) return 0; + zebraSet->locked = 1; zebraSet->rpn = 0; zebraSet->num_bases = num_bases; zebraSet->basenames = basenames; @@ -163,9 +183,37 @@ ZebraSet resultSetAddRPN (ZebraHandle zh, ODR input, ODR output, zh->hits = zebraSet->hits; if (zebraSet->rset) zebraSet->rpn = rpn; + zebraSet->locked = 0; return zebraSet; } +void resultSetAddTerm (ZebraHandle zh, ZebraSet s, int reg_type, + const char *db, int set, + int use, const char *term) +{ + if (!s->nmem) + s->nmem = nmem_create (); + if (!s->term_entries) + { + int i; + s->term_entries_max = 1000; + s->term_entries = + nmem_malloc (s->nmem, s->term_entries_max * + sizeof(*s->term_entries)); + for (i = 0; i < s->term_entries_max; i++) + s->term_entries[i].term = 0; + } + if (s->hits < s->term_entries_max) + { + s->term_entries[s->hits].reg_type = reg_type; + s->term_entries[s->hits].db = nmem_strdup (s->nmem, db); + s->term_entries[s->hits].set = set; + s->term_entries[s->hits].use = use; + s->term_entries[s->hits].term = nmem_strdup (s->nmem, term); + } + (s->hits)++; +} + ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov) { ZebraSet s; @@ -177,7 +225,7 @@ ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov) if (s) { logf (LOG_DEBUG, "updating result set %s", name); - if (!ov) + if (!ov || s->locked) return NULL; if (s->rset) rset_delete (s->rset); @@ -205,8 +253,12 @@ ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov) for (i = 0; i < s->sort_info->max_entries; i++) s->sort_info->entries[i] = s->sort_info->all_entries + i; } + s->locked = 0; + s->term_entries = 0; + s->hits = 0; s->rset = 0; - s->nmem = 0; + s->nmem = 0; + s->rpn = 0; return s; } @@ -217,14 +269,13 @@ ZebraSet resultSetGet (ZebraHandle zh, const char *name) for (s = zh->sets; s; s = s->next) if (!strcmp (s->name, name)) { - if (!s->rset && s->rpn) + if (!s->term_entries && !s->rset && s->rpn) { NMEM nmem = nmem_create (); s->rset = rpn_search (zh, nmem, s->rpn, s->num_bases, s->basenames, s->name, s); nmem_destroy (nmem); - } return s; } @@ -266,7 +317,8 @@ void resultSetDestroy (ZebraHandle zh, int num, char **names,int *statuses) if (s->nmem) nmem_destroy (s->nmem); - rset_delete (s->rset); + if (s->rset) + rset_delete (s->rset); xfree (s->name); xfree (s); } @@ -279,7 +331,7 @@ ZebraPosSet zebraPosSetCreate (ZebraHandle zh, const char *name, int num, int *positions) { ZebraSet sset; - ZebraPosSet sr; + ZebraPosSet sr = 0; RSET rset; int i; struct zset_sort_info *sort_info; @@ -287,75 +339,100 @@ ZebraPosSet zebraPosSetCreate (ZebraHandle zh, const char *name, if (!(sset = resultSetGet (zh, name))) return NULL; if (!(rset = sset->rset)) - return NULL; - sr = (ZebraPosSet) xmalloc (sizeof(*sr) * num); - for (i = 0; isort_info; - if (sort_info) - { - int position; - + if (!sset->term_entries) + return 0; + sr = (ZebraPosSet) xmalloc (sizeof(*sr) * num); for (i = 0; i 0 && position <= sort_info->num_entries) + int j; + struct zebra_set_term_entry *entry = sset->term_entries; + + sr[i].sysno = 0; + sr[i].score = -1; + sr[i].term = 0; + sr[i].db = 0; + + if (positions[i] <= sset->term_entries_max) { - logf (LOG_DEBUG, "got pos=%d (sorted)", position); - sr[i].sysno = sort_info->entries[position-1]->sysno; - sr[i].score = sort_info->entries[position-1]->score; + sr[i].term = sset->term_entries[positions[i]-1].term; + sr[i].db = sset->term_entries[positions[i]-1].db; } } } - /* did we really get all entries using sort ? */ - for (i = 0; isort_info; if (sort_info) - position = sort_info->num_entries; - while (num_i < num && positions[num_i] < position) - num_i++; - rfd = rset_open (rset, RSETF_READ); - while (num_i < num && rset_read (rset, rfd, &key, &term_index)) { - if (key.sysno != psysno) + int position; + + for (i = 0; i 0 && position <= sort_info->num_entries) { - /* determine we alreay have this in our set */ - for (i = sort_info->num_entries; --i >= 0; ) - if (psysno == sort_info->entries[i]->sysno) - break; - if (i >= 0) - continue; + logf (LOG_DEBUG, "got pos=%d (sorted)", position); + sr[i].sysno = sort_info->entries[position-1]->sysno; + sr[i].score = sort_info->entries[position-1]->score; } - position++; - assert (num_i < num); - if (position == positions[num_i]) + } + } + /* did we really get all entries using sort ? */ + for (i = 0; inum_entries; + while (num_i < num && positions[num_i] < position) + num_i++; + rfd = rset_open (rset, RSETF_READ); + while (num_i < num && rset_read (rset, rfd, &key, &term_index)) + { + if (key.sysno != psysno) { - sr[num_i].sysno = psysno; - logf (LOG_DEBUG, "got pos=%d (unsorted)", position); - sr[num_i].score = -1; - num_i++; + psysno = key.sysno; + if (sort_info) + { + /* determine we alreay have this in our set */ + for (i = sort_info->num_entries; --i >= 0; ) + if (psysno == sort_info->entries[i]->sysno) + break; + if (i >= 0) + continue; + } + position++; + assert (num_i < num); + if (position == positions[num_i]) + { + sr[num_i].sysno = psysno; + logf (LOG_DEBUG, "got pos=%d (unsorted)", position); + sr[num_i].score = -1; + num_i++; + } } } + rset_close (rset, rfd); } - rset_close (rset, rfd); } return sr; } @@ -368,6 +445,7 @@ void zebraPosSetDestroy (ZebraHandle zh, ZebraPosSet records, int num) struct sortKeyInfo { int relation; int attrUse; + int numerical; }; void resultSetInsertSort (ZebraHandle zh, ZebraSet sset, @@ -391,8 +469,21 @@ void resultSetInsertSort (ZebraHandle zh, ZebraSet sset, int rel = 0; for (j = 0; jentries[i]->buf[j], + if (criteria[j].numerical) + { + double diff = atof(this_entry.buf[j]) - + atof(sort_info->entries[i]->buf[j]); + rel = 0; + if (diff > 0.0) + rel = 1; + else if (diff < 0.0) + rel = -1; + } + else + { + rel = memcmp (this_entry.buf[j], sort_info->entries[i]->buf[j], SORT_IDX_ENTRYSIZE); + } if (rel) break; } @@ -409,20 +500,23 @@ void resultSetInsertSort (ZebraHandle zh, ZebraSet sset, break; } } - j = sort_info->max_entries-1; + ++i; + j = sort_info->max_entries; if (i == j) return; - ++i; + + if (sort_info->num_entries == j) + --j; + else + j = (sort_info->num_entries)++; new_entry = sort_info->entries[j]; while (j != i) { sort_info->entries[j] = sort_info->entries[j-1]; --j; } - sort_info->entries[j] = new_entry; + sort_info->entries[i] = new_entry; assert (new_entry); - if (sort_info->num_entries != sort_info->max_entries) - (sort_info->num_entries)++; for (i = 0; ibuf[i], this_entry.buf[i], SORT_IDX_ENTRYSIZE); new_entry->sysno = sysno; @@ -453,20 +547,24 @@ void resultSetInsertRank (ZebraHandle zh, struct zset_sort_info *sort_info, break; } } - j = sort_info->max_entries-1; + ++i; + j = sort_info->max_entries; if (i == j) return; - ++i; + + if (sort_info->num_entries == j) + --j; + else + j = (sort_info->num_entries)++; + new_entry = sort_info->entries[j]; while (j != i) { sort_info->entries[j] = sort_info->entries[j-1]; --j; } - sort_info->entries[j] = new_entry; + sort_info->entries[i] = new_entry; assert (new_entry); - if (sort_info->num_entries != sort_info->max_entries) - (sort_info->num_entries)++; new_entry->sysno = sysno; new_entry->score = score; } @@ -570,7 +668,8 @@ void resultSetSortSingle (ZebraHandle zh, NMEM nmem, logf (LOG_DEBUG, "Sort: key %d is of type sortAttributes", i+1); sort_criteria[i].attrUse = zebra_maps_sort (zh->service->zebra_maps, - sk->u.sortAttributes); + sk->u.sortAttributes, + &sort_criteria[i].numerical); logf (LOG_DEBUG, "use value = %d", sort_criteria[i].attrUse); if (sort_criteria[i].attrUse == -1) {