-/* $Id: zsets.c,v 1.87 2005-06-07 11:36:38 adam Exp $
- Copyright (C) 1995-2005
+/* $Id: zsets.c,v 1.110 2006-08-14 10:40:15 adam Exp $
+ Copyright (C) 1995-2006
Index Data ApS
This file is part of the Zebra server.
for more details.
You should have received a copy of the GNU General Public License
-along with Zebra; see the file LICENSE.zebra. If not, write to the
-Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
-02111-1307, USA.
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
*/
#endif
#include "index.h"
+#include "rank.h"
#include <yaz/diagbib1.h>
#include <rset.h>
-#define SORT_IDX_ENTRYSIZE 64
-#define ZSET_SORT_MAX_LEVEL 3
+#define ZSET_SORT_MAX_LEVEL 10
struct zebra_set_term_entry {
int reg_type;
char *db;
- int set;
- int use;
+ char *index_name;
char *term;
};
int num_bases;
char **basenames;
Z_RPNQuery *rpn;
+ Z_SortKeySpecList *sortSpec;
struct zset_sort_info *sort_info;
struct zebra_set_term_entry *term_entries;
int term_entries_max;
zint cache_position; /* last position */
RSFD cache_rfd; /* rfd (NULL if not existing) */
zint cache_psysno; /* sysno for last position */
+ zint approx_limit; /* limit before we do approx */
};
struct zset_sort_entry {
zint sysno;
int score;
- char buf[ZSET_SORT_MAX_LEVEL][SORT_IDX_ENTRYSIZE];
};
struct zset_sort_info {
for (i = 0; sort_sequence->specs[i]; i++)
;
sort_sequence->num_specs = i;
+ rset->hits_limit = sset->approx_limit;
if (!i)
{
res = resultSetRank (zh, sset, rset, rset_nmem);
zh->hits = 0;
- zebraSet = resultSetAdd (zh, setname, 1);
+ zebraSet = resultSetAdd(zh, setname, 1);
if (!zebraSet)
return ZEBRA_FAIL;
zebraSet->locked = 1;
zebraSet->basenames =
nmem_malloc (zebraSet->nmem, num_bases * sizeof(*zebraSet->basenames));
for (i = 0; i<num_bases; i++)
- zebraSet->basenames[i] = nmem_strdup (zebraSet->nmem, basenames[i]);
+ zebraSet->basenames[i] = nmem_strdup(zebraSet->nmem, basenames[i]);
res = resultSetSearch(zh, zebraSet->nmem, zebraSet->rset_nmem,
rpn, zebraSet);
return res;
}
-void resultSetAddTerm (ZebraHandle zh, ZebraSet s, int reg_type,
- const char *db, int set,
- int use, const char *term)
+void resultSetAddTerm(ZebraHandle zh, ZebraSet s, int reg_type,
+ const char *db, const char *index_name,
+ const char *term)
{
assert(zh); /* compiler shut up */
if (!s->nmem)
{
s->term_entries[s->hits].reg_type = reg_type;
s->term_entries[s->hits].db = nmem_strdup (s->nmem, db);
- s->term_entries[s->hits].set = set;
- s->term_entries[s->hits].use = use;
- s->term_entries[s->hits].term = nmem_strdup (s->nmem, term);
+ s->term_entries[s->hits].index_name = nmem_strdup(s->nmem, index_name);
+ s->term_entries[s->hits].term = nmem_strdup(s->nmem, term);
}
(s->hits)++;
}
-ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov)
+ZebraSet resultSetAdd(ZebraHandle zh, const char *name, int ov)
{
ZebraSet s;
int i;
s->rset_nmem = 0;
s->nmem = 0;
s->rpn = 0;
+ s->sortSpec = 0;
s->cache_position = 0;
s->cache_rfd = 0;
+ s->approx_limit = zh->approx_limit;
return s;
}
-ZebraSet resultSetGet (ZebraHandle zh, const char *name)
+ZebraSet resultSetGet(ZebraHandle zh, const char *name)
{
ZebraSet s;
if (!s->rset_nmem)
s->rset_nmem=nmem_create();
resultSetSearch(zh, nmem, s->rset_nmem, s->rpn, s);
+ if (s->rset && s->sortSpec)
+ {
+ int sort_status;
+ yaz_log(log_level_resultsets, "resort %s", name);
+ resultSetSortSingle (zh, nmem, s, s->rset, s->sortSpec,
+ &sort_status);
+ }
nmem_destroy (nmem);
}
return s;
}
}
-void resultSetDestroy (ZebraHandle zh, int num, char **names,int *statuses)
+void resultSetDestroy(ZebraHandle zh, int num, char **names,int *statuses)
{
ZebraSet * ss = &zh->sets;
int i;
RSET rset;
int i;
struct zset_sort_info *sort_info;
+ size_t sysno_mem_index = 0;
+
+ if (zh->m_staticrank)
+ sysno_mem_index = 1;
if (!log_level_set)
loglevels();
}
while (num_i < num && rset_read (rfd, &key, 0))
{
- zint this_sys = key.mem[0];
+ zint this_sys = key.mem[sysno_mem_index];
if (this_sys != psysno)
{
psysno = this_sys;
struct sortKeyInfo {
int relation;
+#if 0
int attrUse;
+#else
+ int ord;
+#endif
int numerical;
};
-void resultSetInsertSort (ZebraHandle zh, ZebraSet sset,
- struct sortKeyInfo *criteria, int num_criteria,
- zint sysno)
+void resultSetInsertSort(ZebraHandle zh, ZebraSet sset,
+ struct sortKeyInfo *criteria, int num_criteria,
+ zint sysno,
+ char *cmp_buf[], char *tmp_cmp_buf[])
{
- struct zset_sort_entry this_entry;
struct zset_sort_entry *new_entry = NULL;
struct zset_sort_info *sort_info = sset->sort_info;
int i, j;
sortIdx_sysno (zh->reg->sortIdx, sysno);
for (i = 0; i<num_criteria; i++)
{
- sortIdx_type (zh->reg->sortIdx, criteria[i].attrUse);
- sortIdx_read (zh->reg->sortIdx, this_entry.buf[i]);
+ char *this_entry_buf = tmp_cmp_buf[i];
+ memset(this_entry_buf, '\0', SORT_IDX_ENTRYSIZE);
+ if (criteria[i].ord != -1)
+ {
+ sortIdx_type(zh->reg->sortIdx, criteria[i].ord);
+ sortIdx_read(zh->reg->sortIdx, this_entry_buf);
+ }
}
i = sort_info->num_entries;
while (--i >= 0)
int rel = 0;
for (j = 0; j<num_criteria; j++)
{
+ char *this_entry_buf = tmp_cmp_buf[j];
+ char *other_entry_buf =
+ cmp_buf[j] + i * SORT_IDX_ENTRYSIZE;
if (criteria[j].numerical)
{
- double diff = atof(this_entry.buf[j]) -
- atof(sort_info->entries[i]->buf[j]);
+ double diff = atof(this_entry_buf) - atof(other_entry_buf);
rel = 0;
if (diff > 0.0)
rel = 1;
}
else
{
- rel = memcmp (this_entry.buf[j], sort_info->entries[i]->buf[j],
- SORT_IDX_ENTRYSIZE);
+ rel = memcmp(this_entry_buf, other_entry_buf,
+ SORT_IDX_ENTRYSIZE);
}
if (rel)
break;
new_entry = sort_info->entries[j];
while (j != i)
{
+ int k;
+ for (k = 0; k<num_criteria; k++)
+ {
+ char *j_buf = cmp_buf[k] + j * SORT_IDX_ENTRYSIZE;
+ char *j_1_buf = cmp_buf[k] + (j-1) * SORT_IDX_ENTRYSIZE;
+ memcpy(j_buf, j_1_buf, SORT_IDX_ENTRYSIZE);
+ }
sort_info->entries[j] = sort_info->entries[j-1];
--j;
}
sort_info->entries[i] = new_entry;
assert (new_entry);
for (i = 0; i<num_criteria; i++)
- memcpy (new_entry->buf[i], this_entry.buf[i], SORT_IDX_ENTRYSIZE);
+ {
+ char *new_entry_buf = cmp_buf[i] + j * SORT_IDX_ENTRYSIZE;
+ char *this_entry_buf = tmp_cmp_buf[i];
+ memcpy(new_entry_buf, this_entry_buf, SORT_IDX_ENTRYSIZE);
+ }
new_entry->sysno = sysno;
new_entry->score = -1;
}
-void resultSetInsertRank (ZebraHandle zh, struct zset_sort_info *sort_info,
- zint sysno, int score, int relation)
+void resultSetInsertRank(ZebraHandle zh, struct zset_sort_info *sort_info,
+ zint sysno, int score, int relation)
{
struct zset_sort_entry *new_entry = NULL;
int i, j;
new_entry->score = score;
}
+/*
+ * Produce a copy of the RPN query `src`.
+ *
+ * The query is encoded with one ODR stream and the resulting buffer is
+ * decoded again with a second ODR stream.  The memory held by the decoding
+ * stream is transferred to `nmem` before both streams are destroyed
+ * (presumably so the returned structure stays valid afterwards - confirm
+ * against the nmem_transfer documentation).
+ *
+ * Returns the decoded query.  The result is 0 when encoding fails or when
+ * no encoded buffer is available; the outcome of a failed decode is
+ * whatever z_RPNQuery leaves in the destination pointer.
+ */
+static Z_RPNQuery *copy_RPNQuery(Z_RPNQuery *src, NMEM nmem)
+{
+    Z_RPNQuery *copy = 0;
+    ODR enc = odr_createmem(ODR_ENCODE);
+    ODR dec = odr_createmem(ODR_DECODE);
+    char *enc_buf = 0;
+    int enc_len = 0;
+
+    /* the encoded buffer is only fetched after a successful encode */
+    if (z_RPNQuery(enc, &src, 0, 0))
+        enc_buf = odr_getbuf(enc, &enc_len, 0);
+
+    if (enc_buf)
+    {
+        odr_setbuf(dec, enc_buf, enc_len, 0);
+        z_RPNQuery(dec, &copy, 0, 0);
+    }
+
+    /* hand the decoder's memory to the caller's nmem before tearing down */
+    nmem_transfer(nmem, dec->mem);
+    odr_destroy(enc);
+    odr_destroy(dec);
+    return copy;
+}
+
+/*
+ * Produce a copy of the sort key specification list `src`.
+ *
+ * The list is encoded with one ODR stream and the resulting buffer is
+ * decoded again with a second ODR stream.  The memory held by the decoding
+ * stream is transferred to `nmem` before both streams are destroyed
+ * (presumably so the returned structure stays valid afterwards - confirm
+ * against the nmem_transfer documentation).
+ *
+ * Returns the decoded list.  The result is 0 when encoding fails or when
+ * no encoded buffer is available; the outcome of a failed decode is
+ * whatever z_SortKeySpecList leaves in the destination pointer.
+ */
+static Z_SortKeySpecList *copy_SortKeySpecList(Z_SortKeySpecList *src,
+                                               NMEM nmem)
+{
+    Z_SortKeySpecList *copy = 0;
+    ODR enc = odr_createmem(ODR_ENCODE);
+    ODR dec = odr_createmem(ODR_DECODE);
+    char *enc_buf = 0;
+    int enc_len = 0;
+
+    /* the encoded buffer is only fetched after a successful encode */
+    if (z_SortKeySpecList(enc, &src, 0, 0))
+        enc_buf = odr_getbuf(enc, &enc_len, 0);
+
+    if (enc_buf)
+    {
+        odr_setbuf(dec, enc_buf, enc_len, 0);
+        z_SortKeySpecList(dec, &copy, 0, 0);
+    }
+
+    /* hand the decoder's memory to the caller's nmem before tearing down */
+    nmem_transfer(nmem, dec->mem);
+    odr_destroy(enc);
+    odr_destroy(dec);
+    return copy;
+}
+
+/*
+ * Create a result set named `setname` that mirrors the result set `rset`.
+ *
+ * The new set is obtained from resultSetAdd() and gets its own nmem.
+ * The database names are duplicated into that nmem; the underlying RSET
+ * (if any) is duplicated with rset_dup() and the RPN query (if any) is
+ * copied with copy_RPNQuery().  No other members are assigned here.
+ *
+ * Returns the new set, or 0 when resultSetAdd() does not deliver one.
+ */
+ZebraSet resultSetClone(ZebraHandle zh, const char *setname,
+                        ZebraSet rset)
+{
+    int base_no;
+    ZebraSet clone = resultSetAdd(zh, setname, 1);
+
+    if (!clone)
+        return 0;
+
+    clone->nmem = nmem_create();
+
+    /* duplicate the list of database names into the clone's own memory */
+    clone->num_bases = rset->num_bases;
+    clone->basenames =
+        nmem_malloc(clone->nmem, clone->num_bases * sizeof(*rset->basenames));
+    base_no = 0;
+    while (base_no < rset->num_bases)
+    {
+        clone->basenames[base_no] =
+            nmem_strdup(clone->nmem, rset->basenames[base_no]);
+        base_no++;
+    }
+
+    if (rset->rset)
+        clone->rset = rset_dup(rset->rset);
+    if (rset->rpn)
+        clone->rpn = copy_RPNQuery(rset->rpn, clone->nmem);
+    return clone;
+}
+
ZEBRA_RES resultSetSort(ZebraHandle zh, NMEM nmem,
int num_input_setnames, const char **input_setnames,
const char *output_setname,
return ZEBRA_FAIL;
}
if (strcmp (output_setname, input_setnames[0]))
- {
- rset = rset_dup (rset);
- sset = resultSetAdd (zh, output_setname, 1);
- sset->rset = rset;
- }
+ sset = resultSetClone(zh, output_setname, sset);
+ sset->sortSpec = copy_SortKeySpecList(sort_sequence, sset->nmem);
return resultSetSortSingle (zh, nmem, sset, rset, sort_sequence,
sort_status);
}
zint kno = 0;
zint psysno = 0;
struct it_key key;
- struct sortKeyInfo sort_criteria[3];
+ struct sortKeyInfo sort_criteria[ZSET_SORT_MAX_LEVEL];
+ char *cmp_buf[ZSET_SORT_MAX_LEVEL];
+ char *tmp_cmp_buf[ZSET_SORT_MAX_LEVEL];
int num_criteria;
RSFD rfd;
TERMID termid;
TERMID *terms;
int numTerms = 0;
+ size_t sysno_mem_index = 0;
+
+ if (zh->m_staticrank)
+ sysno_mem_index = 1;
assert(nmem); /* compiler shut up about unused param */
sset->sort_info->num_entries = 0;
sset->hits = 0;
num_criteria = sort_sequence->num_specs;
- if (num_criteria > 3)
- num_criteria = 3;
+ if (num_criteria > ZSET_SORT_MAX_LEVEL)
+ num_criteria = ZSET_SORT_MAX_LEVEL;
for (i = 0; i < num_criteria; i++)
{
Z_SortKeySpec *sks = sort_sequence->specs[i];
Z_SortKey *sk;
+ ZEBRA_RES res;
+
+ sort_criteria[i].ord = -1;
+ sort_criteria[i].numerical = 0;
+ if (sks->which == Z_SortKeySpec_missingValueData)
+ {
+ zebra_setError(zh, YAZ_BIB1_UNSUPP_MISSING_DATA_ACTION, 0);
+ return ZEBRA_FAIL;
+ }
if (*sks->sortRelation == Z_SortKeySpec_ascending)
sort_criteria[i].relation = 'A';
else if (*sks->sortRelation == Z_SortKeySpec_descending)
case Z_SortKey_sortField:
yaz_log(log_level_sort, "key %d is of type sortField",
i+1);
- zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
- return ZEBRA_FAIL;
+ sort_criteria[i].numerical = 0;
+ sort_criteria[i].ord =
+ zebraExplain_lookup_attr_str(zh->reg->zei,
+ zinfo_index_category_sort,
+ 's',
+ sk->u.sortField);
+ if (sks->which != Z_SortKeySpec_null
+ && sort_criteria[i].ord == -1)
+ {
+ zebra_setError(zh,
+ YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
+ return ZEBRA_FAIL;
+ }
+ break;
case Z_SortKey_elementSpec:
yaz_log(log_level_sort, "key %d is of type elementSpec",
i+1);
return ZEBRA_FAIL;
case Z_SortKey_sortAttributes:
yaz_log(log_level_sort, "key %d is of type sortAttributes", i+1);
- sort_criteria[i].attrUse =
- zebra_maps_sort (zh->reg->zebra_maps,
- sk->u.sortAttributes,
- &sort_criteria[i].numerical);
- yaz_log(log_level_sort, "use value = %d", sort_criteria[i].attrUse);
- if (sort_criteria[i].attrUse == -1)
- {
- zebra_setError(
- zh, YAZ_BIB1_USE_ATTRIBUTE_REQUIRED_BUT_NOT_SUPPLIED, 0);
- return ZEBRA_FAIL;
- }
- if (sortIdx_type (zh->reg->sortIdx, sort_criteria[i].attrUse))
- {
- zebra_setError(
- zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
+ res = zebra_sort_get_ord(zh, sk->u.sortAttributes,
+ &sort_criteria[i].ord,
+ &sort_criteria[i].numerical);
+ if (sks->which != Z_SortKeySpec_null && res != ZEBRA_OK)
return ZEBRA_FAIL;
- }
break;
}
}
+ /* allocate space for each compare buf + one extra for tmp comparison */
+ for (i = 0; i<num_criteria; i++)
+ {
+ cmp_buf[i] = xmalloc(sset->sort_info->max_entries
+ * SORT_IDX_ENTRYSIZE);
+ tmp_cmp_buf[i] = xmalloc(SORT_IDX_ENTRYSIZE);
+ }
rfd = rset_open (rset, RSETF_READ);
while (rset_read (rfd, &key, &termid))
{
- zint this_sys = key.mem[0];
+ zint this_sys = key.mem[sysno_mem_index];
if (log_level_searchhits)
key_logdump_txt(log_level_searchhits, &key, termid->name);
kno++;
{
(sset->hits)++;
psysno = this_sys;
- resultSetInsertSort (zh, sset,
- sort_criteria, num_criteria, psysno);
+ resultSetInsertSort(zh, sset,
+ sort_criteria, num_criteria, psysno, cmp_buf,
+ tmp_cmp_buf);
}
}
rset_close (rfd);
+
+ for (i = 0; i<num_criteria; i++)
+ {
+ xfree(cmp_buf[i]);
+ xfree(tmp_cmp_buf[i]);
+ }
+
yaz_log(log_level_sort, ZINT_FORMAT " keys, " ZINT_FORMAT " sysnos, sort",
kno, sset->hits);
for (i = 0; i < numTerms; i++)
ZebraRankClass rank_class;
struct zset_sort_info *sort_info;
const char *rank_handler_name = res_get_def(zh->res, "rank", "rank-1");
+ size_t sysno_mem_index = 0;
+
+ if (zh->m_staticrank)
+ sysno_mem_index = 1;
if (!log_level_set)
loglevels();
terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
rset_getterms(rset, terms, n, &numTerms);
+
rank_class = zebraRankLookup(zh, rank_handler_name);
if (!rank_class)
{
{
RSFD rfd = rset_open(rset, RSETF_READ);
struct rank_control *rc = rank_class->control;
- double score;
+ int score;
+ zint count = 0;
void *handle =
(*rc->begin) (zh->reg, rank_class->class_handle, rset, nmem,
terms, numTerms);
- zint psysno = 0;
+ zint psysno = 0; /* previous doc id / sys no */
+ zint pstaticrank = 0; /* previous static rank */
+ int stop_flag = 0;
while (rset_read(rfd, &key, &termid))
{
- zint this_sys = key.mem[0];
+ zint this_sys = key.mem[sysno_mem_index];
+
zint seqno = key.mem[key.len-1];
kno++;
if (log_level_searchhits)
key_logdump_txt(log_level_searchhits, &key, termid->name);
- if (this_sys != psysno)
- {
- if (rfd->counted_items >= rset->hits_limit)
+ if (this_sys != psysno)
+ { /* new record .. */
+ if (rfd->counted_items > rset->hits_limit)
break;
if (psysno)
- {
- score = (*rc->calc) (handle, psysno);
+ { /* only if we did have a previous record */
+ score = (*rc->calc) (handle, psysno, pstaticrank,
+ &stop_flag);
+ /* insert the hit. A=Ascending */
resultSetInsertRank (zh, sort_info, psysno, score, 'A');
+ count++;
+ if (stop_flag)
+ break;
}
psysno = this_sys;
+ if (zh->m_staticrank)
+ pstaticrank = key.mem[0];
}
(*rc->add) (handle, CAST_ZINT_TO_INT(seqno), termid);
}
+ /* no more items */
if (psysno)
- {
- score = (*rc->calc)(handle, psysno);
+ { /* we had - at least - one record */
+ score = (*rc->calc)(handle, psysno, pstaticrank, &stop_flag);
+ /* insert the hit. A=Ascending */
resultSetInsertRank(zh, sort_info, psysno, score, 'A');
+ count++;
}
(*rc->end) (zh->reg, handle);
rset_close (rfd);
if (approx_array)
approx_array[no] = rset->hits_approx;
#if 0
- yaz_log(YLOG_LOG, "rset=%p term=%s count=" ZINT_FORMAT,
- rset, rset->term->name, rset->hits_count);
+ yaz_log(YLOG_LOG, "rset=%p term=%s limit=" ZINT_FORMAT
+ " count=" ZINT_FORMAT,
+ rset, rset->term->name, rset->hits_limit, rset->hits_count);
#endif
no++;
}
ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname,
int no, zint *count, int *approx,
- char *termbuf, size_t *termlen)
+ char *termbuf, size_t *termlen,
+ const char **term_ref_id)
{
ZebraSet sset = resultSetGet(zh, setname);
if (sset)
}
termbuf[*termlen] = '\0';
}
+ if (term_ref_id)
+ *term_ref_id = term_array[no]->ref_id;
xfree(term_array);
xfree(hits_array);
zint sysno, zebra_snippets *snippets)
{
ZebraSet sset = resultSetGet(zh, setname);
- yaz_log(YLOG_LOG, "zebra_get_hit_vector setname=%s zysno=" ZINT_FORMAT,
+ yaz_log(YLOG_DEBUG, "zebra_get_hit_vector setname=%s zysno=" ZINT_FORMAT,
setname, sysno);
if (!sset)
return ZEBRA_FAIL;
NMEM nmem = nmem_create();
struct it_key key;
RSET rsets[2], rset_comb;
- RSET rset_temp = rstemp_create(nmem, kc, kc->scope,
- res_get (zh->res, "setTmpDir"),0 );
+ RSET rset_temp = rset_create_temp(nmem, kc, kc->scope,
+ res_get (zh->res, "setTmpDir"),0 );
TERMID termid;
RSFD rsfd = rset_open(rset_temp, RSETF_WRITE);
rsets[0] = rset_temp;
rsets[1] = rset_dup(sset->rset);
- rset_comb = rsmulti_and_create(nmem, kc, kc->scope, 2, rsets);
+ rset_comb = rset_create_and(nmem, kc, kc->scope, 2, rsets);
rsfd = rset_open(rset_comb, RSETF_READ);
if (termid)
{
struct ord_list *ol;
- key_logdump_txt(YLOG_LOG, &key, termid->name);
for (ol = termid->ol; ol; ol = ol->next)
{
- yaz_log(YLOG_LOG, " ord=%d", ol->ord);
zebra_snippets_append(snippets, key.mem[key.len-1],
ol->ord, termid->name);
}
rset_delete(rset_comb);
nmem_destroy(nmem);
+ kc->dec(kc);
}
return ZEBRA_OK;
}
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+