-/* $Id: zsets.c,v 1.60 2004-09-01 15:01:32 heikki Exp $
- Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
- Index Data Aps
+/* $Id: zsets.c,v 1.77 2005-01-15 19:38:30 adam Exp $
+ Copyright (C) 1995-2005
+ Index Data ApS
This file is part of the Zebra server.
int term_entries_max;
struct zebra_set *next;
int locked;
+
+ zint cache_position; /* last position */
+ RSFD cache_rfd; /* rfd (NULL if not existing) */
+ zint cache_psysno; /* sysno for last position */
};
struct zset_sort_entry {
struct zset_sort_entry **entries;
};
+static int log_level_set=0;
+static int log_level_sorting=0;
+static int log_level_searchhits=0;
+static int log_level_searchterms=0;
+static int log_level_resultsets=0;
+
+/*
+ * Look up the log levels of the modules "sorting", "searchhits",
+ * "searchterms" and "resultsets" with yaz_log_module_level() and store
+ * them in the file-scope log_level_* variables.
+ *
+ * log_level_set is raised after the lookups, and the function returns
+ * immediately when it is already non-zero, so repeated calls do not
+ * repeat the lookups.
+ */
+static void loglevels(void)
+{
+    if (log_level_set)
+        return;
+    log_level_sorting = yaz_log_module_level("sorting");
+    log_level_searchhits = yaz_log_module_level("searchhits");
+    log_level_searchterms = yaz_log_module_level("searchterms");
+    log_level_resultsets = yaz_log_module_level("resultsets");
+    log_level_set = 1;
+}
+
ZebraSet resultSetAddRPN (ZebraHandle zh, NMEM m,
Z_RPNQuery *rpn, int num_bases,
char **basenames,
zebraSet->locked = 1;
zebraSet->rpn = 0;
zebraSet->nmem = m;
- zebraSet->rset_nmem=nmem_create(); /* FIXME - where to free this ?? */
+ zebraSet->rset_nmem=nmem_create();
zebraSet->num_bases = num_bases;
zebraSet->basenames =
(s->hits)++;
}
-#if 0 /* FIXME - Delete this, we don't count terms no more */
-int zebra_resultSetTerms (ZebraHandle zh, const char *setname,
- int no, zint *count,
- int *type, char *out, size_t *len)
-{
- ZebraSet s = resultSetGet (zh, setname);
- int no_max = 0;
-
- if (count)
- *count = 0;
- if (!s || !s->rset)
- return 0;
- no_max = s->rset->no_rset_terms;
- if (no < 0 || no >= no_max)
- return 0;
- if (count)
- *count = s->rset->rset_terms[no]->count;
- if (type)
- *type = s->rset->rset_terms[no]->type;
-
- if (out)
- {
- char *inbuf = s->rset->rset_terms[no]->name;
- size_t inleft = strlen(inbuf);
- size_t outleft = *len - 1;
- int converted = 0;
-
- if (zh->iconv_from_utf8 != 0)
- {
- char *outbuf = out;
- size_t ret;
-
- ret = yaz_iconv(zh->iconv_from_utf8, &inbuf, &inleft,
- &outbuf, &outleft);
- if (ret == (size_t)(-1))
- *len = 0;
- else
- *len = outbuf - out;
- converted = 1;
- }
- if (!converted)
- {
- if (inleft > outleft)
- inleft = outleft;
- *len = inleft;
- memcpy (out, inbuf, *len);
- }
- out[*len] = 0;
- }
- return no_max;
-}
-
-#endif
-
ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov)
{
ZebraSet s;
for (s = zh->sets; s; s = s->next)
if (!strcmp (s->name, name))
break;
+
+ if (!log_level_set)
+ loglevels();
if (s)
{
- yaz_log (LOG_DEBUG, "updating result set %s", name);
+ yaz_log(log_level_resultsets, "updating result set %s", name);
if (!ov || s->locked)
return NULL;
if (s->rset)
+ {
+ if (s->cache_rfd)
+ rset_close(s->cache_rfd);
rset_delete (s->rset);
+ }
+ if (s->rset_nmem)
+ nmem_destroy (s->rset_nmem);
if (s->nmem)
nmem_destroy (s->nmem);
}
{
const char *sort_max_str = zebra_get_resource(zh, "sortmax", "1000");
- yaz_log (LOG_DEBUG, "adding result set %s", name);
+ yaz_log(log_level_resultsets, "adding result set %s", name);
s = (ZebraSet) xmalloc (sizeof(*s));
s->next = zh->sets;
zh->sets = s;
s->rset_nmem=0;
s->nmem = 0;
s->rpn = 0;
+ s->cache_position = 0;
+ s->cache_rfd = 0;
return s;
}
if (!s->term_entries && !s->rset && s->rpn)
{
NMEM nmem = nmem_create ();
- yaz_log (LOG_LOG, "research %s", name);
+ yaz_log(log_level_resultsets, "research %s", name);
+ if (!s->rset_nmem)
+ s->rset_nmem=nmem_create();
s->rset =
rpn_search (zh, nmem, s->rset_nmem, s->rpn, s->num_bases,
s->basenames, s->name, s);
{
ZebraSet s = zh->sets;
+ yaz_log(log_level_resultsets, "invalidating result sets");
for (; s; s = s->next)
{
if (s->rset)
+ {
+ if (s->cache_rfd)
+ rset_close(s->cache_rfd);
rset_delete (s->rset);
+ }
s->rset = 0;
+ s->cache_rfd = 0;
+ s->cache_position = 0;
if (s->rset_nmem)
nmem_destroy(s->rset_nmem);
s->rset_nmem=0;
if (s->nmem)
nmem_destroy (s->nmem);
if (s->rset)
+ {
+ if (s->cache_rfd)
+ rset_close(s->cache_rfd);
rset_delete (s->rset);
+ }
if (s->rset_nmem)
nmem_destroy(s->rset_nmem);
xfree (s->name);
}
}
-ZebraPosSet zebraPosSetCreate (ZebraHandle zh, const char *name,
- int num, int *positions)
+/*
+ * Fetch meta records for num consecutive positions of the result set
+ * called name: start, start+1, ..., start+num-1.
+ *
+ * The position list is built here and handed to
+ * zebra_meta_records_create(); its return value is passed straight back
+ * to the caller.  Returns 0 without calling it when num is outside the
+ * range 1..10000.
+ */
+ZebraMetaRecord *zebra_meta_records_create_range (ZebraHandle zh,
+                                                  const char *name,
+                                                  zint start, int num)
+{
+    zint stack_positions[10];   /* used as-is when num <= 10 */
+    zint *position_list = stack_positions;
+    ZebraMetaRecord *records;
+    int use_heap;
+    int k;
+
+    if (num <= 0 || num > 10000)
+        return 0;
+
+    /* Larger requests do not fit the stack buffer; allocate instead. */
+    use_heap = num > 10;
+    if (use_heap)
+        position_list = xmalloc(sizeof(*position_list) * num);
+
+    for (k = 0; k < num; k++)
+        position_list[k] = start + k;
+
+    records = zebra_meta_records_create(zh, name, num, position_list);
+
+    if (use_heap)
+        xfree(position_list);
+    return records;
+}
+
+ZebraMetaRecord *zebra_meta_records_create (ZebraHandle zh, const char *name,
+ int num, zint *positions)
{
ZebraSet sset;
- ZebraPosSet sr = 0;
+ ZebraMetaRecord *sr = 0;
RSET rset;
int i;
struct zset_sort_info *sort_info;
+ if (!log_level_set)
+ loglevels();
if (!(sset = resultSetGet (zh, name)))
return NULL;
if (!(rset = sset->rset))
{
if (!sset->term_entries)
return 0;
- sr = (ZebraPosSet) xmalloc (sizeof(*sr) * num);
+ sr = (ZebraMetaRecord *) xmalloc (sizeof(*sr) * num);
for (i = 0; i<num; i++)
{
sr[i].sysno = 0;
}
else
{
- sr = (ZebraPosSet) xmalloc (sizeof(*sr) * num);
+ sr = (ZebraMetaRecord *) xmalloc (sizeof(*sr) * num);
for (i = 0; i<num; i++)
{
sr[i].sysno = 0;
sort_info = sset->sort_info;
if (sort_info)
{
- int position;
+ zint position;
for (i = 0; i<num; i++)
{
position = positions[i];
if (position > 0 && position <= sort_info->num_entries)
{
- yaz_log (LOG_DEBUG, "got pos=%d (sorted)", position);
+ yaz_log(log_level_sorting, "got pos=" ZINT_FORMAT
+ " (sorted)", position);
sr[i].sysno = sort_info->entries[position-1]->sysno;
sr[i].score = sort_info->entries[position-1]->score;
}
}
if (i < num) /* nope, get the rest, unsorted - sorry */
{
- int position = 0;
+ zint position = 0;
int num_i = 0;
zint psysno = 0;
RSFD rfd;
position = sort_info->num_entries;
while (num_i < num && positions[num_i] < position)
num_i++;
- rfd = rset_open (rset, RSETF_READ);
- while (num_i < num && rset_read (rfd, &key))
+
+ if (sset->cache_rfd &&
+ num_i < num && positions[num_i] > sset->cache_position)
+ {
+ position = sset->cache_position;
+ rfd = sset->cache_rfd;
+ psysno = sset->cache_psysno;
+ }
+ else
+ {
+ if (sset->cache_rfd)
+ rset_close(sset->cache_rfd);
+ rfd = rset_open (rset, RSETF_READ);
+ }
+ while (num_i < num && rset_read (rfd, &key, 0))
{
-#if IT_KEY_NEW
zint this_sys = key.mem[0];
-#else
- zint this_sys = key.sysno;
-#endif
if (this_sys != psysno)
{
psysno = this_sys;
if (position == positions[num_i])
{
sr[num_i].sysno = psysno;
- yaz_log (LOG_DEBUG, "got pos=%d (unsorted)", position);
+ yaz_log(log_level_sorting, "got pos=" ZINT_FORMAT " (unsorted)", position);
sr[num_i].score = -1;
num_i++;
}
}
}
- rset_close (rfd);
+ sset->cache_position = position;
+ sset->cache_psysno = psysno;
+ sset->cache_rfd = rfd;
}
}
return sr;
}
-void zebraPosSetDestroy (ZebraHandle zh, ZebraPosSet records, int num)
+void zebra_meta_records_destroy (ZebraHandle zh, ZebraMetaRecord *records,
+ int num)
{
assert(zh); /* compiler shut up about unused arg */
xfree (records);
zh->errCode = 230;
return;
}
- yaz_log (LOG_DEBUG, "result set sort input=%s output=%s",
+ if (!log_level_set)
+ loglevels();
+ yaz_log(log_level_sorting, "result set sort input=%s output=%s",
*input_setnames, output_setname);
sset = resultSetGet (zh, input_setnames[0]);
if (!sset)
Z_SortKeySpecList *sort_sequence, int *sort_status)
{
int i;
+ int n = 0;
+ zint kno = 0;
zint psysno = 0;
struct it_key key;
struct sortKeyInfo sort_criteria[3];
int num_criteria;
RSFD rfd;
+ TERMID termid;
+ TERMID *terms;
+ int numTerms = 0;
- yaz_log (LOG_LOG, "resultSetSortSingle start");
assert(nmem); /* compiler shut up about unused param */
sset->sort_info->num_entries = 0;
+ rset_getterms(rset, 0, 0, &n);
+ terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
+ rset_getterms(rset, terms, n, &numTerms);
+
sset->hits = 0;
num_criteria = sort_sequence->num_specs;
if (num_criteria > 3)
switch (sk->which)
{
case Z_SortKey_sortField:
- yaz_log (LOG_DEBUG, "Sort: key %d is of type sortField", i+1);
+ yaz_log(log_level_sorting, "Sort: key %d is of type sortField", i+1);
zh->errCode = 207;
return;
case Z_SortKey_elementSpec:
- yaz_log (LOG_DEBUG, "Sort: key %d is of type elementSpec", i+1);
+ yaz_log(log_level_sorting, "Sort: key %d is of type elementSpec", i+1);
zh->errCode = 207;
return;
case Z_SortKey_sortAttributes:
- yaz_log (LOG_DEBUG, "Sort: key %d is of type sortAttributes", i+1);
+ yaz_log(log_level_sorting, "Sort: key %d is of type sortAttributes", i+1);
sort_criteria[i].attrUse =
zebra_maps_sort (zh->reg->zebra_maps,
sk->u.sortAttributes,
&sort_criteria[i].numerical);
- yaz_log (LOG_DEBUG, "use value = %d", sort_criteria[i].attrUse);
+ yaz_log(log_level_sorting, "use value = %d", sort_criteria[i].attrUse);
if (sort_criteria[i].attrUse == -1)
{
zh->errCode = 116;
}
}
rfd = rset_open (rset, RSETF_READ);
- while (rset_read (rfd, &key))
+ while (rset_read (rfd, &key, &termid))
+ /* FIXME - pass a TERMID *, and use it for something below !! */
{
-#if IT_KEY_NEW
zint this_sys = key.mem[0];
-#else
- zint this_sys = key.sysno;
-#endif
+ kno++;
if (this_sys != psysno)
{
(sset->hits)++;
}
}
rset_close (rfd);
-
-#if 0
- for (i = 0; i < rset->no_rset_terms; i++)
- yaz_log (LOG_LOG, "term=\"%s\" nn=" ZINT_FORMAT
- " type=%s count=" ZINT_FORMAT,
- rset->rset_terms[i]->name,
- rset->rset_terms[i]->nn,
- rset->rset_terms[i]->flags,
- rset->rset_terms[i]->count);
-#endif
+ yaz_log(log_level_sorting, ZINT_FORMAT " keys, " ZINT_FORMAT " sysnos, sort",
+ kno, sset->hits);
+ for (i = 0; i < numTerms; i++)
+ yaz_log(log_level_sorting, "term=\"%s\" type=%s count=" ZINT_FORMAT,
+ terms[i]->name, terms[i]->flags, rset_count(terms[i]->rset));
*sort_status = Z_SortResponse_success;
- yaz_log (LOG_LOG, "resultSetSortSingle end");
}
RSET resultSetRef (ZebraHandle zh, const char *resultSetId)
return NULL;
}
-void resultSetRank (ZebraHandle zh, ZebraSet zebraSet, RSET rset)
+void resultSetRank (ZebraHandle zh, ZebraSet zebraSet, RSET rset, NMEM nmem)
{
zint kno = 0;
struct it_key key;
RSFD rfd;
- /* int term_index; */
+ TERMID termid;
+ TERMID *terms;
+ int numTerms = 0;
+ int n = 0;
int i;
ZebraRankClass rank_class;
struct rank_control *rc;
zint esthits;
double ratio;
+ if (!log_level_set)
+ loglevels();
sort_info = zebraSet->sort_info;
sort_info->num_entries = 0;
zebraSet->hits = 0;
- rfd = rset_open (rset, RSETF_READ);
+ rset_getterms(rset, 0, 0, &n);
+ terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
+ rset_getterms(rset, terms, n, &numTerms);
- yaz_log (LOG_LOG, "resultSetRank");
+ rfd = rset_open (rset, RSETF_READ);
rank_class = zebraRankLookup (zh, rank_handler_name);
if (!rank_class)
{
- yaz_log (LOG_WARN, "No such rank handler: %s", rank_handler_name);
+ yaz_log(YLOG_WARN, "No such rank handler: %s", rank_handler_name);
return;
}
rc = rank_class->control;
- if (rset_read (rfd, &key))
+ if (rset_read (rfd, &key, &termid))
{
-#if IT_KEY_NEW
zint psysno = key.mem[0];
-#else
- zint psysno = key.sysno;
-#endif
int score;
void *handle =
- (*rc->begin) (zh->reg, rank_class->class_handle, rset);
+ (*rc->begin) (zh->reg, rank_class->class_handle, rset, nmem,
+ terms, numTerms);
(zebraSet->hits)++;
- esthits=atoi(res_get_def(zh->res,"estimatehits","0"));
+ esthits = atoi(res_get_def(zh->res, "estimatehits","0"));
if (!esthits)
- est=-1; /* can not do */
+ est = -1; /* can not do */
do
{
-#if IT_KEY_NEW
- zint this_sys = key.mem[0];
-#else
- zint this_sys = key.sysno;
-#endif
+ zint this_sys = key.mem[0]; /* FIXME - assumes scope==2 */
+ zint seqno = key.mem[key.len-1]; /* FIXME - assumes scope==2 */
kno++;
+ key_logdump_txt(log_level_searchhits,&key," Got hit");
if (this_sys != psysno)
{
score = (*rc->calc) (handle, psysno);
(zebraSet->hits)++;
psysno = this_sys;
}
- /* FIXME - Ranking is broken, since rsets no longer have */
- /* term lists! */
- /* (*rc->add) (handle, this_sys, term_index); */
+ (*rc->add) (handle, seqno, termid);
- if ( (est==-2) && (zebraSet->hits==esthits))
- { /* time to estimate the hits */
- rset_pos(rfd,&cur,&tot);
- if (tot>0) {
- ratio=cur/tot;
- est=(zint)(0.5+zebraSet->hits/ratio);
- logf(LOG_LOG, "Estimating hits (%s) "
- "%0.1f->"ZINT_FORMAT
- "; %0.1f->"ZINT_FORMAT,
- rset->control->desc,
- cur, zebraSet->hits,
- tot,est);
- i=0; /* round to 3 significant digits */
- while (est>1000) {
- est/=10;
- i++;
+ if ((est==-2) && (zebraSet->hits==esthits))
+ { /* time to estimate the hits */
+ rset_pos(rfd,&cur,&tot);
+ if (tot>0) {
+ ratio = cur/tot;
+ est = (zint)(0.5+zebraSet->hits/ratio);
+ yaz_log(log_level_searchhits, "Estimating hits (%s) "
+ "%0.1f->" ZINT_FORMAT
+ "; %0.1f->" ZINT_FORMAT,
+ rset->control->desc,
+ cur, zebraSet->hits,
+ tot, est);
+ i = 0; /* round to 3 significant digits */
+ while (est>1000) {
+ est /= 10;
+ i++;
+ }
+ while (i--)
+ est *= 10;
+ zebraSet->hits = est;
}
- while (i--) est*=10;
- zebraSet->hits=est;
}
}
- }
- while (rset_read (rfd, &key) && (est<0) );
-
- score = (*rc->calc) (handle, psysno);
- resultSetInsertRank (zh, sort_info, psysno, score, 'A');
+ while (rset_read (rfd, &key,&termid) && (est<0) );
+ score = (*rc->calc)(handle, psysno);
+ resultSetInsertRank(zh, sort_info, psysno, score, 'A');
(*rc->end) (zh->reg, handle);
}
rset_close (rfd);
-/*
- for (i = 0; i < rset->no_rset_terms; i++)
+
+ yaz_log(log_level_searchterms, ZINT_FORMAT " keys, " ZINT_FORMAT " sysnos, rank",
+ kno, zebraSet->hits);
+ for (i = 0; i < numTerms; i++)
{
- if (est>0)
- rset->rset_terms[i]->count =
- est=(zint)(rset->rset_terms[i]->count/ratio);
- yaz_log (LOG_LOG, "term=\"%s\" nn=" ZINT_FORMAT
- " type=%s count=" ZINT_FORMAT,
- rset->rset_terms[i]->name,
- rset->rset_terms[i]->nn,
- rset->rset_terms[i]->flags,
- rset->rset_terms[i]->count);
+ yaz_log(log_level_searchterms, "term=\"%s\" type=%s count=" ZINT_FORMAT,
+ terms[i]->name, terms[i]->flags, rset_count(terms[i]->rset));
}
-*/
- yaz_log (LOG_LOG, ZINT_FORMAT " keys, "ZINT_FORMAT" distinct sysnos",
- kno, zebraSet->hits);
}
ZebraRankClass zebraRankLookup (ZebraHandle zh, const char *name)
ZebraRankClass p_next = p->next;
if (p->init_flag && p->control->destroy)
(*p->control->destroy)(reg, p->class_handle);
- xfree (p->control->name);
- xfree (p->control);
- xfree (p);
+ xfree(p->control->name);
+ xfree(p->control);
+ xfree(p);
p = p_next;
}
reg->rank_classes = NULL;