-/* $Id: rsmultiandor.c,v 1.2 2004-09-28 16:12:42 heikki Exp $
- Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
- Index Data Aps
+/* $Id: rsmultiandor.c,v 1.14 2005-03-08 14:02:15 adam Exp $
+ Copyright (C) 1995-2005
+ Index Data ApS
This file is part of the Zebra server.
/*
- * This module implements the rsmultior and rsmultiand result sets
+ * This module implements the rsmulti_or and rsmulti_and result sets
*
* rsmultior is based on a heap, from which we find the next hit.
*
#include <string.h>
#include <zebrautl.h>
-#include <isamc.h>
+#include <idzebra/isamc.h>
#include <rset.h>
static RSFD r_open_and (RSET ct, int flag);
static RSFD r_open_or (RSET ct, int flag);
static void r_close (RSFD rfd);
static void r_delete (RSET ct);
-static void r_rewind (RSFD rfd);
-static int r_read_and (RSFD rfd, void *buf);
-static int r_read_or (RSFD rfd, void *buf);
+static int r_read_and (RSFD rfd, void *buf, TERMID *term);
+static int r_read_or (RSFD rfd, void *buf, TERMID *term);
static int r_write (RSFD rfd, const void *buf);
-static int r_forward_and(RSFD rfd, void *buf,
+static int r_forward_and(RSFD rfd, void *buf, TERMID *term,
const void *untilbuf);
-static int r_forward_or(RSFD rfd, void *buf,
+static int r_forward_or(RSFD rfd, void *buf, TERMID *term,
const void *untilbuf);
static void r_pos (RSFD rfd, double *current, double *total);
+static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm);
static const struct rset_control control_or =
{
"multi-or",
r_delete,
+ r_get_terms,
r_open_or,
r_close,
- r_rewind,
r_forward_or,
r_pos,
r_read_or,
{
"multi-and",
r_delete,
+ r_get_terms,
r_open_and,
r_close,
- r_rewind,
r_forward_and,
r_pos,
r_read_and,
RSFD fd;
void *buf;
RSET rset;
+ TERMID term;
};
struct heap {
char *tailbits;
};
+static int log_level = 0;
+static int log_level_initialized = 0;
+
+
/* Heap functions ***********************/
#if 0
-static void heap_dump_item( HEAP h, int i, int level) {
+static void heap_dump_item( HEAP h, int i, int level)
+{ /* Debug helper (sits inside the surrounding #if 0): logs heap slot i,
+ * padded by 'level' blanks, then recurses into slots 2*i and 2*i+1.
+ * Returns without logging once i is past h->heapnum.
+ * NOTE(review): rset_pos is called with four arguments here but with
+ * three in compare_ands and r_pos - this disabled code probably needs
+ * adjusting before it can be enabled; confirm against rset.h. */
double cur,tot;
if (i>h->heapnum)
return;
(void)rset_pos(h->heap[i]->rset,h->heap[i]->fd, &cur, &tot);
- logf(LOG_LOG," %d %*s i=%p buf=%p %0.1f/%0.1f",i, level, "",
+ yaz_log(log_level," %d %*s i=%p buf=%p %0.1f/%0.1f",i, level, "",
&(h->heap[i]), h->heap[i]->buf, cur,tot );
heap_dump_item(h, 2*i, level+1);
heap_dump_item(h, 2*i+1, level+1);
}
static void heap_dump( HEAP h,char *msg) {
- logf(LOG_LOG, "heap dump: %s num=%d max=%d",msg, h->heapnum, h->heapmax);
+ yaz_log(log_level, "heap dump: %s num=%d max=%d",msg, h->heapnum, h->heapmax); /* one summary line tagged with msg; the items follow, starting at slot 1 */
heap_dump_item(h,1,1);
}
#endif
{
struct heap_item *swap;
swap = h->heap[x];
- h->heap[x]=h->heap[y];
- h->heap[y]=swap;
+ h->heap[x] = h->heap[y];
+ h->heap[y] = swap;
}
static int heap_cmp(HEAP h, int x, int y)
static void heap_delete (HEAP h)
{ /* deletes the first item in the heap, and balances the rest */
int cur = 1, child = 2;
- h->heap[1]=0; /* been deleted */
+ h->heap[1] = 0; /* been deleted */
heap_swap (h, 1, h->heapnum--);
while (child <= h->heapnum) {
if (child < h->heapnum && heap_cmp(h,child,1+child)>0 )
cur = ++(h->heapnum);
assert(cur <= h->heapmax);
- h->heap[cur]=hi;
+ h->heap[cur] = hi;
parent = cur/2;
while (parent && (heap_cmp(h,parent,cur) > 0))
{
++size; /* heap array starts at 1 */
h->heapnum = 0;
h->heapmax = size;
- h->kctrl=kctrl;
+ h->kctrl = kctrl;
h->heap = (struct heap_item**) nmem_malloc(nmem,size*sizeof(*h->heap));
h->heap[0]=0; /* not used */
return h;
static void heap_clear( HEAP h)
{
assert(h);
- h->heapnum=0;
+ h->heapnum = 0; /* empties the heap by resetting the item count only; the slot array is left untouched */
}
static void heap_destroy (HEAP h)
int compare_ands(const void *x, const void *y)
{ /* used in qsort to get the multi-and args in optimal order */
/* that is, those with fewest occurrences first */
- const struct heap_item *hx=x;
- const struct heap_item *hy=y;
+ const struct heap_item *hx = x; /* x and y are treated as pointers to heap_item elements */
+ const struct heap_item *hy = y;
double cur, totx, toty;
rset_pos(hx->fd, &cur, &totx);
rset_pos(hy->fd, &cur, &toty);
- if ( totx > toty +0.5 ) return 1;
- if ( totx < toty -0.5 ) return -1;
+ if ( totx > toty +0.5 ) /* only the totals are compared (cur is ignored); */
+ return 1; /* totals within 0.5 of each other rank as equal */
+ if ( totx < toty -0.5 )
+ return -1;
return 0; /* return totx - toty, except for overflows and rounding */
}
int scope, int no_rsets, RSET* rsets,
const struct rset_control *ctrl)
{
- RSET rnew=rset_create_base(ctrl, nmem,kcontrol, scope);
+ RSET rnew = rset_create_base(ctrl, nmem,kcontrol, scope,0);
struct rset_multiandor_info *info;
+ if (!log_level_initialized)
+ {
+ log_level = yaz_log_module_level("rsmultiandor");
+ log_level_initialized = 1;
+ }
info = (struct rset_multiandor_info *) nmem_malloc(rnew->nmem,sizeof(*info));
- info->no_rsets=no_rsets;
- info->rsets=(RSET*)nmem_malloc(rnew->nmem, no_rsets*sizeof(*rsets));
+ info->no_rsets = no_rsets;
+ info->rsets = (RSET*)nmem_malloc(rnew->nmem, no_rsets*sizeof(*rsets));
memcpy(info->rsets,rsets,no_rsets*sizeof(*rsets));
- rnew->priv=info;
+ rnew->priv = info;
return rnew;
}
-RSET rsmultior_create( NMEM nmem, const struct key_control *kcontrol, int scope,
- int no_rsets, RSET* rsets)
+RSET rsmulti_or_create(NMEM nmem, const struct key_control *kcontrol,
+ int scope, int no_rsets, RSET* rsets) /* multi-or constructor: passes every
+ * argument on to rsmulti_andor_create together with &control_or and
+ * returns the RSET that call produces */
{
return rsmulti_andor_create(nmem, kcontrol, scope,
no_rsets, rsets, &control_or);
}
-RSET rsmultiand_create( NMEM nmem, const struct key_control *kcontrol, int scope,
- int no_rsets, RSET* rsets)
+RSET rsmulti_and_create(NMEM nmem, const struct key_control *kcontrol,
+ int scope, int no_rsets, RSET* rsets)
{
return rsmulti_andor_create(nmem, kcontrol, scope,
no_rsets, rsets, &control_and);
{
struct rset_multiandor_info *info = (struct rset_multiandor_info *) ct->priv;
int i;
- for(i=0;i<info->no_rsets;i++)
+ for(i = 0; i<info->no_rsets; i++)
rset_delete(info->rsets[i]);
}
if (flag & RSETF_WRITE)
{
- logf (LOG_FATAL, "multior set type is read-only");
+ yaz_log (YLOG_FATAL, "multiandor set type is read-only");
return NULL;
}
- rfd=rfd_create_base(ct);
+ rfd = rfd_create_base(ct);
if (rfd->priv) {
- p=(struct rset_multiandor_rfd *)rfd->priv;
+ p = (struct rset_multiandor_rfd *)rfd->priv;
if (!is_and)
heap_clear(p->h);
assert(p->items);
}
else {
p = (struct rset_multiandor_rfd *) nmem_malloc (ct->nmem,sizeof(*p));
- rfd->priv=p;
- p->h=0;
- p->tailbits=0;
+ rfd->priv = p;
+ p->h = 0;
+ p->tailbits = 0;
if (is_and)
- p->tailbits=nmem_malloc(ct->nmem, info->no_rsets*sizeof(char) );
+ p->tailbits = nmem_malloc(ct->nmem, info->no_rsets*sizeof(char) );
else
p->h = heap_create( ct->nmem, info->no_rsets, kctrl);
p->items=(struct heap_item *) nmem_malloc(ct->nmem,
info->no_rsets*sizeof(*p->items));
- for (i=0; i<info->no_rsets; i++){
- p->items[i].rset=info->rsets[i];
- p->items[i].buf=nmem_malloc(ct->nmem,kctrl->key_size);
+ for (i = 0; i<info->no_rsets; i++){
+ p->items[i].rset = info->rsets[i];
+ p->items[i].buf = nmem_malloc(ct->nmem,kctrl->key_size);
}
}
p->flag = flag;
- p->hits=0;
- p->eof=0;
- p->tailcount=0;
+ p->hits = 0;
+ p->eof = 0;
+ p->tailcount = 0;
if (is_and)
{ /* read the array and sort it */
- for (i=0; i<info->no_rsets; i++){
- p->items[i].fd=rset_open(info->rsets[i],RSETF_READ);
- if ( !rset_read(p->items[i].fd, p->items[i].buf) )
- p->eof=1;
- p->tailbits[i]=0;
+ for (i = 0; i<info->no_rsets; i++){
+ p->items[i].fd = rset_open(info->rsets[i],RSETF_READ);
+ if (!rset_read(p->items[i].fd, p->items[i].buf, &p->items[i].term))
+ p->eof = 1;
+ p->tailbits[i] = 0;
}
qsort(p->items, info->no_rsets, sizeof(p->items[0]), compare_ands);
} else
{ /* fill the heap for ORing */
- for (i=0; i<info->no_rsets; i++){
- p->items[i].fd=rset_open(info->rsets[i],RSETF_READ);
- if ( rset_read(p->items[i].fd, p->items[i].buf) )
+ for (i = 0; i<info->no_rsets; i++){
+ p->items[i].fd = rset_open(info->rsets[i],RSETF_READ);
+ if ( rset_read(p->items[i].fd, p->items[i].buf, &p->items[i].term))
heap_insert(p->h, &(p->items[i]));
}
}
-static int r_forward_or(RSFD rfd, void *buf, const void *untilbuf)
+static int r_forward_or(RSFD rfd, void *buf,
+ TERMID *term,const void *untilbuf)
+{ /* While the heap head is behind untilbuf, forward it and rebalance the
+ * heap. Returns 0 if the heap is empty on entry or runs empty while
+ * forwarding; otherwise returns whatever r_read_or returns for
+ * (rfd, buf, term).
+ * NOTE(review): "behind" is tested with '<' against -scope here, but
+ * with '<=' in r_read_and and r_forward_and - confirm which is meant. */
+ struct rset_multiandor_rfd *p = rfd->priv;
+ const struct key_control *kctrl = rfd->rset->keycontrol;
+ if (heap_empty(p->h))
+ return 0;
+ while ( (*kctrl->cmp)(p->h->heap[1]->buf,untilbuf) < -rfd->rset->scope )
+ {
+ if (rset_forward(p->h->heap[1]->fd,p->h->heap[1]->buf,
+ &p->h->heap[1]->term, untilbuf))
+ heap_balance(p->h); /* the head item was handed its own buf/term to update */
+ else
+ { /* rset_forward returned 0: drop the head item from the heap */
+ heap_delete(p->h);
+ if (heap_empty(p->h))
+ return 0;
+ }
+
+ }
+ return r_read_or(rfd,buf,term);
+}
+
+
+static int r_read_or (RSFD rfd, void *buf, TERMID *term)
{
- struct rset_multiandor_rfd *mrfd=rfd->priv;
- const struct key_control *kctrl=rfd->rset->keycontrol;
- struct heap_item it;
+ struct rset_multiandor_rfd *mrfd = rfd->priv;
+ const struct key_control *kctrl = rfd->rset->keycontrol;
+ struct heap_item *it;
int rdres;
if (heap_empty(mrfd->h))
return 0;
- it = *(mrfd->h->heap[1]);
- memcpy(buf,it.buf, kctrl->key_size);
- /* FIXME - This is not right ! */
- /* If called with an untilbuf, we need to compare to that, and */
- /* forward until we are somewhere! */
+ it = mrfd->h->heap[1];
+ memcpy(buf,it->buf, kctrl->key_size);
+ if (term)
+ *term = it->term;
(mrfd->hits)++;
- if (untilbuf)
- rdres=rset_forward(it.fd, it.buf, untilbuf);
- else
- rdres=rset_read(it.fd, it.buf);
+ rdres = rset_read(it->fd, it->buf, &it->term);
if ( rdres )
heap_balance(mrfd->h);
else
}
-static int r_read_or (RSFD rfd, void *buf)
-{
- return r_forward_or(rfd, buf,0);
-}
-
-static int r_read_and (RSFD rfd, void *buf)
+static int r_read_and (RSFD rfd, void *buf, TERMID *term)
{ /* Has to return all hits where each item points to the */
/* same sysno (scope), in order. Keep an extra key (hitkey) */
/* as long as all records do not point to hitkey, forward */
/* Once a hit has been found, scan all items for the smallest */
/* value. Mark all as being in the tail. Read next from that */
/* item, and if not in the same record, clear its tail bit */
- struct rset_multiandor_rfd *p=rfd->priv;
- const struct key_control *kctrl=rfd->rset->keycontrol;
- struct rset_multiandor_info *info=rfd->rset->priv;
+ struct rset_multiandor_rfd *p = rfd->priv;
+ const struct key_control *kctrl = rfd->rset->keycontrol;
+ struct rset_multiandor_info *info = rfd->rset->priv;
int i, mintail;
int cmp;
while (1) {
if (p->tailcount)
{ /* we are tailing, find lowest tail and return it */
- mintail=0;
+ mintail = 0;
while ((mintail<info->no_rsets) && !p->tailbits[mintail])
mintail++; /* first tail */
- for (i=mintail+1;i<info->no_rsets;i++)
+ for (i = mintail+1; i<info->no_rsets; i++)
{
if (p->tailbits[i])
{
cmp=(*kctrl->cmp)(p->items[i].buf,p->items[mintail].buf);
if (cmp<0)
- mintail=i;
+ mintail = i;
}
}
/* return the lowest tail */
memcpy(buf, p->items[mintail].buf, kctrl->key_size);
- if (!rset_read(p->items[mintail].fd, p->items[mintail].buf))
+ if (term)
+ *term = p->items[mintail].term;
+ if (!rset_read(p->items[mintail].fd, p->items[mintail].buf,
+ &p->items[mintail].term))
{
- p->eof=1; /* game over, once tails have been returned */
+ p->eof = 1; /* game over, once tails have been returned */
p->tailbits[mintail]=0;
(p->tailcount)--;
return 1;
/* as tails. the earlier 'if' will then return the hits */
if (p->eof)
return 0; /* nothing more to see */
- i=1; /* assume items[0] is highest up */
+ i = 1; /* assume items[0] is highest up */
while (i<info->no_rsets) {
cmp=(*kctrl->cmp)(p->items[0].buf,p->items[i].buf);
if (cmp<=-rfd->rset->scope) { /* [0] was behind, forward it */
if (!rset_forward(p->items[0].fd, p->items[0].buf,
- p->items[i].buf))
+ &p->items[0].term, p->items[i].buf))
{
- p->eof=1; /* game over */
+ p->eof = 1; /* game over */
return 0;
}
- i=0; /* start frowarding from scratch */
+ i = 0; /* start forwarding from scratch */
} else if (cmp>=rfd->rset->scope)
{ /* [0] was ahead, forward i */
if (!rset_forward(p->items[i].fd, p->items[i].buf,
- p->items[0].buf))
+ &p->items[i].term, p->items[0].buf))
{
- p->eof=1; /* game over */
+ p->eof = 1; /* game over */
return 0;
}
} else
/* if we get this far, all rsets are now within +- scope of [0] */
/* ergo, we have a hit. Mark them all as tailing, and let the */
/* upper 'if' return the hits in right order */
- for (i=0; i<info->no_rsets;i++)
- p->tailbits[i]=1;
- p->tailcount=info->no_rsets;
+ for (i = 0; i<info->no_rsets; i++)
+ p->tailbits[i] = 1;
+ p->tailcount = info->no_rsets;
} /* while 1 */
}
-static int r_forward_and(RSFD rfd, void *buf, const void *untilbuf)
-{
- return 0;
+static int r_forward_and(RSFD rfd, void *buf, TERMID *term,
+ const void *untilbuf)
+{ /* Forwards each item whose current key compares <= -scope against
+ * untilbuf. If any item had to be forwarded, all tail bits and the
+ * tail count are reset. Returns 0 (after setting eof and clearing
+ * tailcount) as soon as one rset_forward call returns 0; otherwise
+ * returns whatever r_read_and returns for (rfd, buf, term). */
+ struct rset_multiandor_rfd *p = rfd->priv;
+ const struct key_control *kctrl = rfd->rset->keycontrol;
+ struct rset_multiandor_info *info = rfd->rset->priv;
+ int i;
+ int cmp;
+ int killtail = 0; /* becomes 1 once any item has been forwarded */
+
+ for (i = 0; i<info->no_rsets; i++)
+ {
+ cmp = (*kctrl->cmp)(p->items[i].buf,untilbuf);
+ if (cmp <= -rfd->rset->scope)
+ {
+ killtail = 1; /* we are moving to a different hit */
+ if (!rset_forward(p->items[i].fd, p->items[i].buf,
+ &p->items[i].term, untilbuf))
+ {
+ p->eof = 1; /* game over */
+ p->tailcount = 0;
+ return 0;
+ }
+ }
+ }
+ if (killtail)
+ {
+ for (i = 0; i<info->no_rsets; i++)
+ p->tailbits[i] = 0;
+ p->tailcount = 0;
+ }
+ return r_read_and(rfd,buf,term);
}
static void r_pos (RSFD rfd, double *current, double *total)
{
- struct rset_multiandor_info *info=
- (struct rset_multiandor_info *)(rfd->rset->priv);
- struct rset_multiandor_rfd *mrfd=(struct rset_multiandor_rfd *)(rfd->priv);
+ /* Position estimate: *current is the number of hits returned so far */
+ /* (mrfd->hits); *total scales that by the ratio of the summed totals */
+ /* to the summed current positions of all items. Both are set to 0 */
+ /* when the summed total is below 1. */
+ struct rset_multiandor_info *info =
+ (struct rset_multiandor_info *)(rfd->rset->priv);
+ struct rset_multiandor_rfd *mrfd =
+ (struct rset_multiandor_rfd *)(rfd->priv);
double cur, tot;
- double scur=0.0, stot=0.0;
+ double scur = 0.0, stot = 0.0;
int i;
- for (i=0; i<info->no_rsets; i++){
+ for (i = 0; i<info->no_rsets; i++){
rset_pos(mrfd->items[i].fd, &cur, &tot);
- logf(LOG_DEBUG, "r_pos: %d %0.1f %0.1f", i, cur,tot);
+ yaz_log(log_level, "r_pos: %d %0.1f %0.1f", i, cur,tot);
scur += cur;
stot += tot;
}
- if (stot <1.0) { /* nothing there */
- *current=0;
- *total=0;
- return;
+ if (stot < 1.0) { /* nothing there */
+ *current = 0;
+ *total = 0;
+ yaz_log(log_level, "r_pos: NULL %0.1f %0.1f", *current, *total);
+ }
+ else if (scur <= 0.0)
+ { /* no item reports any progress: dividing by scur would yield */
+ /* NaN or infinity, so report the summed total unscaled */
+ *current = (double) (mrfd->hits);
+ *total = stot;
+ yaz_log(log_level, "r_pos: = %0.1f %0.1f", *current, *total);
+ }
+ else
+ {
+ *current = (double) (mrfd->hits);
+ *total = *current*stot/scur;
+ yaz_log(log_level, "r_pos: = %0.1f %0.1f", *current, *total);
}
- *current=mrfd->hits;
- *total=*current*stot/scur;
}
-
-static void r_rewind (RSFD rfd)
+static int r_write (RSFD rfd, const void *buf)
{
- assert(!"rewind not implemented yet");
- /* FIXME - rewind all parts, rebalance heap, clear hits */
+ /* Write stub: this set type cannot be written to. Logs a fatal */
+ /* message and returns -1; rfd and buf are not used. The message */
+ /* names "multiandor", matching the one logged when a write-mode */
+ /* open is refused. */
+ yaz_log (YLOG_FATAL, "multiandor set type is read-only");
+ return -1;
}
-static int r_write (RSFD rfd, const void *buf)
+static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm)
+ /* Asks every sub-rset in turn to add its terms via rset_getterms. */
+ /* Special case: some multi-ors have all sub-rsets pointing to the same */
+ /* term; such a repeat is dropped again. Differing terms are all kept. */
{
- logf (LOG_FATAL, "multior set type is read-only");
- return -1;
+ struct rset_multiandor_info *info =
+ (struct rset_multiandor_info *) ct->priv;
+ int firstterm= *curterm; /* value of *curterm on entry, before any sub-rset is asked */
+ int i;
+ for (i = 0; i<info->no_rsets; i++)
+ {
+ rset_getterms(info->rsets[i], terms, maxterms, curterm);
+ if ( ( *curterm > firstterm+1 ) && /* *curterm has advanced by two or more */
+ ( *curterm <= maxterms ) && /* keeps terms[*curterm-1] in range, assuming maxterms is the capacity of terms[] - TODO confirm */
+ ( terms[(*curterm)-1] == terms[firstterm] ) /* newest entry equals the first one */
+ )
+ (*curterm)--; /* forget the term, seen that before */
+ }
}
+
+