-/* $Id: rsmultiandor.c,v 1.17 2005-05-03 09:11:36 adam Exp $
- Copyright (C) 1995-2005
- Index Data ApS
-
-This file is part of the Zebra server.
+/* This file is part of the Zebra server.
+ Copyright (C) 1994-2011 Index Data
Zebra is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
for more details.
You should have received a copy of the GNU General Public License
-along with Zebra; see the file LICENSE.zebra. If not, write to the
-Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
-02111-1307, USA.
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
*/
-/*
- * This module implements the rsmulti_or and rsmulti_and result sets
+/**
+ * \file rsmultiandor.c
+ * \brief This module implements the rsmulti_or and rsmulti_and result sets
*
* rsmultior is based on a heap, from which we find the next hit.
*
*/
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
const void *untilbuf);
static int r_forward_or(RSFD rfd, void *buf, TERMID *term,
const void *untilbuf);
-static void r_pos (RSFD rfd, double *current, double *total);
+static void r_pos_and(RSFD rfd, double *current, double *total);
+static void r_pos_or(RSFD rfd, double *current, double *total);
static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm);
static const struct rset_control control_or =
r_open_or,
r_close,
r_forward_or,
- r_pos,
+ r_pos_or,
r_read_or,
r_write,
};
r_open_and,
r_close,
r_forward_and,
- r_pos,
+ r_pos_and,
r_read_and,
r_write,
};
struct rset_private {
- int no_rsets;
- RSET *rsets;
+ int dummy; /* placeholder member; nothing in the visible code reads or sets it */
};
zint hits; /* returned so far */
int eof; /* seen the end of it */
int tailcount; /* how many items are tailing */
+ zint segment;
+ int skip;
char *tailbits;
};
return ( 0==h->heapnum );
}
+/** \brief deletes the first item in the heap, and balances the rest
+ */
static void heap_delete (HEAP h)
-{ /* deletes the first item in the heap, and balances the rest */
+{
int cur = 1, child = 2;
h->heap[1] = 0; /* been deleted */
heap_swap (h, 1, h->heapnum--);
}
}
+/** \brief rebalances the heap after its root element has changed.
+ The heap root element has changed value (to bigger)
+ Swap downwards until the heap is ordered again
+*/
static void heap_balance (HEAP h)
-{ /* The heap root element has changed value (to bigger) */
- /* swap downwards until the heap is ordered again */
+{
int cur = 1, child = 2;
while (child <= h->heapnum) {
if (child < h->heapnum && heap_cmp(h,child,1+child)>0 )
/* nothing to delete, all is nmem'd, and will go away in due time */
}
+/** \brief compares two multi-and items for qsort
+ used in qsort to get the multi-and args in optimal order
+ that is, those with fewest occurrences first
+*/
int compare_ands(const void *x, const void *y)
-{ /* used in qsort to get the multi-and args in optimal order */
- /* that is, those with fewest occurrences first */
- const struct heap_item *hx = x;
+{ const struct heap_item *hx = x;
const struct heap_item *hy = y;
double cur, totx, toty;
rset_pos(hx->fd, &cur, &totx);
return 0; /* return totx - toty, except for overflows and rounding */
}
-/* Creating and deleting rsets ***********************/
-
static RSET rsmulti_andor_create(NMEM nmem,
struct rset_key_control *kcontrol,
- int scope, int no_rsets, RSET* rsets,
+ int scope, TERMID termid, /* termid: 0 from rset_create_and, caller-supplied from rset_create_or */
+ int no_rsets, RSET* rsets,
const struct rset_control *ctrl)
{
- RSET rnew = rset_create_base(ctrl, nmem, kcontrol, scope,0);
+ RSET rnew = rset_create_base(ctrl, nmem, kcontrol, scope, termid,
+ no_rsets, rsets); /* term and child rsets are handed to the base rset; later code reads them back as ct->term / ct->children */
struct rset_private *info;
if (!log_level_initialized)
{
log_level = yaz_log_module_level("rsmultiandor");
log_level_initialized = 1;
}
- info = (struct rset_private *) nmem_malloc(rnew->nmem,sizeof(*info));
- info->no_rsets = no_rsets;
- info->rsets = (RSET*)nmem_malloc(rnew->nmem, no_rsets*sizeof(*rsets));
- memcpy(info->rsets,rsets,no_rsets*sizeof(*rsets));
+ yaz_log(log_level, "rsmultiand_andor_create scope=%d", scope); /* NOTE(review): logged name differs from the function name rsmulti_andor_create - confirm intended */
+ info = (struct rset_private *) nmem_malloc(rnew->nmem, sizeof(*info)); /* private part no longer copies the child array; its contents are left unset here */
rnew->priv = info;
return rnew;
}
-RSET rsmulti_or_create(NMEM nmem, struct rset_key_control *kcontrol,
- int scope, int no_rsets, RSET* rsets)
+RSET rset_create_or(NMEM nmem, struct rset_key_control *kcontrol,
+ int scope, TERMID termid, int no_rsets, RSET* rsets)
{
- return rsmulti_andor_create(nmem, kcontrol, scope,
+ return rsmulti_andor_create(nmem, kcontrol, scope, termid, /* 'or' set: the caller's termid is passed through unchanged */
no_rsets, rsets, &control_or);
}
-RSET rsmulti_and_create(NMEM nmem, struct rset_key_control *kcontrol,
- int scope, int no_rsets, RSET* rsets)
+RSET rset_create_and(NMEM nmem, struct rset_key_control *kcontrol,
+ int scope, int no_rsets, RSET* rsets)
{
- return rsmulti_andor_create(nmem, kcontrol, scope,
+ return rsmulti_andor_create(nmem, kcontrol, scope, 0, /* 'and' set: created without a term of its own (termid 0) */
no_rsets, rsets, &control_and);
}
static void r_delete (RSET ct)
{
- struct rset_private *info = (struct rset_private *) ct->priv;
- int i;
- for(i = 0; i<info->no_rsets; i++)
- rset_delete(info->rsets[i]);
}
-
-/* Opening and closing fd's on them *********************/
-
static RSFD r_open_andor (RSET ct, int flag, int is_and)
{
RSFD rfd;
struct rfd_private *p;
- struct rset_private *info = (struct rset_private *) ct->priv;
const struct rset_key_control *kctrl = ct->keycontrol;
int i;
assert(p->items);
/* all other pointers shouls already be allocated, in right sizes! */
}
- else {
+ else
+ {
p = (struct rfd_private *) nmem_malloc (ct->nmem,sizeof(*p));
rfd->priv = p;
p->h = 0;
p->tailbits = 0;
if (is_and)
- p->tailbits = nmem_malloc(ct->nmem, info->no_rsets*sizeof(char) );
+ p->tailbits = nmem_malloc(ct->nmem, ct->no_children*sizeof(char) );
else
- p->h = heap_create( ct->nmem, info->no_rsets, kctrl);
- p->items=(struct heap_item *) nmem_malloc(ct->nmem,
- info->no_rsets*sizeof(*p->items));
- for (i = 0; i<info->no_rsets; i++)
+ p->h = heap_create( ct->nmem, ct->no_children, kctrl);
+ p->items = (struct heap_item *)
+ nmem_malloc(ct->nmem, ct->no_children*sizeof(*p->items));
+ for (i = 0; i<ct->no_children; i++)
{
- p->items[i].rset = info->rsets[i];
+ p->items[i].rset = ct->children[i];
p->items[i].buf = nmem_malloc(ct->nmem, kctrl->key_size);
}
}
p->tailcount = 0;
if (is_and)
{ /* read the array and sort it */
- for (i = 0; i<info->no_rsets; i++){
- p->items[i].fd = rset_open(info->rsets[i],RSETF_READ);
+ for (i = 0; i<ct->no_children; i++){
+ p->items[i].fd = rset_open(ct->children[i], RSETF_READ);
if (!rset_read(p->items[i].fd, p->items[i].buf, &p->items[i].term))
p->eof = 1;
p->tailbits[i] = 0;
}
- qsort(p->items, info->no_rsets, sizeof(p->items[0]), compare_ands);
- } else
+ qsort(p->items, ct->no_children, sizeof(p->items[0]), compare_ands);
+ }
+ else
{ /* fill the heap for ORing */
- for (i = 0; i<info->no_rsets; i++){
- p->items[i].fd = rset_open(info->rsets[i],RSETF_READ);
+ for (i = 0; i<ct->no_children; i++){
+ p->items[i].fd = rset_open(ct->children[i],RSETF_READ);
if ( rset_read(p->items[i].fd, p->items[i].buf, &p->items[i].term))
heap_insert(p->h, &(p->items[i]));
}
static void r_close (RSFD rfd)
{
- struct rset_private *info=
- (struct rset_private *)(rfd->rset->priv);
struct rfd_private *p=(struct rfd_private *)(rfd->priv);
int i;
if (p->h)
heap_destroy (p->h);
- for (i = 0; i<info->no_rsets; i++)
+ for (i = 0; i<rfd->rset->no_children; i++) /* close each child fd that is set; the child count now comes from the rset, not from the private struct */
if (p->items[i].fd)
rset_close(p->items[i].fd);
- rfd_delete_base(rfd);
}
-
-
static int r_forward_or(RSFD rfd, void *buf,
- TERMID *term,const void *untilbuf)
+ TERMID *term, const void *untilbuf)
{ /* while heap head behind untilbuf, forward it and rebalance heap */
struct rfd_private *p = rfd->priv;
const struct rset_key_control *kctrl = rfd->rset->keycontrol;
}
}
- return r_read_or(rfd,buf,term);
+ return r_read_or(rfd, buf, term);
}
+/** \brief reads one item key from an 'or' set
+ \param rfd set handle
+ \param buf resulting item buffer
+ \param term resulting term
+ \retval 0 EOF
+ \retval 1 item could be read
+*/
static int r_read_or (RSFD rfd, void *buf, TERMID *term)
{
+ RSET rset = rfd->rset;
struct rfd_private *mrfd = rfd->priv;
- const struct rset_key_control *kctrl = rfd->rset->keycontrol;
+ const struct rset_key_control *kctrl = rset->keycontrol;
struct heap_item *it;
int rdres;
if (heap_empty(mrfd->h))
return 0;
it = mrfd->h->heap[1];
- memcpy(buf,it->buf, kctrl->key_size);
+ memcpy(buf, it->buf, kctrl->key_size);
if (term)
- *term = it->term;
+ {
+ if (rset->term)
+ *term = rset->term;
+ else
+ *term = it->term;
+ }
(mrfd->hits)++;
rdres = rset_read(it->fd, it->buf, &it->term);
if ( rdres )
}
+/** \brief reads one item key from an 'and' set
+ \param rfd set handle
+ \param buf resulting item buffer
+ \param term resulting term
+ \retval 0 EOF
+ \retval 1 item could be read
+
+ Has to return all hits where each item points to the
+ same sysno (scope), in order. Keep an extra key (hitkey);
+ as long as not all records point to hitkey, forward
+ them, and update hitkey to be the highest seen so far
+ (if any item hits eof, mark eof, and return 0 thereafter).
+ Once a hit has been found, scan all items for the smallest
+ value. Mark all as being in the tail. Read next from that
+ item, and if not in the same record, clear its tail bit.
+*/
static int r_read_and (RSFD rfd, void *buf, TERMID *term)
-{ /* Has to return all hits where each item points to the */
- /* same sysno (scope), in order. Keep an extra key (hitkey) */
- /* as long as all records do not point to hitkey, forward */
- /* them, and update hitkey to be the highest seen so far. */
- /* (if any item eof's, mark eof, and return 0 thereafter) */
- /* Once a hit has been found, scan all items for the smallest */
- /* value. Mark all as being in the tail. Read next from that */
- /* item, and if not in the same record, clear its tail bit */
- struct rfd_private *p = rfd->priv;
- const struct rset_key_control *kctrl = rfd->rset->keycontrol;
- struct rset_private *info = rfd->rset->priv;
- int i, mintail;
- int cmp;
+{ struct rfd_private *p = rfd->priv;
+ RSET ct = rfd->rset;
+ const struct rset_key_control *kctrl = ct->keycontrol;
+ int i;
while (1) {
if (p->tailcount)
{ /* we are tailing, find lowest tail and return it */
- mintail = 0;
- while ((mintail<info->no_rsets) && !p->tailbits[mintail])
- mintail++; /* first tail */
- for (i = mintail+1; i<info->no_rsets; i++)
+ int mintail = -1;
+ int cmp;
+
+ for (i = 0; i<ct->no_children; i++)
{
if (p->tailbits[i])
{
- cmp=(*kctrl->cmp)(p->items[i].buf,p->items[mintail].buf);
- if (cmp<0)
+ if (mintail >= 0)
+ cmp = (*kctrl->cmp)
+ (p->items[i].buf, p->items[mintail].buf);
+ else
+ cmp = -1;
+ if (cmp < 0)
mintail = i;
+
+ if (kctrl->get_segment)
+ { /* segments enabled */
+ zint segment = kctrl->get_segment(p->items[i].buf);
+ /* store segment if not stored already */
+ if (!p->segment && segment)
+ p->segment = segment;
+
+ /* skip rest entirely if segments don't match */
+ if (p->segment && segment && p->segment != segment)
+ p->skip = 1;
+ }
}
}
/* return the lowest tail */
&p->items[mintail].term))
{
p->eof = 1; /* game over, once tails have been returned */
- p->tailbits[mintail]=0;
+ p->tailbits[mintail] = 0;
(p->tailcount)--;
- return 1;
}
- /* still a tail? */
- cmp=(*kctrl->cmp)(p->items[mintail].buf,buf);
- if (cmp >= rfd->rset->scope){
- p->tailbits[mintail]=0;
- (p->tailcount)--;
+ else
+ {
+ /* still a tail? */
+ cmp = (*kctrl->cmp)(p->items[mintail].buf,buf);
+ if (cmp >= rfd->rset->scope)
+ {
+ p->tailbits[mintail] = 0;
+ (p->tailcount)--;
+ }
}
+ if (p->skip)
+ continue; /* skip again.. eventually tailcount will be 0 */
+ if (p->tailcount == 0)
+ (p->hits)++;
return 1;
}
- /* not tailing, forward until all reocrds match, and set up */
+ /* not tailing, forward until all records match, and set up */
/* as tails. the earlier 'if' will then return the hits */
if (p->eof)
return 0; /* nothing more to see */
i = 1; /* assume items[0] is highest up */
- while (i<info->no_rsets) {
- cmp=(*kctrl->cmp)(p->items[0].buf,p->items[i].buf);
- if (cmp<=-rfd->rset->scope) { /* [0] was behind, forward it */
+ while (i < ct->no_children)
+ {
+ int cmp = (*kctrl->cmp)(p->items[0].buf, p->items[i].buf);
+ if (cmp <= -rfd->rset->scope) { /* [0] was behind, forward it */
if (!rset_forward(p->items[0].fd, p->items[0].buf,
&p->items[0].term, p->items[i].buf))
{
p->eof = 1; /* game over */
return 0;
}
- i = 0; /* start frowarding from scratch */
- } else if (cmp>=rfd->rset->scope)
+ i = 0; /* start forwarding from scratch */
+ }
+ else if (cmp>=rfd->rset->scope)
{ /* [0] was ahead, forward i */
if (!rset_forward(p->items[i].fd, p->items[i].buf,
&p->items[i].term, p->items[0].buf))
p->eof = 1; /* game over */
return 0;
}
- } else
+ }
+ else
i++;
} /* while i */
/* if we get this far, all rsets are now within +- scope of [0] */
/* ergo, we have a hit. Mark them all as tailing, and let the */
/* upper 'if' return the hits in right order */
- for (i = 0; i<info->no_rsets; i++)
+ for (i = 0; i < ct->no_children; i++)
p->tailbits[i] = 1;
- p->tailcount = info->no_rsets;
+ p->tailcount = ct->no_children;
+ p->segment = 0;
+ p->skip = 0;
} /* while 1 */
}
const void *untilbuf)
{
struct rfd_private *p = rfd->priv;
- const struct rset_key_control *kctrl = rfd->rset->keycontrol;
- struct rset_private *info = rfd->rset->priv;
+ RSET ct = rfd->rset;
+ const struct rset_key_control *kctrl = ct->keycontrol;
int i;
int cmp;
int killtail = 0;
- for (i = 0; i<info->no_rsets; i++)
+ for (i = 0; i<ct->no_children; i++)
{
cmp = (*kctrl->cmp)(p->items[i].buf,untilbuf);
if (cmp <= -rfd->rset->scope)
}
if (killtail)
{
- for (i = 0; i<info->no_rsets; i++)
+ for (i = 0; i<ct->no_children; i++)
p->tailbits[i] = 0;
p->tailcount = 0;
}
return r_read_and(rfd,buf,term);
}
-static void r_pos (RSFD rfd, double *current, double *total)
+static void r_pos_x(RSFD rfd, double *current, double *total, int and_op)
{
- struct rset_private *info =
- (struct rset_private *)(rfd->rset->priv);
+ RSET ct = rfd->rset;
struct rfd_private *mrfd =
(struct rfd_private *)(rfd->priv);
- double cur, tot;
- double scur = 0.0, stot = 0.0;
+ double ratio = and_op ? 0.0 : 1.0;
int i;
- for (i = 0; i<info->no_rsets; i++){
+ double sum_cur = 0.0;
+ double sum_tot = 0.0;
+ for (i = 0; i<ct->no_children; i++){
+ double nratio, cur, tot;
rset_pos(mrfd->items[i].fd, &cur, &tot);
- yaz_log(log_level, "r_pos: %d %0.1f %0.1f", i, cur,tot);
- scur += cur;
- stot += tot;
+ yaz_log(log_level, "r_pos: %d %0.1f %0.1f", i, cur,tot);
+ nratio = cur / tot;
+ if (and_op)
+ {
+ if (nratio > ratio)
+ ratio = nratio;
+ }
+ else
+ {
+ sum_cur += cur;
+ sum_tot += tot;
+ }
}
- if (stot < 1.0) { /* nothing there */
+ if (!and_op)
+ ratio = sum_cur / sum_tot;
+ if (ratio == 0.0 || ratio == 1.0) { /* nothing there */
*current = 0;
*total = 0;
yaz_log(log_level, "r_pos: NULL %0.1f %0.1f", *current, *total);
else
{
*current = (double) (mrfd->hits);
- *total = *current*stot/scur;
+ *total = *current / ratio;
yaz_log(log_level, "r_pos: = %0.1f %0.1f", *current, *total);
}
}
+static void r_pos_and(RSFD rfd, double *current, double *total)
+{ /* position callback for 'and' sets: r_pos_x with and_op = 1, which uses the largest cur/tot ratio among the children */
+ r_pos_x(rfd, current, total, 1);
+}
+
+static void r_pos_or(RSFD rfd, double *current, double *total)
+{ /* position callback for 'or' sets: r_pos_x with and_op = 0, which uses the summed cur over the summed tot of the children */
+ r_pos_x(rfd, current, total, 0);
+}
+
static int r_write (RSFD rfd, const void *buf)
{
yaz_log (YLOG_FATAL, "multior set type is read-only");
}
static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm)
- /* Special case: Some multi-ors have all terms pointing to the same */
- /* term. We do not want to duplicate those. Other multiors (and ands) */
- /* have different terms under them. Those we want. */
{
- struct rset_private *info =
- (struct rset_private *) ct->priv;
- int firstterm= *curterm;
- int i;
- for (i = 0; i<info->no_rsets; i++)
+ if (ct->term) /* the set carries a term of its own: delegate to rset_get_one_term and do not visit the children */
+ rset_get_one_term(ct, terms, maxterms, curterm);
+ else
{
- rset_getterms(info->rsets[i], terms, maxterms, curterm);
- if ( ( *curterm > firstterm+1 ) &&
- ( *curterm <= maxterms ) &&
- ( terms[(*curterm)-1] == terms[firstterm] )
- )
- (*curterm)--; /* forget the term, seen that before */
+ /* Special case: some multi-ors have all their children pointing
+ to the same term, and we do not want to list it more than once.
+ After each child, if the most recently collected term equals the
+ first term collected by this call, it is dropped again.
+ Other multi-ors (and multi-ands) have different terms under
+ them; those are kept.
+ */
+ int firstterm= *curterm;
+ int i;
+
+ for (i = 0; i<ct->no_children; i++)
+ {
+ rset_getterms(ct->children[i], terms, maxterms, curterm);
+ if ( ( *curterm > firstterm+1 ) &&
+ ( *curterm <= maxterms ) &&
+ ( terms[(*curterm)-1] == terms[firstterm] )
+ )
+ (*curterm)--; /* forget the term, seen that before */
+ }
}
}
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+