-/* $Id: rsprox.c,v 1.3 2004-06-16 21:27:37 adam Exp $
- Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
- Index Data Aps
-
-This file is part of the Zebra server.
+/* This file is part of the Zebra server.
+ Copyright (C) 1994-2009 Index Data
Zebra is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
for more details.
You should have received a copy of the GNU General Public License
-along with Zebra; see the file LICENSE.zebra. If not, write to the
-Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
-02111-1307, USA.
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
*/
#include <stdio.h>
#include <string.h>
#include <assert.h>
-#include <rsprox.h>
-#include <zebrautl.h>
+#include <idzebra/util.h>
+#include <rset.h>
#ifndef RSET_DEBUG
#define RSET_DEBUG 0
#endif
-static void *r_create(RSET ct, const struct rset_control *sel, void *parms);
-static RSFD r_open (RSET ct, int flag);
-static void r_close (RSFD rfd);
-static void r_delete (RSET ct);
-static void r_rewind (RSFD rfd);
-static int r_forward(RSET ct, RSFD rfd, void *buf, int *term_index,
- int (*cmpfunc)(const void *p1, const void *p2),
- const void *untilbuf);
-static int r_count (RSET ct);
-static int r_read (RSFD rfd, void *buf, int *term_index);
-static int r_write (RSFD rfd, const void *buf);
+static RSFD r_open(RSET ct, int flag);
+static void r_close(RSFD rfd);
+static void r_delete(RSET ct);
+static int r_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf);
+static int r_read(RSFD rfd, void *buf, TERMID *term);
+static int r_write(RSFD rfd, const void *buf);
+static void r_pos(RSFD rfd, double *current, double *total);
+static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm);
-static const struct rset_control control_prox =
+static const struct rset_control control =
{
"prox",
- r_create,
+ r_delete,
+ r_get_terms,
r_open,
r_close,
- r_delete,
- r_rewind,
r_forward,
- r_count,
+ r_pos,
r_read,
r_write,
};
-const struct rset_control *rset_kind_prox = &control_prox;
-
struct rset_prox_info {
- struct rset_prox_parms p;
-
- struct rset_prox_rfd *rfd_list;
+ int ordered;
+ int exclusion;
+ int relation;
+ int distance;
};
struct rset_prox_rfd {
RSFD *rfd;
char **buf; /* lookahead key buffers */
char *more; /* more in each lookahead? */
- struct rset_prox_rfd *next;
- struct rset_prox_info *info;
+ TERMID *terms; /* lookahead terms */
+ zint hits;
};
-static void *r_create (RSET ct, const struct rset_control *sel, void *parms)
+
+RSET rset_create_prox(NMEM nmem, struct rset_key_control *kcontrol,
+ int scope,
+ int rset_no, RSET *rset,
+ int ordered, int exclusion,
+ int relation, int distance)
{
- rset_prox_parms *prox_parms = (rset_prox_parms *) parms;
+ RSET rnew = rset_create_base(&control, nmem, kcontrol, scope, 0,
+ rset_no, rset);
struct rset_prox_info *info;
- int i;
- char prox_term[512];
- int length_prox_term = 0;
- int min_nn = 10000000;
- const char *flags = NULL;
- int term_type = 0;
-
-
- info = (struct rset_prox_info *) xmalloc (sizeof(*info));
- memcpy(&info->p, prox_parms, sizeof(struct rset_prox_parms));
- assert(info->p.rset_no >= 2);
- info->p.rset = xmalloc(info->p.rset_no * sizeof(*info->p.rset));
- memcpy(info->p.rset, prox_parms->rset,
- info->p.rset_no * sizeof(*info->p.rset));
- info->rfd_list = NULL;
-
- for (i = 0; i<info->p.rset_no; i++)
- if (rset_is_volatile(info->p.rset[i]))
- ct->flags |= RSET_FLAG_VOLATILE;
-
- *prox_term = '\0';
- for (i = 0; i<info->p.rset_no; i++)
- {
- int j;
- for (j = 0; j < info->p.rset[i]->no_rset_terms; j++)
- {
- const char *nflags = info->p.rset[i]->rset_terms[j]->flags;
- char *term = info->p.rset[i]->rset_terms[j]->name;
- int lterm = strlen(term);
- if (lterm + length_prox_term < sizeof(prox_term)-1)
- {
- if (length_prox_term)
- prox_term[length_prox_term++] = ' ';
- strcpy (prox_term + length_prox_term, term);
- length_prox_term += lterm;
- }
- if (min_nn > info->p.rset[i]->rset_terms[j]->nn)
- min_nn = info->p.rset[i]->rset_terms[j]->nn;
- flags = nflags;
- term_type = info->p.rset[i]->rset_terms[j]->type;
- }
- }
-
- ct->no_rset_terms = 1;
- ct->rset_terms = (RSET_TERM *)
- xmalloc (sizeof (*ct->rset_terms) * ct->no_rset_terms);
+ info = (struct rset_prox_info *) nmem_malloc(rnew->nmem,sizeof(*info));
+ info->ordered = ordered;
+ info->exclusion = exclusion;
+ info->relation = relation;
+ info->distance = distance;
+ rnew->priv = info;
+ return rnew;
+}
- ct->rset_terms[0] = rset_term_create (prox_term, length_prox_term,
- flags, term_type);
- return info;
+static void r_delete(RSET ct)
+{
}
-static RSFD r_open (RSET ct, int flag)
+static RSFD r_open(RSET ct, int flag)
{
- struct rset_prox_info *info = (struct rset_prox_info *) ct->buf;
- struct rset_prox_rfd *rfd;
- int i, dummy;
+ RSFD rfd;
+ struct rset_prox_rfd *p;
+ int i;
if (flag & RSETF_WRITE)
{
- logf (LOG_FATAL, "prox set type is read-only");
- return NULL;
+ yaz_log(YLOG_FATAL, "prox set type is read-only");
+ return NULL;
}
- rfd = (struct rset_prox_rfd *) xmalloc (sizeof(*rfd));
- logf(LOG_DEBUG,"rsprox (%s) open [%p]", ct->control->desc, rfd);
- rfd->next = info->rfd_list;
- info->rfd_list = rfd;
- rfd->info = info;
-
- rfd->more = xmalloc (sizeof(*rfd->more) * info->p.rset_no);
-
- rfd->buf = xmalloc(sizeof(*rfd->buf) * info->p.rset_no);
- for (i = 0; i < info->p.rset_no; i++)
- rfd->buf[i] = xmalloc (info->p.key_size);
-
- rfd->rfd = xmalloc(sizeof(*rfd->rfd) * info->p.rset_no);
- for (i = 0; i < info->p.rset_no; i++)
- rfd->rfd[i] = rset_open (info->p.rset[i], RSETF_READ);
-
- for (i = 0; i < info->p.rset_no; i++)
- rfd->more[i] = rset_read (info->p.rset[i], rfd->rfd[i],
- rfd->buf[i], &dummy);
- return rfd;
-}
-
-static void r_close (RSFD rfd)
-{
- struct rset_prox_info *info = ((struct rset_prox_rfd*)rfd)->info;
- struct rset_prox_rfd **rfdp;
-
- for (rfdp = &info->rfd_list; *rfdp; rfdp = &(*rfdp)->next)
- if (*rfdp == rfd)
+ rfd = rfd_create_base(ct);
+ if (rfd->priv)
+ p = (struct rset_prox_rfd *)(rfd->priv);
+ else
+ {
+ p = (struct rset_prox_rfd *) nmem_malloc(ct->nmem,sizeof(*p));
+ rfd->priv = p;
+ p->more = nmem_malloc(ct->nmem,sizeof(*p->more) * ct->no_children);
+ p->buf = nmem_malloc(ct->nmem,sizeof(*p->buf) * ct->no_children);
+ p->terms = nmem_malloc(ct->nmem,sizeof(*p->terms) * ct->no_children);
+ for (i = 0; i < ct->no_children; i++)
{
- int i;
- for (i = 0; i<info->p.rset_no; i++)
- xfree ((*rfdp)->buf[i]);
- xfree ((*rfdp)->buf);
- xfree ((*rfdp)->more);
-
- for (i = 0; i<info->p.rset_no; i++)
- rset_close (info->p.rset[i], (*rfdp)->rfd[i]);
- xfree ((*rfdp)->rfd);
-
- *rfdp = (*rfdp)->next;
- xfree (rfd);
- return;
+ p->buf[i] = nmem_malloc(ct->nmem,ct->keycontrol->key_size);
+ p->terms[i] = 0;
}
- logf (LOG_FATAL, "r_close but no rfd match!");
- assert (0);
+ p->rfd = nmem_malloc(ct->nmem,sizeof(*p->rfd) * ct->no_children);
+ }
+ yaz_log(YLOG_DEBUG,"rsprox (%s) open [%p] n=%d",
+ ct->control->desc, rfd, ct->no_children);
+
+ for (i = 0; i < ct->no_children; i++)
+ {
+ p->rfd[i] = rset_open(ct->children[i], RSETF_READ);
+ p->more[i] = rset_read(p->rfd[i], p->buf[i], &p->terms[i]);
+ }
+ p->hits = 0;
+ return rfd;
}
-static void r_delete (RSET ct)
+static void r_close(RSFD rfd)
{
- struct rset_prox_info *info = (struct rset_prox_info *) ct->buf;
+ RSET ct = rfd->rset;
+ struct rset_prox_rfd *p = (struct rset_prox_rfd *)(rfd->priv);
+
int i;
-
- assert (info->rfd_list == NULL);
- rset_term_destroy(ct->rset_terms[0]);
- xfree (ct->rset_terms);
- for (i = 0; i<info->p.rset_no; i++)
- rset_delete (info->p.rset[i]);
- xfree (info->p.rset);
- xfree (info);
+ for (i = 0; i < ct->no_children; i++)
+ rset_close(p->rfd[i]);
}
-static void r_rewind (RSFD rfd)
+static int r_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf)
{
- struct rset_prox_info *info = ((struct rset_prox_rfd*)rfd)->info;
- struct rset_prox_rfd *p = (struct rset_prox_rfd *) rfd;
- int dummy, i;
-
- logf (LOG_DEBUG, "rsprox_rewind");
-
- for (i = 0; i < info->p.rset_no; i++)
- {
- rset_rewind (info->p.rset[i], p->rfd[i]);
- p->more[i] = rset_read (info->p.rset[i], p->rfd[i], p->buf[i], &dummy);
- }
-}
-
-static int r_forward (RSET ct, RSFD rfd, void *buf, int *term_index,
- int (*cmpfunc)(const void *p1, const void *p2),
- const void *untilbuf)
-{
- /* Note: CT is not used. We _can_ pass NULL for it */
- struct rset_prox_info *info = ((struct rset_prox_rfd*)rfd)->info;
- struct rset_prox_rfd *p = (struct rset_prox_rfd *) rfd;
- int cmp=0;
+ RSET ct = rfd->rset;
+ struct rset_prox_info *info = (struct rset_prox_info *)(ct->priv);
+ struct rset_prox_rfd *p = (struct rset_prox_rfd *)(rfd->priv);
+ const struct rset_key_control *kctrl = ct->keycontrol;
+ int cmp = 0;
int i;
- int dummy;
if (untilbuf)
{
- /* it's enough to forward first one. Other will follow
- automatically */
- if ( p->more[0] && ((cmpfunc)(untilbuf, p->buf[0]) >= 2) )
- p->more[0] = rset_forward(info->p.rset[0], p->rfd[0],
- p->buf[0], &dummy, info->p.cmp,
- untilbuf);
+ /* it is enough to forward first one. Other will follow. */
+ if (p->more[0] && /* was: cmp >=2 */
+ ((kctrl->cmp)(untilbuf, p->buf[0]) >= rfd->rset->scope) )
+ p->more[0] = rset_forward(p->rfd[0], p->buf[0],
+ &p->terms[0], untilbuf);
}
- if (info->p.ordered && info->p.relation == 3 && info->p.exclusion == 0
- && info->p.distance == 1)
+ if (info->ordered && info->relation <= 3 && info->exclusion == 0)
{
- while (p->more[0])
- {
- for (i = 1; i < info->p.rset_no; i++)
- {
- if (!p->more[i])
- {
- p->more[0] = 0; /* saves us a goto out of while loop. */
- break;
- }
- cmp = (*info->p.cmp) (p->buf[i], p->buf[i-1]);
- if (cmp > 1)
- {
- p->more[i-1] = rset_forward (info->p.rset[i-1],
- p->rfd[i-1],
- p->buf[i-1], &dummy,
- info->p.cmp,
- p->buf[i]);
- break;
- }
- else if (cmp == 1)
- {
- if ((*info->p.getseq)(p->buf[i-1]) +1 !=
- (*info->p.getseq)(p->buf[i]))
- {
- p->more[i-1] = rset_read (
- info->p.rset[i-1], p->rfd[i-1],
- p->buf[i-1], &dummy);
- break;
- }
- }
- else
- {
- p->more[i] = rset_forward (info->p.rset[i], p->rfd[i],
- p->buf[i], &dummy,
- info->p.cmp,
- p->buf[i-1]);
- break;
- }
- }
- if (i == p->info->p.rset_no)
- {
- memcpy (buf, p->buf[0], info->p.key_size);
- *term_index = 0;
-
- p->more[0] = rset_read (info->p.rset[0], p->rfd[0],
- p->buf[0], &dummy);
- return 1;
- }
- }
+ while (p->more[0])
+ {
+ for (i = 1; i < ct->no_children; i++)
+ {
+ if (!p->more[i])
+ {
+ p->more[0] = 0; /* saves us a goto out of while loop. */
+ break;
+ }
+ cmp = (*kctrl->cmp)(p->buf[i], p->buf[i-1]);
+ if (cmp >= rfd->rset->scope) /* not same record */
+ {
+ p->more[i-1] = rset_forward(p->rfd[i-1],
+ p->buf[i-1],
+ &p->terms[i-1],
+ p->buf[i]);
+ break;
+ }
+ else if (cmp > 0) /* within record and ordered */
+ {
+ zint diff = (*kctrl->getseq)(p->buf[i]) -
+ (*kctrl->getseq)(p->buf[i-1]);
+ if (info->relation == 3 && diff == info->distance)
+ continue;
+ else if (info->relation == 2 && diff <= info->distance)
+ continue;
+ else if (info->relation == 1 && diff < info->distance)
+ continue;
+
+ p->more[i-1] = rset_read(p->rfd[i-1], p->buf[i-1],
+ &p->terms[i-1]);
+ break;
+ }
+ else /* within record - wrong order */
+ {
+ p->more[i] = rset_forward(p->rfd[i], p->buf[i],
+ &p->terms[i], p->buf[i-1]);
+ break;
+ }
+ }
+ if (i == ct->no_children)
+ {
+ i = ct->no_children-1;
+ memcpy(buf, p->buf[i], kctrl->key_size);
+ if (term)
+ *term = p->terms[i];
+ p->more[i] = rset_read(p->rfd[i], p->buf[i], &p->terms[i]);
+ p->hits++;
+ return 1;
+ }
+ }
}
- else if (info->p.rset_no == 2)
+ else if (ct->no_children == 2)
{
- while (p->more[0] && p->more[1])
- {
- int cmp = (*info->p.cmp)(p->buf[0], p->buf[1]);
- if (cmp < -1)
- p->more[0] = rset_forward (info->p.rset[0], p->rfd[0],
- p->buf[0],
- term_index, info->p.cmp, p->buf[0]);
- else if (cmp > 1)
- p->more[1] = rset_forward (info->p.rset[1], p->rfd[1],
- p->buf[1],
- term_index, info->p.cmp, p->buf[1]);
- else
- {
- int seqno[500];
- int n = 0;
-
- seqno[n++] = (*info->p.getseq)(p->buf[0]);
- while ((p->more[0] = rset_read (info->p.rset[0], p->rfd[0],
- p->buf[0],
- term_index)) >= -1 &&
- p->more[0] <= -1)
- if (n < 500)
- seqno[n++] = (*info->p.getseq)(p->buf[0]);
-
- for (i = 0; i<n; i++)
- {
- int diff = (*info->p.getseq)(p->buf[1]) - seqno[i];
- int excl = info->p.exclusion;
- if (!info->p.ordered && diff < 0)
- diff = -diff;
- switch (info->p.relation)
- {
- case 1: /* < */
- if (diff < info->p.distance && diff >= 0)
- excl = !excl;
- break;
- case 2: /* <= */
- if (diff <= info->p.distance && diff >= 0)
- excl = !excl;
- break;
- case 3: /* == */
- if (diff == info->p.distance && diff >= 0)
- excl = !excl;
- break;
- case 4: /* >= */
- if (diff >= info->p.distance && diff >= 0)
- excl = !excl;
- break;
- case 5: /* > */
- if (diff > info->p.distance && diff >= 0)
- excl = !excl;
- break;
- case 6: /* != */
- if (diff != info->p.distance && diff >= 0)
- excl = !excl;
- break;
- }
- if (excl)
- {
- memcpy (buf, p->buf[1], info->p.key_size);
- *term_index = 0;
-
- p->more[1] = rset_read (info->p.rset[1],
- p->rfd[1], p->buf[1],
- term_index);
- return 1;
- }
- }
- p->more[1] = rset_read (info->p.rset[1], p->rfd[1],
- p->buf[1],
- term_index);
- }
- }
+ while (p->more[0] && p->more[1])
+ {
+ int cmp = (*kctrl->cmp)(p->buf[0], p->buf[1]);
+ if ( cmp <= - rfd->rset->scope) /* cmp<-1*/
+ p->more[0] = rset_forward(p->rfd[0], p->buf[0],
+ &p->terms[0],p->buf[1]);
+ else if ( cmp >= rfd->rset->scope ) /* cmp>1 */
+ p->more[1] = rset_forward(p->rfd[1], p->buf[1],
+ &p->terms[1],p->buf[0]);
+ else
+ {
+ zint seqno[500]; /* FIXME - why 500 ?? */
+ int n = 0;
+
+ seqno[n++] = (*kctrl->getseq)(p->buf[0]);
+ while ((p->more[0] = rset_read(p->rfd[0],
+ p->buf[0], &p->terms[0])))
+ {
+ cmp = (*kctrl->cmp)(p->buf[0], p->buf[1]);
+ if (cmp <= - rfd->rset->scope || cmp >= rfd->rset->scope)
+ break;
+ if (n < 500)
+ seqno[n++] = (*kctrl->getseq)(p->buf[0]);
+ }
+ /* set up return buffer.. (save buf[1]) */
+ memcpy(buf, p->buf[1], kctrl->key_size);
+ if (term)
+ *term = p->terms[1];
+ while (1)
+ {
+ for (i = 0; i < n; i++)
+ {
+ zint diff = (*kctrl->getseq)(p->buf[1]) - seqno[i];
+ int excl = info->exclusion;
+ if (!info->ordered && diff < 0)
+ diff = -diff;
+ switch (info->relation)
+ {
+ case 1: /* < */
+ if (diff < info->distance && diff >= 0)
+ excl = !excl;
+ break;
+ case 2: /* <= */
+ if (diff <= info->distance && diff >= 0)
+ excl = !excl;
+ break;
+ case 3: /* == */
+ if (diff == info->distance && diff >= 0)
+ excl = !excl;
+ break;
+ case 4: /* >= */
+ if (diff >= info->distance && diff >= 0)
+ excl = !excl;
+ break;
+ case 5: /* > */
+ if (diff > info->distance && diff >= 0)
+ excl = !excl;
+ break;
+ case 6: /* != */
+ if (diff != info->distance && diff >= 0)
+ excl = !excl;
+ break;
+ }
+ if (excl)
+ {
+ p->more[1] = rset_read( p->rfd[1], p->buf[1],
+ &p->terms[1]);
+ p->hits++;
+ return 1;
+ }
+ }
+ p->more[1] = rset_read(p->rfd[1], p->buf[1], &p->terms[1]);
+ if (!p->more[1])
+ break;
+ cmp = (*kctrl->cmp)(buf, p->buf[1]);
+ if (cmp <= - rfd->rset->scope || cmp >= rfd->rset->scope)
+ break;
+ }
+ }
+ }
}
return 0;
}
-static int r_count (RSET ct)
+
+static int r_read(RSFD rfd, void *buf, TERMID *term)
{
- return 0;
+ return r_forward(rfd, buf, term, 0);
+}
+
+static int r_write(RSFD rfd, const void *buf)
+{
+ yaz_log(YLOG_FATAL, "prox set type is read-only");
+ return -1;
}
-static int r_read (RSFD rfd, void *buf, int *term_index)
+static void r_pos(RSFD rfd, double *current, double *total)
{
- return r_forward(0, rfd, buf, term_index, 0, 0);
+ RSET ct = rfd->rset;
+ struct rset_prox_rfd *p = (struct rset_prox_rfd *)(rfd->priv);
+ int i;
+ double r = 0.0;
+ double cur, tot = -1.0;
+ double scur = 0.0, stot = 0.0;
+
+ yaz_log(YLOG_DEBUG, "rsprox_pos");
+
+ for (i = 0; i < ct->no_children; i++)
+ {
+ rset_pos(p->rfd[i], &cur, &tot);
+ if (tot > 0)
+ {
+ scur += cur;
+ stot += tot;
+ }
+ }
+ if (tot < 0)
+ { /* nothing found */
+ *current = -1;
+ *total = -1;
+ }
+ else if (tot < 1)
+ { /* most likely tot==0 */
+ *current = 0;
+ *total = 0;
+ }
+ else
+ {
+ r = scur/stot;
+ *current = (double) p->hits;
+ *total = *current/r ;
+ }
+ yaz_log(YLOG_DEBUG,"prox_pos: [%d] %0.1f/%0.1f= %0.4f ",
+ i,*current, *total, r);
}
-static int r_write (RSFD rfd, const void *buf)
+static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm)
{
- logf (LOG_FATAL, "prox set type is read-only");
- return -1;
+ int i;
+ for (i = 0; i < ct->no_children; i++)
+ rset_getterms(ct->children[i], terms, maxterms, curterm);
}
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+