X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=rset%2Frsbool.c;h=c573e10a0bbab1e73153f082dd941f453244115c;hp=df2c5e417196f46313fcd7b53fd61f99c31b3637;hb=1d5d4f08cb84516d75fcb5e6ed4199b6454cccd6;hpb=7b0a5daa703117cde2dc0d54d5a39941a1c01ce8 diff --git a/rset/rsbool.c b/rset/rsbool.c index df2c5e4..c573e10 100644 --- a/rset/rsbool.c +++ b/rset/rsbool.c @@ -1,349 +1,283 @@ -/* - * Copyright (C) 1994-1995, Index Data I/S - * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss - * - * $Log: rsbool.c,v $ - * Revision 1.9 1995-12-11 09:15:22 adam - * New set types: sand/sor/snot - ranked versions of and/or/not in - * ranked/semi-ranked result sets. - * Note: the snot not finished yet. - * New rset member: flag. - * Bug fix: r_delete in rsrel.c did free bad memory block. - * - * Revision 1.8 1995/10/12 12:41:55 adam - * Private info (buf) moved from struct rset_control to struct rset. - * Bug fixes in relevance. - * - * Revision 1.7 1995/10/10 14:00:03 adam - * Function rset_open changed its wflag parameter to general flags. - * - * Revision 1.6 1995/10/06 14:38:05 adam - * New result set method: r_score. - * Local no (sysno) and score is transferred to retrieveCtrl. - * - * Revision 1.5 1995/09/08 14:52:41 adam - * Work on relevance feedback. - * - * Revision 1.4 1995/09/08 08:54:04 adam - * More efficient and operation. - * - * Revision 1.3 1995/09/07 13:58:43 adam - * New parameter: result-set file descriptor (RSFD) to support multiple - * positions within the same result-set. - * Boolean operators: and, or, not implemented. - * - * Revision 1.2 1995/09/06 16:11:55 adam - * More work on boolean sets. - * - * Revision 1.1 1995/09/06 13:27:15 adam - * New set type: bool. Not finished yet. - * - */ +/* This file is part of the Zebra server. 
+ Copyright (C) 2004-2013 Index Data + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*/ +#if HAVE_CONFIG_H +#include +#endif #include +#include +#include #include -#include -#include - -static void *r_create(const struct rset_control *sel, void *parms, - int *flags); -static RSFD r_open (RSET ct, int flag); -static void r_close (RSFD rfd); -static void r_delete (RSET ct); -static void r_rewind (RSFD rfd); -static int r_count (RSET ct); -static int r_read_and (RSFD rfd, void *buf); -static int r_read_or (RSFD rfd, void *buf); -static int r_read_not (RSFD rfd, void *buf); -static int r_write (RSFD rfd, const void *buf); -static int r_score (RSFD rfd, int *score); - -static const rset_control control_and = -{ - "and", - r_create, - r_open, - r_close, - r_delete, - r_rewind, - r_count, - r_read_and, - r_write, - r_score -}; +#include +#include -static const rset_control control_or = -{ - "or", - r_create, - r_open, - r_close, - r_delete, - r_rewind, - r_count, - r_read_or, - r_write, - r_score -}; +#ifndef RSET_DEBUG +#define RSET_DEBUG 0 +#endif -static const rset_control control_not = +static RSFD r_open(RSET ct, int flag); +static void r_close(RSFD rfd); +static void r_delete(RSET ct); +static int r_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf); +static void r_pos(RSFD rfd, double *current, double *total); +static int r_read_not(RSFD rfd, void 
*buf, TERMID *term); +static int r_write(RSFD rfd, const void *buf); +static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm); + +static const struct rset_control control_not = { "not", - r_create, + r_delete, + r_get_terms, r_open, r_close, - r_delete, - r_rewind, - r_count, + r_forward, + r_pos, r_read_not, r_write, - r_score }; - -const rset_control *rset_kind_and = &control_and; -const rset_control *rset_kind_or = &control_or; -const rset_control *rset_kind_not = &control_not; - -struct rset_bool_info { - int key_size; +struct rset_private { RSET rset_l; RSET rset_r; - int (*cmp)(const void *p1, const void *p2); - struct rset_bool_rfd *rfd_list; }; -struct rset_bool_rfd { +struct rfd_private { + zint hits; RSFD rfd_l; RSFD rfd_r; int more_l; int more_r; void *buf_l; void *buf_r; - struct rset_bool_rfd *next; - struct rset_bool_info *info; -}; + TERMID term_l; + TERMID term_r; + int tail; +}; -static void *r_create (const struct rset_control *sel, void *parms, - int *flags) +static RSET rsbool_create_base(const struct rset_control *ctrl, + NMEM nmem, + struct rset_key_control *kcontrol, + int scope, RSET rset_l, RSET rset_r) { - rset_bool_parms *bool_parms = parms; - struct rset_bool_info *info; - - info = xmalloc (sizeof(*info)); - info->key_size = bool_parms->key_size; - info->rset_l = bool_parms->rset_l; - info->rset_r = bool_parms->rset_r; - if (rset_is_volatile(info->rset_l) || rset_is_volatile(info->rset_r)) - *flags |= RSET_FLAG_VOLATILE; - info->cmp = bool_parms->cmp; - info->rfd_list = NULL; - return info; + RSET children[2], rnew; + struct rset_private *info; + + children[0] = rset_l; + children[1] = rset_r; + rnew = rset_create_base(ctrl, nmem, kcontrol, scope, 0, 2, children); + info = (struct rset_private *) nmem_malloc(rnew->nmem, sizeof(*info)); + info->rset_l = rset_l; + info->rset_r = rset_r; + rnew->priv = info; + return rnew; } -static RSFD r_open (RSET ct, int flag) +RSET rset_create_not(NMEM nmem, struct rset_key_control 
*kcontrol, + int scope, RSET rset_l, RSET rset_r) { - struct rset_bool_info *info = ct->buf; - struct rset_bool_rfd *rfd; - - if (flag & RSETF_WRITE) - { - logf (LOG_FATAL, "bool set type is read-only"); - return NULL; - } - rfd = xmalloc (sizeof(*rfd)); - rfd->next = info->rfd_list; - info->rfd_list = rfd; - rfd->info = info; - - rfd->buf_l = xmalloc (info->key_size); - rfd->buf_r = xmalloc (info->key_size); - rfd->rfd_l = rset_open (info->rset_l, RSETF_READ|RSETF_SORT_SYSNO); - rfd->rfd_r = rset_open (info->rset_r, RSETF_READ|RSETF_SORT_SYSNO); - rfd->more_l = rset_read (info->rset_l, rfd->rfd_l, rfd->buf_l); - rfd->more_r = rset_read (info->rset_r, rfd->rfd_r, rfd->buf_r); - return rfd; + return rsbool_create_base(&control_not, nmem, kcontrol, + scope, rset_l, rset_r); } -static void r_close (RSFD rfd) +static void r_delete(RSET ct) { - struct rset_bool_info *info = ((struct rset_bool_rfd*)rfd)->info; - struct rset_bool_rfd **rfdp; - - for (rfdp = &info->rfd_list; *rfdp; rfdp = &(*rfdp)->next) - if (*rfdp == rfd) - { - xfree ((*rfdp)->buf_l); - xfree ((*rfdp)->buf_r); - rset_close (info->rset_l, (*rfdp)->rfd_l); - rset_close (info->rset_r, (*rfdp)->rfd_r); - *rfdp = (*rfdp)->next; - free (rfd); - return; - } - logf (LOG_FATAL, "r_close but no rfd match!"); - assert (0); } -static void r_delete (RSET ct) +static RSFD r_open(RSET ct, int flag) { - struct rset_bool_info *info = ct->buf; + struct rset_private *info = (struct rset_private *) ct->priv; + RSFD rfd; + struct rfd_private *p; - assert (info->rfd_list == NULL); - rset_delete (info->rset_l); - rset_delete (info->rset_r); - xfree (info); -} + if (flag & RSETF_WRITE) + { + yaz_log(YLOG_FATAL, "bool set type is read-only"); + return NULL; + } + rfd = rfd_create_base(ct); + if (rfd->priv) + p = (struct rfd_private *)rfd->priv; + else { + p = nmem_malloc(ct->nmem,sizeof(*p)); + rfd->priv = p; + p->buf_l = nmem_malloc(ct->nmem, ct->keycontrol->key_size); + p->buf_r = nmem_malloc(ct->nmem, 
ct->keycontrol->key_size); + } -static void r_rewind (RSFD rfd) -{ - struct rset_bool_info *info = ((struct rset_bool_rfd*)rfd)->info; - struct rset_bool_rfd *p = rfd; - - logf (LOG_DEBUG, "rsbool_rewind"); - rset_rewind (info->rset_l, p->rfd_l); - rset_rewind (info->rset_r, p->rfd_r); - p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l); - p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r); -} + yaz_log(YLOG_DEBUG,"rsbool (%s) open [%p]", ct->control->desc, rfd); + p->hits=0; -static int r_count (RSET ct) -{ - return 0; + p->rfd_l = rset_open (info->rset_l, RSETF_READ); + p->rfd_r = rset_open (info->rset_r, RSETF_READ); + p->more_l = rset_read(p->rfd_l, p->buf_l, &p->term_l); + p->more_r = rset_read(p->rfd_r, p->buf_r, &p->term_r); + p->tail = 0; + return rfd; } -static int r_read_and (RSFD rfd, void *buf) +static void r_close (RSFD rfd) { - struct rset_bool_rfd *p = rfd; - struct rset_bool_info *info = p->info; - - while (p->more_l && p->more_r) - { - int cmp; + struct rfd_private *prfd=(struct rfd_private *)rfd->priv; - cmp = (*info->cmp)(p->buf_l, p->buf_r); - if (!cmp) - { - memcpy (buf, p->buf_l, info->key_size); - p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l); - p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r); - return 1; - } - else if (cmp == 1) - { - memcpy (buf, p->buf_r, info->key_size); - p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r); - return 1; - } - else if (cmp == -1) - { - memcpy (buf, p->buf_l, info->key_size); - p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l); - return 1; - } - else if (cmp > 1) - p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r); - else - p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l); - } - return 0; + rset_close (prfd->rfd_l); + rset_close (prfd->rfd_r); } -static int r_read_or (RSFD rfd, void *buf) +static int r_forward(RSFD rfd, void *buf, TERMID *term, + const void *untilbuf) { - struct rset_bool_rfd *p = rfd; - struct rset_bool_info *info = p->info; - - 
while (p->more_l || p->more_r) - { - int cmp; - - if (p->more_l && p->more_r) - cmp = (*info->cmp)(p->buf_l, p->buf_r); - else if (p->more_r) - cmp = 2; - else - cmp = -2; - if (!cmp) - { - memcpy (buf, p->buf_l, info->key_size); - p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l); - p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r); - return 1; - } - else if (cmp > 0) - { - memcpy (buf, p->buf_r, info->key_size); - p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r); - return 1; - } - else - { - memcpy (buf, p->buf_l, info->key_size); - p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l); - return 1; - } - } - return 0; + struct rfd_private *p = (struct rfd_private *)rfd->priv; + const struct rset_key_control *kctrl=rfd->rset->keycontrol; + + if ( p->more_l && ((kctrl->cmp)(untilbuf,p->buf_l)>=rfd->rset->scope) ) + p->more_l = rset_forward(p->rfd_l, p->buf_l, &p->term_l, untilbuf); + if ( p->more_r && ((kctrl->cmp)(untilbuf,p->buf_r)>=rfd->rset->scope)) + p->more_r = rset_forward(p->rfd_r, p->buf_r, &p->term_r, untilbuf); + p->tail = 0; + return rset_read(rfd,buf,term); } -static int r_read_not (RSFD rfd, void *buf) + +/* + 1,1 1,3 + 1,9 2,1 + 1,11 3,1 + 2,9 + + 1,1 1,1 + 1,3 1,3 + 1,9 + 1,11 + 2,1 2,1 + 2,9 + 3,1 +*/ + +static int r_read_not(RSFD rfd, void *buf, TERMID *term) { - struct rset_bool_rfd *p = rfd; - struct rset_bool_info *info = p->info; + struct rfd_private *p = (struct rfd_private *)rfd->priv; + const struct rset_key_control *kctrl = rfd->rset->keycontrol; - while (p->more_l || p->more_r) + while (p->more_l) { int cmp; - if (p->more_l && p->more_r) - cmp = (*info->cmp)(p->buf_l, p->buf_r); - else if (p->more_r) - cmp = 2; + if (p->more_r) + cmp = (*kctrl->cmp)(p->buf_l, p->buf_r); else - cmp = -2; - if (cmp < -1) - { - memcpy (buf, p->buf_l, info->key_size); - p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l); + cmp = -rfd->rset->scope; + + if (cmp <= -rfd->rset->scope) + { /* cmp == -2 */ + memcpy (buf, p->buf_l, 
kctrl->key_size); + if (term) + *term=p->term_l; + p->more_l = rset_read(p->rfd_l, p->buf_l, &p->term_l); + p->hits++; return 1; } - else if (cmp > 1) - p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r); - else + else if (cmp >= rfd->rset->scope) /* cmp >1 */ { - memcpy (buf, p->buf_l, info->key_size); + p->more_r = rset_forward( p->rfd_r, p->buf_r, + &p->term_r, p->buf_l); + } + else + { /* cmp== -1, 0, or 1 */ + memcpy (buf, p->buf_l, kctrl->key_size); + if (term) + *term = p->term_l; do { - p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l); + p->more_l = rset_read(p->rfd_l, p->buf_l, &p->term_l); if (!p->more_l) break; - cmp = (*info->cmp)(p->buf_l, buf); - } while (cmp >= -1 && cmp <= 1); + cmp = (*kctrl->cmp)(p->buf_l, buf); + } while (abs(cmp)<rfd->rset->scope); + /* (cmp >= -1 && cmp <= 1) */ do { - p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r); + p->more_r = rset_read(p->rfd_r, p->buf_r, &p->term_r); if (!p->more_r) break; - cmp = (*info->cmp)(p->buf_r, buf); - } while (cmp >= -1 && cmp <= 1); + cmp = (*kctrl->cmp)(p->buf_r, buf); + } while (abs(cmp)<rfd->rset->scope); + /* (cmp >= -1 && cmp <= 1) */ } } return 0; } -static int r_write (RSFD rfd, const void *buf) +static int r_write(RSFD rfd, const void *buf) { - logf (LOG_FATAL, "bool set type is read-only"); + yaz_log(YLOG_FATAL, "bool set type is read-only"); return -1; } -static int r_score (RSFD rfd, int *score) +static void r_pos(RSFD rfd, double *current, double *total) { - *score = -1; - return -1; + struct rfd_private *p = (struct rfd_private *)rfd->priv; + double lcur, ltot; + double rcur, rtot; + double r; + ltot = -1; + rtot = -1; + rset_pos(p->rfd_l, &lcur, &ltot); + rset_pos(p->rfd_r, &rcur, &rtot); + if ( (rtot<0) && (ltot<0)) { /*no position */ + *current = rcur; /* return same as you got */ + *total = rtot; /* probably -1 for not available */ + } + if (rtot < 0) + rtot = rcur = 0; /* if only one useful, use it */ + if (ltot < 0) + ltot = lcur = 0; + if (rtot+ltot < 1) + { /* 
empty rset */ + *current = *total = 0; + return; + } + r = 1.0*(lcur+rcur)/(ltot+rtot); /* weighed average of l and r */ + *current = (double) (p->hits); + *total = *current/r ; +#if RSET_DEBUG + yaz_log(YLOG_DEBUG,"bool_pos: (%s/%s) %0.1f/%0.1f= %0.4f ", + info->rset_l->control->desc, info->rset_r->control->desc, + *current, *total, r); +#endif } +static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm) +{ + struct rset_private *info = (struct rset_private *) ct->priv; + rset_getterms(info->rset_l, terms, maxterms, curterm); + rset_getterms(info->rset_r, terms, maxterms, curterm); +} + +/* + * Local variables: + * c-basic-offset: 4 + * c-file-style: "Stroustrup" + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ +