X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=rset%2Frsprox.c;h=4352e3e413c8c96cb2b3de5b0e9f9e90cd481052;hb=d195e3229385349f842d2f82b65ad8118d64aec5;hp=4c5701c1be7473cfb62349bfb5f6dc147774a32c;hpb=927733195c5646988dcc65365bb45b978b02a15a;p=idzebra-moved-to-github.git diff --git a/rset/rsprox.c b/rset/rsprox.c index 4c5701c..4352e3e 100644 --- a/rset/rsprox.c +++ b/rset/rsprox.c @@ -1,4 +1,4 @@ -/* $Id: rsprox.c,v 1.11 2004-08-24 14:25:16 heikki Exp $ +/* $Id: rsprox.c,v 1.22 2005-01-04 19:59:59 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -26,7 +26,7 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include #include -#include +#include #ifndef RSET_DEBUG #define RSET_DEBUG 0 @@ -35,21 +35,19 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA static RSFD r_open (RSET ct, int flag); static void r_close (RSFD rfd); static void r_delete (RSET ct); -static void r_rewind (RSFD rfd); -static int r_forward(RSET ct, RSFD rfd, void *buf, - int (*cmpfunc)(const void *p1, const void *p2), - const void *untilbuf); -static int r_read (RSFD rfd, void *buf); +static int r_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf); +static int r_read (RSFD rfd, void *buf, TERMID *term); static int r_write (RSFD rfd, const void *buf); static void r_pos (RSFD rfd, double *current, double *total); +static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm); static const struct rset_control control = { "prox", + r_delete, + r_get_terms, r_open, r_close, - r_delete, - r_rewind, r_forward, r_pos, r_read, @@ -59,42 +57,31 @@ static const struct rset_control control = const struct rset_control *rset_kind_prox = &control; struct rset_prox_info { -/* struct rset_prox_parms p; */ - RSET *rset; - int rset_no; + RSET *rset; /* array of 'child' rsets */ + int rset_no; /* how many of them */ int ordered; int exclusion; int relation; int distance; - int key_size; - int 
(*cmp)(const void *p1, const void *p2); - int (*getseq)(const void *p); - struct rset_prox_rfd *rfd_list; }; struct rset_prox_rfd { RSFD *rfd; char **buf; /* lookahead key buffers */ char *more; /* more in each lookahead? */ - struct rset_prox_rfd *next; - struct rset_prox_info *info; + TERMID *terms; /* lookahead terms */ zint hits; }; -RSET rsprox_create( NMEM nmem, int key_size, - int (*cmp)(const void *p1, const void *p2), - int (*getseq)(const void *p), +RSET rsprox_create( NMEM nmem, const struct key_control *kcontrol, int scope, int rset_no, RSET *rset, int ordered, int exclusion, int relation, int distance) { - RSET rnew=rset_create_base(&control, nmem); + RSET rnew=rset_create_base(&control, nmem, kcontrol, scope,0); struct rset_prox_info *info; info = (struct rset_prox_info *) nmem_malloc(rnew->nmem,sizeof(*info)); - info->key_size = key_size; - info->cmp = cmp; - info->getseq=getseq; /* FIXME - what about multi-level stuff ?? */ info->rset = nmem_malloc(rnew->nmem,rset_no * sizeof(*info->rset)); memcpy(info->rset, rset, rset_no * sizeof(*info->rset)); @@ -103,7 +90,6 @@ RSET rsprox_create( NMEM nmem, int key_size, info->exclusion=exclusion; info->relation=relation; info->distance=distance; - info->rfd_list = NULL; rnew->priv=info; return rnew; } @@ -114,124 +100,76 @@ static void r_delete (RSET ct) struct rset_prox_info *info = (struct rset_prox_info *) ct->priv; int i; - assert (info->rfd_list == NULL); for (i = 0; i<info->rset_no; i++) rset_delete (info->rset[i]); -/* xfree (info->rset); */ /* nmems! 
*/ -/* xfree (info); */ } -#if 0 -static void *r_create (RSET ct, const struct rset_control *sel, void *parms) -{ - rset_prox_parms *prox_parms = (rset_prox_parms *) parms; - struct rset_prox_info *info; - - info = (struct rset_prox_info *) xmalloc (sizeof(*info)); - memcpy(&info->p, prox_parms, sizeof(struct rset_prox_parms)); - assert(info->p.rset_no >= 2); - info->p.rset = xmalloc(info->p.rset_no * sizeof(*info->p.rset)); - memcpy(info->p.rset, prox_parms->rset, - info->p.rset_no * sizeof(*info->p.rset)); - info->rfd_list = NULL; - return info; -} -#endif static RSFD r_open (RSET ct, int flag) { struct rset_prox_info *info = (struct rset_prox_info *) ct->priv; - struct rset_prox_rfd *rfd; + RSFD rfd; + struct rset_prox_rfd *p; int i; if (flag & RSETF_WRITE) { - logf (LOG_FATAL, "prox set type is read-only"); + yaz_log (YLOG_FATAL, "prox set type is read-only"); return NULL; } - rfd = (struct rset_prox_rfd *) xmalloc (sizeof(*rfd)); - logf(LOG_DEBUG,"rsprox (%s) open [%p]", ct->control->desc, rfd); - rfd->next = info->rfd_list; - info->rfd_list = rfd; - rfd->info = info; - - rfd->more = xmalloc (sizeof(*rfd->more) * info->rset_no); - - rfd->buf = xmalloc(sizeof(*rfd->buf) * info->rset_no); - for (i = 0; i < info->rset_no; i++) - rfd->buf[i] = xmalloc (info->key_size); - - rfd->rfd = xmalloc(sizeof(*rfd->rfd) * info->rset_no); - for (i = 0; i < info->rset_no; i++) - rfd->rfd[i] = rset_open (info->rset[i], RSETF_READ); + rfd = rfd_create_base(ct); + if (rfd->priv) + p=(struct rset_prox_rfd *)(rfd->priv); + else { + p = (struct rset_prox_rfd *) nmem_malloc (ct->nmem,sizeof(*p)); + rfd->priv=p; + p->more = nmem_malloc (ct->nmem,sizeof(*p->more) * info->rset_no); + p->buf = nmem_malloc(ct->nmem,sizeof(*p->buf) * info->rset_no); + p->terms = nmem_malloc(ct->nmem,sizeof(*p->terms) * info->rset_no); + for (i = 0; i < info->rset_no; i++) + { + p->buf[i] = nmem_malloc(ct->nmem,ct->keycontrol->key_size); + p->terms[i] = 0; + } + p->rfd = nmem_malloc(ct->nmem,sizeof(*p->rfd) 
* info->rset_no); + } + yaz_log(YLOG_DEBUG,"rsprox (%s) open [%p] n=%d", + ct->control->desc, rfd, info->rset_no); - for (i = 0; i < info->rset_no; i++) - rfd->more[i] = rset_read (info->rset[i], rfd->rfd[i], - rfd->buf[i]); - rfd->hits=0; + for (i = 0; i < info->rset_no; i++) { + p->rfd[i] = rset_open (info->rset[i], RSETF_READ); + p->more[i] = rset_read (p->rfd[i], p->buf[i], &p->terms[i]); + } + p->hits=0; return rfd; } static void r_close (RSFD rfd) { - struct rset_prox_info *info = ((struct rset_prox_rfd*)rfd)->info; - struct rset_prox_rfd **rfdp; + struct rset_prox_info *info = (struct rset_prox_info *)(rfd->rset->priv); + struct rset_prox_rfd *p=(struct rset_prox_rfd *)(rfd->priv); - for (rfdp = &info->rfd_list; *rfdp; rfdp = &(*rfdp)->next) - if (*rfdp == rfd) - { - int i; - for (i = 0; i<info->rset_no; i++) - xfree ((*rfdp)->buf[i]); - xfree ((*rfdp)->buf); - xfree ((*rfdp)->more); - - for (i = 0; i<info->rset_no; i++) - rset_close (info->rset[i], (*rfdp)->rfd[i]); - xfree ((*rfdp)->rfd); - - *rfdp = (*rfdp)->next; - xfree (rfd); - return; - } - logf (LOG_FATAL, "r_close but no rfd match!"); - assert (0); -} - -static void r_rewind (RSFD rfd) -{ - struct rset_prox_info *info = ((struct rset_prox_rfd*)rfd)->info; - struct rset_prox_rfd *p = (struct rset_prox_rfd *) rfd; int i; - - logf (LOG_DEBUG, "rsprox_rewind"); - - for (i = 0; i < info->rset_no; i++) - { - rset_rewind (info->rset[i], p->rfd[i]); - p->more[i] = rset_read (info->rset[i], p->rfd[i], p->buf[i]); - } - p->hits=0; + for (i = 0; i<info->rset_no; i++) + rset_close (p->rfd[i]); + rfd_delete_base(rfd); } -static int r_forward (RSET ct, RSFD rfd, void *buf, - int (*cmpfunc)(const void *p1, const void *p2), - const void *untilbuf) +static int r_forward (RSFD rfd, void *buf, TERMID *term, const void *untilbuf) { - /* Note: CT is not used. 
We _can_ pass NULL for it */ - struct rset_prox_info *info = ((struct rset_prox_rfd*)rfd)->info; - struct rset_prox_rfd *p = (struct rset_prox_rfd *) rfd; + struct rset_prox_info *info = (struct rset_prox_info *)(rfd->rset->priv); + struct rset_prox_rfd *p=(struct rset_prox_rfd *)(rfd->priv); + const struct key_control *kctrl=rfd->rset->keycontrol; int cmp=0; int i; if (untilbuf) { - /* it's enough to forward first one. Other will follow - automatically */ - if ( p->more[0] && ((cmpfunc)(untilbuf, p->buf[0]) >= 2) ) - p->more[0] = rset_forward(info->rset[0], p->rfd[0], - p->buf[0], info->cmp, - untilbuf); + /* it is enough to forward first one. Other will follow. */ + if ( p->more[0] && /* was: cmp >=2 */ + ((kctrl->cmp)(untilbuf, p->buf[0]) >= rfd->rset->scope) ) + p->more[0] = rset_forward(p->rfd[0], p->buf[0], + &p->terms[0], untilbuf); } if (info->ordered && info->relation == 3 && info->exclusion == 0 && info->distance == 1) @@ -242,41 +180,41 @@ static int r_forward (RSET ct, RSFD rfd, void *buf, { if (!p->more[i]) { - p->more[0] = 0; /* saves us a goto out of while loop. */ + p->more[0] = 0; /* saves us a goto out of while loop. 
*/ break; } - cmp = (*info->cmp) (p->buf[i], p->buf[i-1]); - if (cmp > 1) + cmp = (*kctrl->cmp) (p->buf[i], p->buf[i-1]); + if (cmp >= rfd->rset->scope ) /* cmp>1 */ { - p->more[i-1] = rset_forward (info->rset[i-1], - p->rfd[i-1], + p->more[i-1] = rset_forward (p->rfd[i-1], p->buf[i-1], - info->cmp, + &p->terms[i-1], p->buf[i]); break; } - else if (cmp == 1) + else if ( cmp>0 ) /* cmp == 1*/ { - if ((*info->getseq)(p->buf[i-1]) +1 != - (*info->getseq)(p->buf[i])) - { - p->more[i-1] = rset_read ( info->rset[i-1], - p->rfd[i-1], p->buf[i-1]); + if ((*kctrl->getseq)(p->buf[i-1]) +1 != + (*kctrl->getseq)(p->buf[i])) + { /* FIXME - We need more flexible multilevel stuff */ + p->more[i-1] = rset_read ( p->rfd[i-1], p->buf[i-1], + &p->terms[i-1]); break; } } else { - p->more[i] = rset_forward (info->rset[i], p->rfd[i], - p->buf[i], info->cmp, - p->buf[i-1]); + p->more[i] = rset_forward (p->rfd[i], + p->buf[i], &p->terms[i], p->buf[i-1]); break; } } - if (i == p->info->rset_no) + if (i == info->rset_no) { - memcpy (buf, p->buf[0], info->key_size); - p->more[0] = rset_read (info->rset[0], p->rfd[0], p->buf[0]); + memcpy (buf, p->buf[0], kctrl->key_size); + if (term) + *term=p->terms[0]; + p->more[0] = rset_read (p->rfd[0], p->buf[0], &p->terms[0]); p->hits++; return 1; } @@ -286,28 +224,28 @@ static int r_forward (RSET ct, RSFD rfd, void *buf, { while (p->more[0] && p->more[1]) { - int cmp = (*info->cmp)(p->buf[0], p->buf[1]); - if (cmp < -1) - p->more[0] = rset_forward (info->rset[0], p->rfd[0], - p->buf[0], info->cmp, p->buf[0]); - else if (cmp > 1) - p->more[1] = rset_forward (info->rset[1], p->rfd[1], - p->buf[1], info->cmp, p->buf[1]); + int cmp = (*kctrl->cmp)(p->buf[0], p->buf[1]); + if ( cmp <= - rfd->rset->scope) /* cmp<-1*/ + p->more[0] = rset_forward (p->rfd[0], p->buf[0], + &p->terms[0],p->buf[1]); + else if ( cmp >= rfd->rset->scope ) /* cmp>1 */ + p->more[1] = rset_forward (p->rfd[1], p->buf[1], + &p->terms[1],p->buf[0]); else { - int seqno[500]; + zint 
seqno[500]; /* FIXME - why 500 ?? */ int n = 0; - seqno[n++] = (*info->getseq)(p->buf[0]); - while ((p->more[0] = rset_read (info->rset[0], p->rfd[0], - p->buf[0])) >= -1 && + seqno[n++] = (*kctrl->getseq)(p->buf[0]); + while ((p->more[0] = rset_read (p->rfd[0], + p->buf[0], &p->terms[0])) >= -1 && p->more[0] <= -1) if (n < 500) - seqno[n++] = (*info->getseq)(p->buf[0]); + seqno[n++] = (*kctrl->getseq)(p->buf[0]); for (i = 0; i<n; i++) { - int diff = (*info->getseq)(p->buf[1]) - seqno[i]; + int diff = (*kctrl->getseq)(p->buf[1]) - seqno[i]; int excl = info->exclusion; if (!info->ordered && diff < 0) diff = -diff; @@ -340,16 +278,16 @@ static int r_forward (RSET ct, RSFD rfd, void *buf, } if (excl) { - memcpy (buf, p->buf[1], info->key_size); - - p->more[1] = rset_read (info->rset[1], - p->rfd[1], p->buf[1]); + memcpy (buf, p->buf[1], kctrl->key_size); + if (term) + *term=p->terms[1]; + p->more[1] = rset_read ( p->rfd[1], p->buf[1], + &p->terms[1]); p->hits++; return 1; } } - p->more[1] = rset_read (info->rset[1], p->rfd[1], - p->buf[1]); + p->more[1] = rset_read (p->rfd[1], p->buf[1],&p->terms[1]); } } } @@ -357,32 +295,31 @@ static int r_forward (RSET ct, RSFD rfd, void *buf, } -static int r_read (RSFD rfd, void *buf) +static int r_read (RSFD rfd, void *buf, TERMID *term) { - { double cur,tot; r_pos(rfd,&cur,&tot); } /*!*/ - return r_forward(0, rfd, buf, 0, 0); + return r_forward(rfd, buf, term, 0); } static int r_write (RSFD rfd, const void *buf) { - logf (LOG_FATAL, "prox set type is read-only"); + yaz_log (YLOG_FATAL, "prox set type is read-only"); return -1; } static void r_pos (RSFD rfd, double *current, double *total) { - struct rset_prox_info *info = ((struct rset_prox_rfd*)rfd)->info; - struct rset_prox_rfd *p = (struct rset_prox_rfd *) rfd; + struct rset_prox_info *info = (struct rset_prox_info *)(rfd->rset->priv); + struct rset_prox_rfd *p=(struct rset_prox_rfd *)(rfd->priv); int i; - double cur,tot=-1; - double scur=0,stot=0; - double r; + double r = 0.0; + double cur, tot = -1.0; + 
double scur = 0.0, stot = 0.0; - logf (LOG_DEBUG, "rsprox_pos"); + yaz_log (YLOG_DEBUG, "rsprox_pos"); for (i = 0; i < info->rset_no; i++) { - rset_pos(info->rset[i], p->rfd[i], &cur, &tot); + rset_pos(p->rfd[i], &cur, &tot); if (tot>0) { scur += cur; stot += tot; @@ -399,6 +336,18 @@ static void r_pos (RSFD rfd, double *current, double *total) *current=p->hits; *total=*current/r ; } - logf(LOG_DEBUG,"prox_pos: [%d] %0.1f/%0.1f= %0.4f ", + yaz_log(YLOG_DEBUG,"prox_pos: [%d] %0.1f/%0.1f= %0.4f ", i,*current, *total, r); } + + + +static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm) +{ + struct rset_prox_info *info = + (struct rset_prox_info *) ct->priv; + int i; + for (i=0;i<info->rset_no;i++) + rset_getterms(info->rset[i], terms, maxterms, curterm); +} +