X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=rset%2Frsmultiandor.c;h=ac728b1173b219ccd7779142439a0811e7b6a733;hp=b548a1b94eefd57bcc6a13f5646f7cc8dd79e218;hb=fc94f3546d759ddb144f879ca9e6fa60f13df292;hpb=38f56500d2a26ab515dab1c453e5fd7d1000b99b diff --git a/rset/rsmultiandor.c b/rset/rsmultiandor.c index b548a1b..ac728b1 100644 --- a/rset/rsmultiandor.c +++ b/rset/rsmultiandor.c @@ -1,4 +1,4 @@ -/* $Id: rsmultiandor.c,v 1.1 2004-09-28 13:06:35 heikki Exp $ +/* $Id: rsmultiandor.c,v 1.9 2004-11-04 13:54:08 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -47,23 +47,23 @@ static RSFD r_open_and (RSET ct, int flag); static RSFD r_open_or (RSET ct, int flag); static void r_close (RSFD rfd); static void r_delete (RSET ct); -static void r_rewind (RSFD rfd); -static int r_read_and (RSFD rfd, void *buf); -static int r_read_or (RSFD rfd, void *buf); +static int r_read_and (RSFD rfd, void *buf, TERMID *term); +static int r_read_or (RSFD rfd, void *buf, TERMID *term); static int r_write (RSFD rfd, const void *buf); -static int r_forward_and(RSFD rfd, void *buf, +static int r_forward_and(RSFD rfd, void *buf, TERMID *term, const void *untilbuf); -static int r_forward_or(RSFD rfd, void *buf, +static int r_forward_or(RSFD rfd, void *buf, TERMID *term, const void *untilbuf); static void r_pos (RSFD rfd, double *current, double *total); +static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm); static const struct rset_control control_or = { "multi-or", r_delete, + r_get_terms, r_open_or, r_close, - r_rewind, r_forward_or, r_pos, r_read_or, @@ -73,9 +73,9 @@ static const struct rset_control control_and = { "multi-and", r_delete, + r_get_terms, r_open_and, r_close, - r_rewind, r_forward_and, r_pos, r_read_and, @@ -97,6 +97,7 @@ struct heap_item { RSFD fd; void *buf; RSET rset; + TERMID term; }; struct heap { @@ -117,11 +118,17 @@ struct rset_multiandor_info { struct rset_multiandor_rfd { 
int flag; struct heap_item *items; /* we alloc and free them here */ - HEAP h; + HEAP h; /* and move around here */ zint hits; /* returned so far */ int eof; /* seen the end of it */ + int tailcount; /* how many items are tailing */ + char *tailbits; }; +static int log_level=0; +static int log_level_initialized=0; + + /* Heap functions ***********************/ #if 0 @@ -130,13 +137,13 @@ static void heap_dump_item( HEAP h, int i, int level) { if (i>h->heapnum) return; (void)rset_pos(h->heap[i]->rset,h->heap[i]->fd, &cur, &tot); - logf(LOG_LOG," %d %*s i=%p buf=%p %0.1f/%0.1f",i, level, "", + logf(log_level," %d %*s i=%p buf=%p %0.1f/%0.1f",i, level, "", &(h->heap[i]), h->heap[i]->buf, cur,tot ); heap_dump_item(h, 2*i, level+1); heap_dump_item(h, 2*i+1, level+1); } static void heap_dump( HEAP h,char *msg) { - logf(LOG_LOG, "heap dump: %s num=%d max=%d",msg, h->heapnum, h->heapmax); + logf(log_level, "heap dump: %s num=%d max=%d",msg, h->heapnum, h->heapmax); heap_dump_item(h,1,1); } #endif @@ -240,6 +247,18 @@ static void heap_destroy (HEAP h) /* nothing to delete, all is nmem'd, and will go away in due time */ } +int compare_ands(const void *x, const void *y) +{ /* used in qsort to get the multi-and args in optimal order */ + /* that is, those with fewest occurrences first */ + const struct heap_item *hx=x; + const struct heap_item *hy=y; + double cur, totx, toty; + rset_pos(hx->fd, &cur, &totx); + rset_pos(hy->fd, &cur, &toty); + if ( totx > toty +0.5 ) return 1; + if ( totx < toty -0.5 ) return -1; + return 0; /* return totx - toty, except for overflows and rounding */ +} /* Creating and deleting rsets ***********************/ @@ -247,8 +266,13 @@ static RSET rsmulti_andor_create( NMEM nmem, const struct key_control *kcontrol, int scope, int no_rsets, RSET* rsets, const struct rset_control *ctrl) { - RSET rnew=rset_create_base(ctrl, nmem,kcontrol, scope); + RSET rnew=rset_create_base(ctrl, nmem,kcontrol, scope,0); struct rset_multiandor_info *info; + if 
(!log_level_initialized) + { + log_level=yaz_log_module_level("rsmultiandor"); + log_level_initialized=1; + } info = (struct rset_multiandor_info *) nmem_malloc(rnew->nmem,sizeof(*info)); info->no_rsets=no_rsets; info->rsets=(RSET*)nmem_malloc(rnew->nmem, no_rsets*sizeof(*rsets)); @@ -279,6 +303,7 @@ static void r_delete (RSET ct) rset_delete(info->rsets[i]); } + /* Opening and closing fd's on them *********************/ static RSFD r_open_andor (RSET ct, int flag, int is_and) @@ -291,20 +316,26 @@ static RSFD r_open_andor (RSET ct, int flag, int is_and) if (flag & RSETF_WRITE) { - logf (LOG_FATAL, "multior set type is read-only"); + logf (LOG_FATAL, "multiandor set type is read-only"); return NULL; } rfd=rfd_create_base(ct); if (rfd->priv) { p=(struct rset_multiandor_rfd *)rfd->priv; - heap_clear(p->h); + if (!is_and) + heap_clear(p->h); assert(p->items); /* all other pointers shouls already be allocated, in right sizes! */ } else { p = (struct rset_multiandor_rfd *) nmem_malloc (ct->nmem,sizeof(*p)); rfd->priv=p; - p->h = heap_create( ct->nmem, info->no_rsets, kctrl); + p->h=0; + p->tailbits=0; + if (is_and) + p->tailbits=nmem_malloc(ct->nmem, info->no_rsets*sizeof(char) ); + else + p->h = heap_create( ct->nmem, info->no_rsets, kctrl); p->items=(struct heap_item *) nmem_malloc(ct->nmem, info->no_rsets*sizeof(*p->items)); for (i=0; i<info->no_rsets; i++){ @@ -315,18 +346,21 @@ static RSFD r_open_andor (RSET ct, int flag, int is_and) p->flag = flag; p->hits=0; p->eof=0; + p->tailcount=0; if (is_and) { /* read the array and sort it */ for (i=0; i<info->no_rsets; i++){ p->items[i].fd=rset_open(info->rsets[i],RSETF_READ); - if ( !rset_read(p->items[i].fd, p->items[i].buf) ) + if (!rset_read(p->items[i].fd, p->items[i].buf, &p->items[i].term)) p->eof=1; + p->tailbits[i]=0; } + qsort(p->items, info->no_rsets, sizeof(p->items[0]), compare_ands); } else { /* fill the heap for ORing */ for (i=0; i<info->no_rsets; i++){ p->items[i].fd=rset_open(info->rsets[i],RSETF_READ); - if ( 
rset_read(p->items[i].fd, p->items[i].buf) ) + if ( rset_read(p->items[i].fd, p->items[i].buf, &p->items[i].term)) heap_insert(p->h, &(p->items[i])); } } @@ -346,11 +380,13 @@ static RSFD r_open_and (RSET ct, int flag) static void r_close (RSFD rfd) { - struct rset_multiandor_info *info=(struct rset_multiandor_info *)(rfd->rset->priv); + struct rset_multiandor_info *info= + (struct rset_multiandor_info *)(rfd->rset->priv); struct rset_multiandor_rfd *p=(struct rset_multiandor_rfd *)(rfd->priv); int i; - heap_destroy (p->h); + if (p->h) + heap_destroy (p->h); for (i = 0; i<info->no_rsets; i++) if (p->items[i].fd) rset_close(p->items[i].fd); @@ -359,24 +395,44 @@ static void r_close (RSFD rfd) -static int r_forward_or(RSFD rfd, void *buf, const void *untilbuf) +static int r_forward_or(RSFD rfd, void *buf, + TERMID *term,const void *untilbuf) +{ /* while heap head behind untilbuf, forward it and rebalance heap */ + struct rset_multiandor_rfd *p=rfd->priv; + const struct key_control *kctrl=rfd->rset->keycontrol; + if (heap_empty(p->h)) + return 0; + while ( (*kctrl->cmp)(p->h->heap[1]->buf,untilbuf) < -rfd->rset->scope ) + { + if (rset_forward(p->h->heap[1]->fd,p->h->heap[1]->buf, + &p->h->heap[1]->term, untilbuf)) + heap_balance(p->h); + else + { + heap_delete(p->h); + if (heap_empty(p->h)) + return 0; + } + + } + return r_read_or(rfd,buf,term); +} + + +static int r_read_or (RSFD rfd, void *buf, TERMID *term) { struct rset_multiandor_rfd *mrfd=rfd->priv; const struct key_control *kctrl=rfd->rset->keycontrol; - struct heap_item it; + struct heap_item *it; int rdres; if (heap_empty(mrfd->h)) return 0; - it = *(mrfd->h->heap[1]); - memcpy(buf,it.buf, kctrl->key_size); - /* FIXME - This is not right ! */ - /* If called with an untilbuf, we need to compare to that, and */ - /* forward until we are somewhere! 
*/ + it = mrfd->h->heap[1]; + memcpy(buf,it->buf, kctrl->key_size); + if (term) + *term=it->term; (mrfd->hits)++; - if (untilbuf) - rdres=rset_forward(it.fd, it.buf, untilbuf); - else - rdres=rset_read(it.fd, it.buf); + rdres=rset_read(it->fd, it->buf, &it->term); if ( rdres ) heap_balance(mrfd->h); else @@ -385,18 +441,124 @@ static int r_forward_or(RSFD rfd, void *buf, const void *untilbuf) } -static int r_read_or (RSFD rfd, void *buf) -{ - return r_forward_or(rfd, buf,0); +static int r_read_and (RSFD rfd, void *buf, TERMID *term) +{ /* Has to return all hits where each item points to the */ + /* same sysno (scope), in order. Keep an extra key (hitkey) */ + /* as long as all records do not point to hitkey, forward */ + /* them, and update hitkey to be the highest seen so far. */ + /* (if any item eof's, mark eof, and return 0 thereafter) */ + /* Once a hit has been found, scan all items for the smallest */ + /* value. Mark all as being in the tail. Read next from that */ + /* item, and if not in the same record, clear its tail bit */ + struct rset_multiandor_rfd *p=rfd->priv; + const struct key_control *kctrl=rfd->rset->keycontrol; + struct rset_multiandor_info *info=rfd->rset->priv; + int i, mintail; + int cmp; + + while (1) { + if (p->tailcount) + { /* we are tailing, find lowest tail and return it */ + mintail=0; + while ((mintail<info->no_rsets) && !p->tailbits[mintail]) + mintail++; /* first tail */ + for (i=mintail+1;i<info->no_rsets;i++) + { + if (p->tailbits[i]) + { + cmp=(*kctrl->cmp)(p->items[i].buf,p->items[mintail].buf); + if (cmp<0) + mintail=i; + } + } + /* return the lowest tail */ + memcpy(buf, p->items[mintail].buf, kctrl->key_size); + if (term) + *term=p->items[mintail].term; + if (!rset_read(p->items[mintail].fd, p->items[mintail].buf, + &p->items[mintail].term)) + { + p->eof=1; /* game over, once tails have been returned */ + p->tailbits[mintail]=0; + (p->tailcount)--; + return 1; + } + /* still a tail? 
*/ + cmp=(*kctrl->cmp)(p->items[mintail].buf,buf); + if (cmp >= rfd->rset->scope){ + p->tailbits[mintail]=0; + (p->tailcount)--; + } + return 1; + } + /* not tailing, forward until all reocrds match, and set up */ + /* as tails. the earlier 'if' will then return the hits */ + if (p->eof) + return 0; /* nothing more to see */ + i=1; /* assume items[0] is highest up */ + while (i<info->no_rsets) { + cmp=(*kctrl->cmp)(p->items[0].buf,p->items[i].buf); + if (cmp<=-rfd->rset->scope) { /* [0] was behind, forward it */ + if (!rset_forward(p->items[0].fd, p->items[0].buf, + &p->items[0].term, p->items[i].buf)) + { + p->eof=1; /* game over */ + return 0; + } + i=0; /* start frowarding from scratch */ + } else if (cmp>=rfd->rset->scope) + { /* [0] was ahead, forward i */ + if (!rset_forward(p->items[i].fd, p->items[i].buf, + &p->items[i].term, p->items[0].buf)) + { + p->eof=1; /* game over */ + return 0; + } + } else + i++; + } /* while i */ + /* if we get this far, all rsets are now within +- scope of [0] */ + /* ergo, we have a hit. 
Mark them all as tailing, and let the */ + /* upper 'if' return the hits in right order */ + for (i=0; i<info->no_rsets;i++) + p->tailbits[i]=1; + p->tailcount=info->no_rsets; + } /* while 1 */ } -static int r_read_and (RSFD rfd, void *buf) -{ - return 0; -} -static int r_forward_and(RSFD rfd, void *buf, const void *untilbuf) -{ - return 0; + +static int r_forward_and(RSFD rfd, void *buf, TERMID *term, + const void *untilbuf) +{ + struct rset_multiandor_rfd *p=rfd->priv; + const struct key_control *kctrl=rfd->rset->keycontrol; + struct rset_multiandor_info *info=rfd->rset->priv; + int i; + int cmp; + int killtail=0; + + for (i=0; i<info->no_rsets;i++) + { + cmp=(*kctrl->cmp)(p->items[i].buf,untilbuf); + if ( cmp <= -rfd->rset->scope ) + { + killtail=1; /* we are moving to a different hit */ + if (!rset_forward(p->items[i].fd, p->items[i].buf, + &p->items[i].term, untilbuf)) + { + p->eof=1; /* game over */ + p->tailcount=0; + return 0; + } + } + } + if (killtail) + { + for (i=0; i<info->no_rsets;i++) + p->tailbits[i]=0; + p->tailcount=0; + } + return r_read_and(rfd,buf,term); } static void r_pos (RSFD rfd, double *current, double *total) @@ -409,28 +571,46 @@ static void r_pos (RSFD rfd, double *current, double *total) int i; for (i=0; i<info->no_rsets; i++){ rset_pos(mrfd->items[i].fd, &cur, &tot); - logf(LOG_LOG, "r_pos: %d %0.1f %0.1f", i, cur,tot); + logf(log_level, "r_pos: %d %0.1f %0.1f", i, cur,tot); scur += cur; stot += tot; } if (stot <1.0) { /* nothing there */ *current=0; *total=0; + logf(log_level, "r_pos: NULL %0.1f %0.1f", *current, *total); return; } *current=mrfd->hits; *total=*current*stot/scur; + logf(log_level, "r_pos: = %0.1f %0.1f", *current, *total); } -static void r_rewind (RSFD rfd) -{ - assert(!"rewind not implemented yet"); - /* FIXME - rewind all parts, rebalance heap, clear hits */ -} static int r_write (RSFD rfd, const void *buf) { logf (LOG_FATAL, "multior set type is read-only"); return -1; } + +static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int 
*curterm) + /* Special case: Some multi-ors have all terms pointing to the same */ + /* term. We do not want to duplicate those. Other multiors (and ands) */ + /* have different terms under them. Those we want. */ +{ + struct rset_multiandor_info *info = + (struct rset_multiandor_info *) ct->priv; + int firstterm= *curterm; + int i; + for (i=0;i<info->no_rsets;i++) + { + rset_getterms(info->rsets[i], terms, maxterms, curterm); + if ( ( (*curterm) > firstterm+1 ) && + ( (*curterm) <= maxterms ) && + ( terms[(*curterm)-1] == terms[firstterm] ) ) + (*curterm)--; /* forget the term, seen that before */ + } +} + +