X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=rset%2Frsmultior.c;h=cb755335ce89f2743db654b68b91868713e4f14f;hb=5e9aca2e8f33fe023b6b9da6df55642f96efcb50;hp=4d9520470baad4ee3f860220aba6fe9b9df8a2d0;hpb=7887042844aef0c6f3d2b711e315abe91c26d211;p=idzebra-moved-to-github.git diff --git a/rset/rsmultior.c b/rset/rsmultior.c index 4d95204..cb75533 100644 --- a/rset/rsmultior.c +++ b/rset/rsmultior.c @@ -1,4 +1,4 @@ -/* $Id: rsmultior.c,v 1.1 2004-08-16 16:17:49 heikki Exp $ +/* $Id: rsmultior.c,v 1.10 2004-09-09 10:08:06 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -30,81 +30,133 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include #include -#include +#include -static void *r_create(RSET ct, const struct rset_control *sel, void *parms); static RSFD r_open (RSET ct, int flag); static void r_close (RSFD rfd); static void r_delete (RSET ct); static void r_rewind (RSFD rfd); -static int r_read (RSFD rfd, void *buf, int *term_index); +static int r_read (RSFD rfd, void *buf); static int r_write (RSFD rfd, const void *buf); +static int r_forward(RSFD rfd, void *buf, + const void *untilbuf); +static void r_pos (RSFD rfd, double *current, double *total); static const struct rset_control control = { "multi-or", - r_create, + r_delete, r_open, r_close, - r_delete, r_rewind, - rset_default_forward, /* FIXME */ - rset_default_pos, /* FIXME */ + r_forward, + r_pos, r_read, r_write, }; const struct rset_control *rset_kind_multior = &control; +/* The heap structure: + * The rset contains a list or rsets we are ORing together + * The rfd contains a heap of heap-items, which contain + * a rfd opened to those rsets, and a buffer for one key. + * They also contain a ptr to the rset list in the rset + * itself, for practical reasons. + */ + +struct heap_item { + RSFD fd; + void *buf; + RSET rset; +}; + +struct heap { + int heapnum; + int heapmax; + const struct key_control *kctrl; + struct heap_item **heap; /* ptrs to the rfd */ +}; +typedef struct heap *HEAP; + + struct rset_multior_info { - int key_size; - int no_rec; - int (*cmp)(const void *p1, const void *p2); - ISAMC isc; - ISAMC_P *isam_positions; - - int no_isam_positions; - int no_save_positions; - struct rset_mor_rfd *rfd_list; + int no_rsets; + RSET *rsets; }; struct rset_multior_rfd { int flag; - int position; - int position_max; -/* ISAMC_PP *ispt; */ - struct rset_mor_rfd *next; - struct rset_mor_info *info; - struct trunc_info *ti; - zint *countp; - char *pbuf; + struct heap_item *items; /* we alloc and free them here */ + HEAP h; + zint hits; /* returned so far */ }; #if 0 -static void heap_swap (struct trunc_info *ti, int i1, int i2) +static void heap_dump_item( HEAP h, int i, int level) { + double cur,tot; + if (i>h->heapnum) + return; + (void)rset_pos(h->heap[i]->rset,h->heap[i]->fd, &cur, &tot); + logf(LOG_LOG," %d %*s i=%p buf=%p %0.1f/%0.1f",i, level, "", + &(h->heap[i]), h->heap[i]->buf, cur,tot ); + heap_dump_item(h, 2*i, level+1); + heap_dump_item(h, 2*i+1, level+1); +} +static void heap_dump( HEAP h,char *msg) { + logf(LOG_LOG, "heap dump: %s num=%d max=%d",msg, h->heapnum, h->heapmax); + heap_dump_item(h,1,1); +} +#endif + +static void heap_swap (HEAP h, int x, int y) { - int swap; + struct heap_item *swap; + swap = h->heap[x]; + h->heap[x]=h->heap[y]; + h->heap[y]=swap; +} - swap = ti->ptr[i1]; - ti->ptr[i1] = ti->ptr[i2]; - ti->ptr[i2] = swap; +static int heap_cmp(HEAP h, int x, int y) +{ + return (*h->kctrl->cmp)(h->heap[x]->buf,h->heap[y]->buf); } -static void heap_delete (struct trunc_info *ti) +static int heap_empty(HEAP h) { + return ( 0==h->heapnum ); +} + +static void heap_delete (HEAP h) +{ /* deletes the first item in the heap, and balances the rest */ int cur = 1, child = 2; + h->heap[1]=0; /* been deleted */ + heap_swap (h, 1, h->heapnum--); + while (child <= h->heapnum) { + if (child < h->heapnum && heap_cmp(h,child,1+child)>0 ) + child++; + if (heap_cmp(h,cur,child) > 0) + { + heap_swap (h, cur, child); + cur = child; + child = 2*cur; + } + else + break; + } +} - heap_swap (ti, 1, ti->heapnum--); - while (child <= ti->heapnum) { - if (child < ti->heapnum && - (*ti->cmp)(ti->heap[ti->ptr[child]], - ti->heap[ti->ptr[1+child]]) > 0) +static void heap_balance (HEAP h) +{ /* The heap root element has changed value (to bigger) */ + /* swap downwards until the heap is ordered again */ + int cur = 1, child = 2; + while (child <= h->heapnum) { + if (child < h->heapnum && heap_cmp(h,child,1+child)>0 ) child++; - if ((*ti->cmp)(ti->heap[ti->ptr[cur]], - ti->heap[ti->ptr[child]]) > 0) + if (heap_cmp(h,cur,child) > 0) { - heap_swap (ti, cur, child); + heap_swap (h, cur, child); cur = child; child = 2*cur; } @@ -113,249 +165,185 @@ static void heap_delete (struct trunc_info *ti) } } -static void heap_insert (struct trunc_info *ti, const char *buf, int indx) + +static void heap_insert (HEAP h, struct heap_item *hi) { int cur, parent; - cur = ++(ti->heapnum); - memcpy (ti->heap[ti->ptr[cur]], buf, ti->keysize); - ti->indx[ti->ptr[cur]] = indx; + cur = ++(h->heapnum); + assert(cur <= h->heapmax); + h->heap[cur]=hi; parent = cur/2; - while (parent && (*ti->cmp)(ti->heap[ti->ptr[parent]], - ti->heap[ti->ptr[cur]]) > 0) + while (parent && (heap_cmp(h,parent,cur) > 0)) { - heap_swap (ti, cur, parent); + assert(parent>0); + heap_swap (h, cur, parent); cur = parent; parent = cur/2; } } + static -struct trunc_info *heap_init (int size, int key_size, - int (*cmp)(const void *p1, const void *p2)) +HEAP heap_create (NMEM nmem, int size, const struct key_control *kctrl) { - struct trunc_info *ti = (struct trunc_info *) xmalloc (sizeof(*ti)); - int i; + HEAP h = (HEAP) nmem_malloc (nmem, sizeof(*h)); + + ++size; /* heap array starts at 1 */ + h->heapnum = 0; + h->heapmax = size; + h->kctrl=kctrl; + h->heap = (struct heap_item**) nmem_malloc(nmem,size*sizeof(*h->heap)); + h->heap[0]=0; /* not used */ + return h; +} - ++size; - ti->heapnum = 0; - ti->keysize = key_size; - ti->cmp = cmp; - ti->indx = (int *) xmalloc (size * sizeof(*ti->indx)); - ti->heap = (char **) xmalloc (size * sizeof(*ti->heap)); - ti->ptr = (int *) xmalloc (size * sizeof(*ti->ptr)); - ti->swapbuf = (char *) xmalloc (ti->keysize); - ti->tmpbuf = (char *) xmalloc (ti->keysize); - ti->buf = (char *) xmalloc (size * ti->keysize); - for (i = size; --i >= 0; ) - { - ti->ptr[i] = i; - ti->heap[i] = ti->buf + ti->keysize * i; - } - return ti; +static void heap_clear( HEAP h) +{ + assert(h); + h->heapnum=0; } -static void heap_close (struct trunc_info *ti) +static void heap_destroy (HEAP h) { - xfree (ti->buf); - xfree (ti->ptr); - xfree (ti->indx); - xfree (ti->heap); - xfree (ti->swapbuf); - xfree (ti->tmpbuf); - xfree (ti); + /* nothing to delete, all is nmem'd, and will go away in due time */ } -#endif -static void *r_create (RSET ct, const struct rset_control *sel, void *parms) + +RSET rsmultior_create( NMEM nmem, const struct key_control *kcontrol, int scope, + int no_rsets, RSET* rsets) +{ + RSET rnew=rset_create_base(&control, nmem,kcontrol, scope); + struct rset_multior_info *info; + info = (struct rset_multior_info *) nmem_malloc(rnew->nmem,sizeof(*info)); + info->no_rsets=no_rsets; + info->rsets=(RSET*)nmem_malloc(rnew->nmem, no_rsets*sizeof(*rsets)); + memcpy(info->rsets,rsets,no_rsets*sizeof(*rsets)); + rnew->priv=info; + return rnew; +} + +static void r_delete (RSET ct) { - return 0; -/* - rset_multior_parms *r_parms = (rset_multior_parms *) parms; - struct rset_mor_info *info; - - ct->flags |= RSET_FLAG_VOLATILE; - info = (struct rset_mor_info *) xmalloc (sizeof(*info)); - info->key_size = r_parms->key_size; - assert (info->key_size > 1); - info->cmp = r_parms->cmp; - - info->no_save_positions = r_parms->no_save_positions; - - info->isc = r_parms->isc; - info->no_isam_positions = r_parms->no_isam_positions; - info->isam_positions = (ISAMC_P *) - xmalloc (sizeof(*info->isam_positions) * info->no_isam_positions); - memcpy (info->isam_positions, r_parms->isam_positions, - sizeof(*info->isam_positions) * info->no_isam_positions); - info->rfd_list = NULL; - - ct->no_rset_terms = 1; - ct->rset_terms = (RSET_TERM *) xmalloc (sizeof(*ct->rset_terms)); - ct->rset_terms[0] = r_parms->rset_term; - return info; - */ + struct rset_multior_info *info = (struct rset_multior_info *) ct->priv; + int i; + for(i=0;ino_rsets;i++) + rset_delete(info->rsets[i]); } static RSFD r_open (RSET ct, int flag) { - return 0; - /* - struct rset_mor_rfd *rfd; - struct rset_mor_info *info = (struct rset_mor_info *) ct->buf; + RSFD rfd; + struct rset_multior_rfd *p; + struct rset_multior_info *info = (struct rset_multior_info *) ct->priv; + const struct key_control *kctrl = ct->keycontrol; int i; if (flag & RSETF_WRITE) { - logf (LOG_FATAL, "m_or set type is read-only"); - return NULL; + logf (LOG_FATAL, "multior set type is read-only"); + return NULL; } - rfd = (struct rset_mor_rfd *) xmalloc (sizeof(*rfd)); - rfd->flag = flag; - rfd->next = info->rfd_list; - rfd->info = info; - info->rfd_list = rfd; - - rfd->ispt = (ISAMC_PP *) - xmalloc (sizeof(*rfd->ispt) * info->no_isam_positions); - - rfd->ti = heap_init (info->no_isam_positions, info->key_size, info->cmp); - - ct->rset_terms[0]->nn = 0; - for (i = 0; ino_isam_positions; i++) - { - rfd->ispt[i] = isc_pp_open (info->isc, info->isam_positions[i]); - - ct->rset_terms[0]->nn += isc_pp_num (rfd->ispt[i]); - - if (isc_pp_read (rfd->ispt[i], rfd->ti->tmpbuf)) - heap_insert (rfd->ti, rfd->ti->tmpbuf, i); - else - { - isc_pp_close (rfd->ispt[i]); - rfd->ispt[i] = NULL; + rfd=rfd_create_base(ct); + if (rfd->priv) { + p=(struct rset_multior_rfd *)rfd->priv; + heap_clear(p->h); + assert(p->items); + /* all other pointers shouls already be allocated, in right sizes! */ + } + else { + p = (struct rset_multior_rfd *) nmem_malloc (ct->nmem,sizeof(*p)); + rfd->priv=p; + p->h = heap_create( ct->nmem, info->no_rsets, kctrl); + p->items=(struct heap_item *) nmem_malloc(ct->nmem, + info->no_rsets*sizeof(*p->items)); + for (i=0; ino_rsets; i++){ + p->items[i].rset=info->rsets[i]; + p->items[i].buf=nmem_malloc(ct->nmem,kctrl->key_size); } } - rfd->position = info->no_save_positions; - - if (ct->no_rset_terms == 1) - rfd->countp = &ct->rset_terms[0]->count; - else - rfd->countp = 0; - rfd->pbuf = xmalloc (info->key_size); - - r_rewind (rfd); + p->flag = flag; + p->hits=0; + for (i=0; ino_rsets; i++){ + p->items[i].fd=rset_open(info->rsets[i],RSETF_READ); + if ( rset_read(p->items[i].fd, p->items[i].buf) ) + heap_insert(p->h, &(p->items[i])); + } return rfd; - */ } static void r_close (RSFD rfd) { - /* - struct rset_mor_info *info = ((struct rset_mor_rfd*)rfd)->info; - struct rset_mor_rfd **rfdp; + struct rset_multior_info *info=(struct rset_multior_info *)(rfd->rset->priv); + struct rset_multior_rfd *p=(struct rset_multior_rfd *)(rfd->priv); int i; - - for (rfdp = &info->rfd_list; *rfdp; rfdp = &(*rfdp)->next) - if (*rfdp == rfd) - { - *rfdp = (*rfdp)->next; - - heap_close (((struct rset_mor_rfd *) rfd)->ti); - for (i = 0; ino_isam_positions; i++) - if (((struct rset_mor_rfd *) rfd)->ispt[i]) - isc_pp_close (((struct rset_mor_rfd *) rfd)->ispt[i]); - xfree (((struct rset_mor_rfd *)rfd)->ispt); - xfree (((struct rset_mor_rfd *)rfd)->pbuf); - xfree (rfd); - return; - } - logf (LOG_FATAL, "r_close but no rfd match!"); - assert (0); - */ + + heap_destroy (p->h); + for (i = 0; ino_rsets; i++) + if (p->items[i].fd) + rset_close(p->items[i].fd); + rfd_delete_base(rfd); } -static void r_delete (RSET ct) + +static void r_rewind (RSFD rfd) { - /* - struct rset_mor_info *info = (struct rset_mor_info *) ct->buf; - int i; + assert(!"rewind not implemented yet"); +} - assert (info->rfd_list == NULL); - xfree (info->isam_positions); - for (i = 0; ino_rset_terms; i++) - rset_term_destroy (ct->rset_terms[i]); - */ /* FIXME - Watch out! */ - /* - xfree (ct->rset_terms); +static int r_forward(RSFD rfd, void *buf, const void *untilbuf) +{ + struct rset_multior_rfd *mrfd=rfd->priv; + const struct key_control *kctrl=rfd->rset->keycontrol; + struct heap_item it; + int rdres; + if (heap_empty(mrfd->h)) + return 0; + it = *(mrfd->h->heap[1]); + memcpy(buf,it.buf, kctrl->key_size); + (mrfd->hits)++; + if (untilbuf) + rdres=rset_forward(it.fd, it.buf, untilbuf); + else + rdres=rset_read(it.fd, it.buf); + if ( rdres ) + heap_balance(mrfd->h); + else + heap_delete(mrfd->h); + return 1; - xfree (info); - */ } -static void r_rewind (RSFD rfd) +static int r_read (RSFD rfd, void *buf) { + return r_forward(rfd, buf,0); } - -static int r_read (RSFD rfd, void *buf, int *term_index) +static void r_pos (RSFD rfd, double *current, double *total) { - return 0; - /* - struct rset_mor_rfd *mrfd = (struct rset_mor_rfd *) rfd; - struct trunc_info *ti = mrfd->ti; - int n = ti->indx[ti->ptr[1]]; - - if (!ti->heapnum) - return 0; - *term_index = 0; - memcpy (buf, ti->heap[ti->ptr[1]], ti->keysize); - if (((struct rset_mor_rfd *) rfd)->position) - { - if (isc_pp_read (((struct rset_mor_rfd *) rfd)->ispt[n], ti->tmpbuf)) - { - heap_delete (ti); - if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1) - ((struct rset_mor_rfd *) rfd)->position--; - heap_insert (ti, ti->tmpbuf, n); - } - else - heap_delete (ti); - if (mrfd->countp && ( - *mrfd->countp == 0 || (*ti->cmp)(buf, mrfd->pbuf) > 1)) - { - memcpy (mrfd->pbuf, buf, ti->keysize); - (*mrfd->countp)++; - } - return 1; - } - while (1) - { - if (!isc_pp_read (((struct rset_mor_rfd *) rfd)->ispt[n], ti->tmpbuf)) - { - heap_delete (ti); - break; - } - if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1) - { - heap_delete (ti); - heap_insert (ti, ti->tmpbuf, n); - break; - } + struct rset_multior_info *info=(struct rset_multior_info *)(rfd->rset->priv); + struct rset_multior_rfd *mrfd=(struct rset_multior_rfd *)(rfd->priv); + double cur, tot; + double scur=0.0, stot=0.0; + int i; + for (i=0; ino_rsets; i++){ + rset_pos(mrfd->items[i].fd, &cur, &tot); + logf(LOG_LOG, "r_pos: %d %0.1f %0.1f", i, cur,tot); + scur += cur; + stot += tot; } - if (mrfd->countp && ( - *mrfd->countp == 0 || (*ti->cmp)(buf, mrfd->pbuf) > 1)) - { - memcpy (mrfd->pbuf, buf, ti->keysize); - (*mrfd->countp)++; + if (stot <1.0) { /* nothing there */ + *current=0; + *total=0; + return; } - return 1; - */ + *current=mrfd->hits; + *total=*current*stot/scur; } static int r_write (RSFD rfd, const void *buf) { - logf (LOG_FATAL, "mor set type is read-only"); + logf (LOG_FATAL, "multior set type is read-only"); return -1; }