From 3335cd5f7e7af06ac6ed943859c473e092d559a1 Mon Sep 17 00:00:00 2001 From: Heikki Levanto Date: Thu, 19 Aug 2004 12:49:14 +0000 Subject: [PATCH] the new multior. Seems to work, forwards OK, does not yet estimate pos. term counts work too! --- include/rsmultior.h | 6 +- index/trunc.c | 9 +- rset/rset.c | 10 +- rset/rsmultior.c | 370 ++++++++++++++++++++++++++++++--------------------- 4 files changed, 237 insertions(+), 158 deletions(-) diff --git a/include/rsmultior.h b/include/rsmultior.h index ff8c3ad..03055d3 100644 --- a/include/rsmultior.h +++ b/include/rsmultior.h @@ -1,4 +1,4 @@ -/* $Id: rsmultior.h,v 1.1 2004-08-16 16:17:49 heikki Exp $ +/* $Id: rsmultior.h,v 1.2 2004-08-19 12:49:14 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -36,8 +36,10 @@ typedef struct rset_multior_parms int key_size; int (*cmp)(const void *p1, const void *p2); - RSET *rsets; int no_rsets; + RSET *rsets; /* array of rsets to multi-or */ + /* allocated when creating parms, */ + /* rset will free when deleted */ RSET_TERM rset_term; diff --git a/index/trunc.c b/index/trunc.c index f13f1d9..19ba229 100644 --- a/index/trunc.c +++ b/index/trunc.c @@ -1,4 +1,4 @@ -/* $Id: trunc.c,v 1.34 2004-08-16 16:17:49 heikki Exp $ +/* $Id: trunc.c,v 1.35 2004-08-19 12:49:14 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -478,7 +478,7 @@ RSET rset_trunc (ZebraHandle zi, ISAMS_P *isam_p, int no, term_type); return rset_create (rset_kind_isamb, &parms); } -#if 0 +#if 1 else if (no <10000 ) /* FIXME - hardcoded number */ { rset_multior_parms m_parms; @@ -493,10 +493,13 @@ RSET rset_trunc (ZebraHandle zi, ISAMS_P *isam_p, int no, b_parms.key_size = sizeof(struct it_key); b_parms.cmp = key_compare_it; b_parms.is = zi->reg->isamb; - b_parms.rset_term = m_parms.rset_term; + /* FIXME - make it so that we can pass a null ptr to term */ + /* needs changes in all rsets, here and there! */ for (i=0;iname); - xfree (t->flags); - xfree (t); + if (t) { /* rsmultior uses things without terms at all ! */ + xfree (t->name); + xfree (t->flags); + xfree (t); + } } RSET_TERM rset_term_dup (RSET_TERM t) diff --git a/rset/rsmultior.c b/rset/rsmultior.c index 4d95204..6a4944c 100644 --- a/rset/rsmultior.c +++ b/rset/rsmultior.c @@ -1,4 +1,4 @@ -/* $Id: rsmultior.c,v 1.1 2004-08-16 16:17:49 heikki Exp $ +/* $Id: rsmultior.c,v 1.2 2004-08-19 12:49:15 heikki Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -39,6 +39,9 @@ static void r_delete (RSET ct); static void r_rewind (RSFD rfd); static int r_read (RSFD rfd, void *buf, int *term_index); static int r_write (RSFD rfd, const void *buf); +static int r_forward(RSET ct, RSFD rfd, void *buf, int *term_index, + int (*cmpfunc)(const void *p1, const void *p2), + const void *untilbuf); static const struct rset_control control = { @@ -48,7 +51,7 @@ static const struct rset_control control = r_close, r_delete, r_rewind, - rset_default_forward, /* FIXME */ + r_forward, rset_default_pos, /* FIXME */ r_read, r_write, @@ -56,55 +59,115 @@ static const struct rset_control control = const struct rset_control *rset_kind_multior = &control; +/* The heap structure: + * The rset contains a list or rsets we are ORing together + * The rfd contains a heap of heap-items, which contain + * a rfd opened to those rsets, and a buffer for one key. + * They also contain a ptr to the rset list in the rset + * itself, for practical reasons. + */ + +struct heap_item { + RSFD fd; + void *buf; + RSET rset; +}; + +struct heap { + int heapnum; + int heapmax; + int keysize; + int (*cmp)(const void *p1, const void *p2); + struct heap_item **heap; /* ptrs to the rfd */ +}; +typedef struct heap *HEAP; + + struct rset_multior_info { int key_size; int no_rec; int (*cmp)(const void *p1, const void *p2); - ISAMC isc; - ISAMC_P *isam_positions; - - int no_isam_positions; - int no_save_positions; - struct rset_mor_rfd *rfd_list; + int no_rsets; + RSET *rsets; + int term_index; + struct rset_multior_rfd *rfd_list; }; struct rset_multior_rfd { int flag; - int position; - int position_max; -/* ISAMC_PP *ispt; */ - struct rset_mor_rfd *next; - struct rset_mor_info *info; - struct trunc_info *ti; + struct heap_item *items; /* we alloc and free them here */ + HEAP h; + struct rset_multior_rfd *next; + struct rset_multior_info *info; zint *countp; - char *pbuf; + char *prevvalue; /* to see if we are in another record */ }; #if 0 -static void heap_swap (struct trunc_info *ti, int i1, int i2) +static void heap_dump_item( HEAP h, int i, int level) { + double cur,tot; + if (i>h->heapnum) + return; + (void)rset_pos(h->heap[i]->rset,h->heap[i]->fd, &cur, &tot); + logf(LOG_LOG," %d %*s i=%p buf=%p %0.1f/%0.1f",i, level, "", + &(h->heap[i]), h->heap[i]->buf, cur,tot ); + heap_dump_item(h, 2*i, level+1); + heap_dump_item(h, 2*i+1, level+1); +} +static void heap_dump( HEAP h,char *msg) { + logf(LOG_LOG, "heap dump: %s num=%d max=%d",msg, h->heapnum, h->heapmax); + heap_dump_item(h,1,1); +} +#endif + +static void heap_swap (HEAP h, int x, int y) { - int swap; + struct heap_item *swap; + swap = h->heap[x]; + h->heap[x]=h->heap[y]; + h->heap[y]=swap; +} - swap = ti->ptr[i1]; - ti->ptr[i1] = ti->ptr[i2]; - ti->ptr[i2] = swap; +static int heap_cmp(HEAP h, int x, int y) +{ + return (*h->cmp)(h->heap[x]->buf,h->heap[y]->buf); } -static void heap_delete (struct trunc_info *ti) +static int heap_empty(HEAP h) { + return ( 0==h->heapnum ); +} + +static void heap_delete (HEAP h) +{ /* deletes the first item in the heap, and balances the rest */ int cur = 1, child = 2; + h->heap[1]=0; /* been deleted */ + heap_swap (h, 1, h->heapnum--); + while (child <= h->heapnum) { + if (child < h->heapnum && heap_cmp(h,child,1+child)>0 ) + child++; + if (heap_cmp(h,cur,child) > 0) + { + heap_swap (h, cur, child); + cur = child; + child = 2*cur; + } + else + break; + } +} - heap_swap (ti, 1, ti->heapnum--); - while (child <= ti->heapnum) { - if (child < ti->heapnum && - (*ti->cmp)(ti->heap[ti->ptr[child]], - ti->heap[ti->ptr[1+child]]) > 0) +static void heap_balance (HEAP h) +{ /* The heap root element has changed value (to bigger) */ + /* swap downwards until the heap is ordered again */ + int cur = 1, child = 2; + while (child <= h->heapnum) { + if (child < h->heapnum && heap_cmp(h,child,1+child)>0 ) child++; - if ((*ti->cmp)(ti->heap[ti->ptr[cur]], - ti->heap[ti->ptr[child]]) > 0) + if (heap_cmp(h,cur,child) > 0) { - heap_swap (ti, cur, child); + heap_swap (h, cur, child); cur = child; child = 2*cur; } @@ -113,147 +176,110 @@ static void heap_delete (struct trunc_info *ti) } } -static void heap_insert (struct trunc_info *ti, const char *buf, int indx) + +static void heap_insert (HEAP h, struct heap_item *hi) { int cur, parent; - cur = ++(ti->heapnum); - memcpy (ti->heap[ti->ptr[cur]], buf, ti->keysize); - ti->indx[ti->ptr[cur]] = indx; + cur = ++(h->heapnum); + assert(cur <= h->heapmax); + h->heap[cur]=hi; parent = cur/2; - while (parent && (*ti->cmp)(ti->heap[ti->ptr[parent]], - ti->heap[ti->ptr[cur]]) > 0) + while (parent && (heap_cmp(h,parent,cur) > 0)) { - heap_swap (ti, cur, parent); + assert(parent>0); + heap_swap (h, cur, parent); cur = parent; parent = cur/2; } } + static -struct trunc_info *heap_init (int size, int key_size, - int (*cmp)(const void *p1, const void *p2)) +HEAP heap_create (int size, int key_size, + int (*cmp)(const void *p1, const void *p2)) { - struct trunc_info *ti = (struct trunc_info *) xmalloc (sizeof(*ti)); - int i; - - ++size; - ti->heapnum = 0; - ti->keysize = key_size; - ti->cmp = cmp; - ti->indx = (int *) xmalloc (size * sizeof(*ti->indx)); - ti->heap = (char **) xmalloc (size * sizeof(*ti->heap)); - ti->ptr = (int *) xmalloc (size * sizeof(*ti->ptr)); - ti->swapbuf = (char *) xmalloc (ti->keysize); - ti->tmpbuf = (char *) xmalloc (ti->keysize); - ti->buf = (char *) xmalloc (size * ti->keysize); - for (i = size; --i >= 0; ) - { - ti->ptr[i] = i; - ti->heap[i] = ti->buf + ti->keysize * i; - } - return ti; + HEAP h = (HEAP) xmalloc (sizeof(*h)); + + ++size; /* heap array starts at 1 */ + h->heapnum = 0; + h->heapmax = size; + h->keysize = key_size; + h->cmp = cmp; + h->heap = (struct heap_item**) xmalloc((size)*sizeof(*h->heap)); + h->heap[0]=0; /* not used */ + return h; } -static void heap_close (struct trunc_info *ti) +static void heap_destroy (HEAP h) { - xfree (ti->buf); - xfree (ti->ptr); - xfree (ti->indx); - xfree (ti->heap); - xfree (ti->swapbuf); - xfree (ti->tmpbuf); - xfree (ti); + xfree (h->heap); /* safe, they all point to the rfd */ + xfree (h); } -#endif + static void *r_create (RSET ct, const struct rset_control *sel, void *parms) { - return 0; -/* rset_multior_parms *r_parms = (rset_multior_parms *) parms; - struct rset_mor_info *info; + struct rset_multior_info *info; ct->flags |= RSET_FLAG_VOLATILE; - info = (struct rset_mor_info *) xmalloc (sizeof(*info)); + /* FIXME - Remove the whole flags thing, from all rsets */ + info = (struct rset_multior_info *) xmalloc (sizeof(*info)); info->key_size = r_parms->key_size; assert (info->key_size > 1); info->cmp = r_parms->cmp; - - info->no_save_positions = r_parms->no_save_positions; - - info->isc = r_parms->isc; - info->no_isam_positions = r_parms->no_isam_positions; - info->isam_positions = (ISAMC_P *) - xmalloc (sizeof(*info->isam_positions) * info->no_isam_positions); - memcpy (info->isam_positions, r_parms->isam_positions, - sizeof(*info->isam_positions) * info->no_isam_positions); - info->rfd_list = NULL; - + info->no_rsets= r_parms->no_rsets; + info->rsets=r_parms->rsets; /* now we own it! */ + info->rfd_list=0; + info->term_index=0 ; /* r_parms->rset_term; */ /*??*/ /*FIXME */ ct->no_rset_terms = 1; ct->rset_terms = (RSET_TERM *) xmalloc (sizeof(*ct->rset_terms)); ct->rset_terms[0] = r_parms->rset_term; return info; - */ } static RSFD r_open (RSET ct, int flag) { - return 0; - /* - struct rset_mor_rfd *rfd; - struct rset_mor_info *info = (struct rset_mor_info *) ct->buf; + struct rset_multior_rfd *rfd; + struct rset_multior_info *info = (struct rset_multior_info *) ct->buf; int i; + int dummy_termindex; if (flag & RSETF_WRITE) { - logf (LOG_FATAL, "m_or set type is read-only"); - return NULL; + logf (LOG_FATAL, "multior set type is read-only"); + return NULL; } - rfd = (struct rset_mor_rfd *) xmalloc (sizeof(*rfd)); + rfd = (struct rset_multior_rfd *) xmalloc (sizeof(*rfd)); rfd->flag = flag; rfd->next = info->rfd_list; rfd->info = info; info->rfd_list = rfd; - - rfd->ispt = (ISAMC_PP *) - xmalloc (sizeof(*rfd->ispt) * info->no_isam_positions); - - rfd->ti = heap_init (info->no_isam_positions, info->key_size, info->cmp); - - ct->rset_terms[0]->nn = 0; - for (i = 0; ino_isam_positions; i++) - { - rfd->ispt[i] = isc_pp_open (info->isc, info->isam_positions[i]); - - ct->rset_terms[0]->nn += isc_pp_num (rfd->ispt[i]); - - if (isc_pp_read (rfd->ispt[i], rfd->ti->tmpbuf)) - heap_insert (rfd->ti, rfd->ti->tmpbuf, i); - else - { - isc_pp_close (rfd->ispt[i]); - rfd->ispt[i] = NULL; - } - } - rfd->position = info->no_save_positions; - - if (ct->no_rset_terms == 1) - rfd->countp = &ct->rset_terms[0]->count; + if (ct->no_rset_terms==1) + rfd->countp=&ct->rset_terms[0]->count; else - rfd->countp = 0; - rfd->pbuf = xmalloc (info->key_size); - - r_rewind (rfd); + rfd->countp=0; + rfd->h = heap_create( info->no_rsets, info->key_size, info->cmp); + rfd->prevvalue=0; + rfd->items=(struct heap_item *) xmalloc(info->no_rsets*sizeof(*rfd->items)); + for (i=0; ino_rsets; i++){ + rfd->items[i].rset=info->rsets[i]; + rfd->items[i].buf=xmalloc(info->key_size); + rfd->items[i].fd=rset_open(info->rsets[i],RSETF_READ); +/* if (item_readbuf(&(rfd->items[i]))) */ + if ( rset_read(rfd->items[i].rset, rfd->items[i].fd, + rfd->items[i].buf, &dummy_termindex) ) + heap_insert(rfd->h, &(rfd->items[i])); + } return rfd; - */ } static void r_close (RSFD rfd) { - /* - struct rset_mor_info *info = ((struct rset_mor_rfd*)rfd)->info; - struct rset_mor_rfd **rfdp; + struct rset_multior_rfd *mrfd = (struct rset_multior_rfd *) rfd; + struct rset_multior_info *info = mrfd->info; + struct rset_multior_rfd **rfdp; int i; for (rfdp = &info->rfd_list; *rfdp; rfdp = &(*rfdp)->next) @@ -261,49 +287,92 @@ static void r_close (RSFD rfd) { *rfdp = (*rfdp)->next; - heap_close (((struct rset_mor_rfd *) rfd)->ti); - for (i = 0; ino_isam_positions; i++) - if (((struct rset_mor_rfd *) rfd)->ispt[i]) - isc_pp_close (((struct rset_mor_rfd *) rfd)->ispt[i]); - xfree (((struct rset_mor_rfd *)rfd)->ispt); - xfree (((struct rset_mor_rfd *)rfd)->pbuf); - xfree (rfd); + heap_destroy (mrfd->h); + for (i = 0; ino_rsets; i++) { + if (mrfd->items[i].fd) + rset_close(info->rsets[i],mrfd->items[i].fd); + xfree(mrfd->items[i].buf); + } + xfree(mrfd->items); + if (mrfd->prevvalue) + xfree(mrfd->prevvalue); + xfree(mrfd); return; } logf (LOG_FATAL, "r_close but no rfd match!"); assert (0); - */ } static void r_delete (RSET ct) { - /* - struct rset_mor_info *info = (struct rset_mor_info *) ct->buf; + struct rset_multior_info *info = (struct rset_multior_info *) ct->buf; int i; assert (info->rfd_list == NULL); - xfree (info->isam_positions); + for(i=0;ino_rsets;i++) + rset_delete(info->rsets[i]); + xfree(info->rsets); + xfree(info); - for (i = 0; ino_rset_terms; i++) - rset_term_destroy (ct->rset_terms[i]); - */ /* FIXME - Watch out! */ - /* + for (i = 0; ino_rset_terms; i++) /* usually only 1 */ + rset_term_destroy (ct->rset_terms[i]); xfree (ct->rset_terms); - - xfree (info); - */ } static void r_rewind (RSFD rfd) { + assert(!"rewind not implemented yet"); } -static int r_read (RSFD rfd, void *buf, int *term_index) +static int r_forward(RSET ct, RSFD rfd, void *buf, int *term_index, + int (*cmpfunc)(const void *p1, const void *p2), + const void *untilbuf) { + struct rset_multior_rfd *mrfd = (struct rset_multior_rfd *) rfd; + struct rset_multior_info *info = mrfd->info; + struct heap_item it; + int rdres; + int dummycount=0; + if (heap_empty(mrfd->h)) return 0; - /* - struct rset_mor_rfd *mrfd = (struct rset_mor_rfd *) rfd; + if (cmpfunc) + assert(cmpfunc==mrfd->info->cmp); + *term_index=0; + it = *(mrfd->h->heap[1]); + memcpy(buf,it.buf, info->key_size); + if (mrfd->countp) { + if (mrfd->prevvalue) { /* in another record */ + if ( (*mrfd->info->cmp)(mrfd->prevvalue,it.buf) < -1) + (*mrfd->countp)++; + } else { + mrfd->prevvalue=xmalloc(info->key_size); + (*mrfd->countp)++; + } + memcpy(mrfd->prevvalue,it.buf, info->key_size); + } + if (untilbuf) + rdres=rset_forward(it.rset, it.fd, it.buf, &dummycount, + cmpfunc,untilbuf); + else + rdres=rset_read(it.rset, it.fd, it.buf, &dummycount); + if ( rdres ) + heap_balance(mrfd->h); + else + heap_delete(mrfd->h); + return 1; + +} + +static int r_read (RSFD rfd, void *buf, int *term_index) +{ + return r_forward(0,rfd, buf, term_index,0,0); +} + +#if 0 +static int old_read +{ + struct rset_multior_rfd *mrfd = (struct rset_multior_rfd *) rfd; struct trunc_info *ti = mrfd->ti; int n = ti->indx[ti->ptr[1]]; @@ -311,13 +380,13 @@ static int r_read (RSFD rfd, void *buf, int *term_index) return 0; *term_index = 0; memcpy (buf, ti->heap[ti->ptr[1]], ti->keysize); - if (((struct rset_mor_rfd *) rfd)->position) + if (((struct rset_multior_rfd *) rfd)->position) { - if (isc_pp_read (((struct rset_mor_rfd *) rfd)->ispt[n], ti->tmpbuf)) + if (isc_pp_read (((struct rset_multior_rfd *) rfd)->ispt[n], ti->tmpbuf)) { heap_delete (ti); if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1) - ((struct rset_mor_rfd *) rfd)->position--; + ((struct rset_multior_rfd *) rfd)->position--; heap_insert (ti, ti->tmpbuf, n); } else @@ -332,7 +401,7 @@ static int r_read (RSFD rfd, void *buf, int *term_index) } while (1) { - if (!isc_pp_read (((struct rset_mor_rfd *) rfd)->ispt[n], ti->tmpbuf)) + if (!isc_pp_read (((struct rset_multior_rfd *) rfd)->ispt[n], ti->tmpbuf)) { heap_delete (ti); break; @@ -351,11 +420,14 @@ static int r_read (RSFD rfd, void *buf, int *term_index) (*mrfd->countp)++; } return 1; - */ } +#endif +/*!*/ /* FIXME Forward */ +/*!*/ /* FIXME pos */ + static int r_write (RSFD rfd, const void *buf) { - logf (LOG_FATAL, "mor set type is read-only"); + logf (LOG_FATAL, "multior set type is read-only"); return -1; } -- 1.7.10.4