From afc642709ad7c44f9bce0bbf533d8d76fbb1bcbd Mon Sep 17 00:00:00 2001 From: Heikki Levanto Date: Thu, 23 Sep 1999 18:01:18 +0000 Subject: [PATCH] singleton optimising --- isamc/isamd-p.h | 12 +++++-- isamc/isamd.c | 78 ++++++++++++++++++++++++++++++------------- isamc/merge-d.c | 100 +++++++++++++++++++++++++++++++++++++++++++++---------- 3 files changed, 146 insertions(+), 44 deletions(-) diff --git a/isamc/isamd-p.h b/isamc/isamd-p.h index 843a183..746c14c 100644 --- a/isamc/isamd-p.h +++ b/isamc/isamd-p.h @@ -1,4 +1,4 @@ -/* $Id: isamd-p.h,v 1.7 1999-09-20 15:48:06 heikki Exp $ +/* $Id: isamd-p.h,v 1.8 1999-09-23 18:01:18 heikki Exp $ * Copyright (c) 1995-1996, Index Data. * See the file LICENSE for details. * Heikki Levanto @@ -97,6 +97,11 @@ int isamd_read_block (ISAMD is, int cat, int pos, char *dst); int isamd_write_block (ISAMD is, int cat, int pos, char *src); void isamd_free_diffs(ISAMD_PP pp); +int is_singleton(ISAMD_P ipos); +void singleton_decode (int code, struct it_key *k); +int singleton_encode(struct it_key *k); + + #ifdef __cplusplus } #endif @@ -105,7 +110,10 @@ void isamd_free_diffs(ISAMD_PP pp); /* * $Log: isamd-p.h,v $ - * Revision 1.7 1999-09-20 15:48:06 heikki + * Revision 1.8 1999-09-23 18:01:18 heikki + * singleton optimising + * + * Revision 1.7 1999/09/20 15:48:06 heikki * Small changes * * Revision 1.6 1999/08/25 18:09:23 heikki diff --git a/isamc/isamd.c b/isamc/isamd.c index c15d7c7..8f57e6d 100644 --- a/isamc/isamd.c +++ b/isamc/isamd.c @@ -1,7 +1,7 @@ /* * Copyright (c) 1995-1998, Index Data. * See the file LICENSE for details. - * $Id: isamd.c,v 1.13 1999-09-20 15:48:06 heikki Exp $ + * $Id: isamd.c,v 1.14 1999-09-23 18:01:18 heikki Exp $ * * Isamd - isam with diffs * Programmed by: Heikki Levanto @@ -480,10 +480,12 @@ ISAMD_PP isamd_pp_open (ISAMD is, ISAMD_P ipos) char *src; int sz = is->method->filecat[is->max_cat].bsize; /* always allocate for the largest blocks, saves trouble */ - - pp->cat = isamd_type(ipos); - pp->pos = isamd_block(ipos); - + struct it_key singlekey; + char *c_ptr; /* for fake encoding the singlekey */ + char *i_ptr; + int ofs; + + pp->numKeys = 0; src = pp->buf = (char *) xmalloc (sz); memset(src,'\0',sz); /* clear the buffer, for new blocks */ @@ -491,12 +493,47 @@ ISAMD_PP isamd_pp_open (ISAMD is, ISAMD_P ipos) pp->size = 0; pp->offset = 0; pp->is = is; - pp->decodeClientData = (*is->method->code_start)(ISAMD_DECODE); - pp->numKeys = 0; -// pp->diffs=0; + pp->diffs=0; pp->diffbuf=0; pp->diffinfo=0; + pp->decodeClientData = (*is->method->code_start)(ISAMD_DECODE); + if ( is_singleton(ipos) ) + { + pp->cat=0; + pp->pos=0; + if (is->method->debug > 5) + logf (LOG_LOG, "isamd_pp_open %p %d=%d:%d sz=%d n=%d=%d:%d", + pp, isamd_addr(pp->pos, pp->cat), pp->cat, pp->pos, pp->size, + pp->next, isamd_type(pp->next), isamd_block(pp->next) ); + singleton_decode(ipos, &singlekey ); + pp->offset=ISAMD_BLOCK_OFFSET_1; + pp->numKeys = 1; + ofs=pp->offset+sizeof(int); /* reserve length of diffsegment */ + singlekey.seqno = singlekey.seqno * 2 + 1; /* make an insert diff */ + c_ptr=&(pp->buf[ofs]); + i_ptr=(char*)(&singlekey); + (*is->method->code_item)(ISAMD_ENCODE, pp->decodeClientData, + &c_ptr, &i_ptr); + (*is->method->code_reset)(pp->decodeClientData); + ofs += c_ptr-&(pp->buf[ofs]); + memcpy( &(pp->buf[pp->offset]), &ofs, sizeof(int) ); + /* since we memset buf earlier, we already have a zero endmark! */ + pp->size = ofs; + if (is->method->debug > 5) + logf (LOG_LOG, "isamd_pp_open single %d=%x: %d.%d sz=%d", + ipos,ipos, + singlekey.sysno, singlekey.seqno/2, + pp->size ); + return pp; + } /* singleton */ + + + ipos=ipos>>1; /* remove the singleton bit */ + + pp->cat = isamd_type(ipos); + pp->pos = isamd_block(ipos); + if (pp->pos) { src = pp->buf; @@ -507,27 +544,16 @@ ISAMD_PP isamd_pp_open (ISAMD is, ISAMD_P ipos) src += sizeof(pp->size); memcpy (&pp->numKeys, src, sizeof(pp->numKeys)); src += sizeof(pp->numKeys); -// memcpy (&pp->diffs, src, sizeof(pp->diffs)); -// src += sizeof(pp->diffs); assert (pp->next != pp->pos); pp->offset = src - pp->buf; assert (pp->offset == ISAMD_BLOCK_OFFSET_1); -// if (0==pp->diffs) -// ++(is->files[pp->cat].no_op_nodiff); -// else -// if(pp->diffs&1) -// ++(is->files[pp->cat].no_op_extdiff); -// else -// ++(is->files[pp->cat].no_op_intdiff); - // if (!pp->diffbuf) - // pp->diffbuf=pp->buf; + assert(pp->size>=ISAMD_BLOCK_OFFSET_1); /*??*/ } if (is->method->debug > 5) logf (LOG_LOG, "isamd_pp_open %p %d=%d:%d sz=%d n=%d=%d:%d", pp, isamd_addr(pp->pos, pp->cat), pp->cat, pp->pos, pp->size, pp->next, isamd_type(pp->next), isamd_block(pp->next) ); - - + return pp; } @@ -574,9 +600,10 @@ void isamd_buildlaterblock(ISAMD_PP pp){ /* returns non-zero if item could be read; 0 otherwise */ int isamd_pp_read (ISAMD_PP pp, void *buf) { + return isamd_read_item (pp, (char **) &buf); - /* note: isamd_read_item is in merge-d.c, because it is so */ - /* convoluted with the merge process */ + /* note: isamd_read_item is in merge-d.c, because it is so */ + /* convoluted with the merge process */ } /* read one main item from file - decode and store it in *dst. @@ -737,7 +764,10 @@ void isamd_pp_dump (ISAMD is, ISAMD_P ipos) /* * $Log: isamd.c,v $ - * Revision 1.13 1999-09-20 15:48:06 heikki + * Revision 1.14 1999-09-23 18:01:18 heikki + * singleton optimising + * + * Revision 1.13 1999/09/20 15:48:06 heikki * Small changes * * Revision 1.12 1999/09/13 13:28:28 heikki diff --git a/isamc/merge-d.c b/isamc/merge-d.c index a55741f..a32b72a 100644 --- a/isamc/merge-d.c +++ b/isamc/merge-d.c @@ -3,7 +3,7 @@ * See the file LICENSE for details. * Heikki Levanto * - * $Id: merge-d.c,v 1.21 1999-09-21 17:36:43 heikki Exp $ + * $Id: merge-d.c,v 1.22 1999-09-23 18:01:18 heikki Exp $ * * bugs * (none) @@ -11,8 +11,6 @@ * missing * * optimize - * - Input filter: Eliminate del-ins pairs, tell if only one entry (or none) - * - single-entry optimizing (keep the one entry in the dict, no block) * - study and optimize block sizes (later) * - find a way to decide the size of an empty diffblock (after merge) * - On allocating more blocks (in append and merge), check the order of @@ -196,6 +194,7 @@ FILTER filter_open( ISAMD is, ISAMD_I data ) F->m1 = F->m2 = 0; F->r1 = F->r2 = FILTER_NOTYET; filter_fill(F); + return F; } static void filter_close (FILTER F) @@ -220,28 +219,58 @@ static int filter_read( FILTER F, } F->r1 = FILTER_NOTYET; return res; - -#ifdef SKIPTHIS - char *k_ptr = (char*) k; - int res = (F->data->read_item)(F->data->clientData, &k_ptr, mode); - if (F->is->method->debug > 9) - logf(LOG_LOG,"filt_read: start %d.%d m=%d r=%d", - k->sysno, k->seqno, *mode, res); - return res; -#endif } -static int filter_empty(FILTER F) +static int filter_isempty(FILTER F) { - return 0; + return ( (0 == F->r1) && (0 == F->r2)) ; } static int filter_only_one(FILTER F) { - return 0; + return ( (0 != F->r1) && (0 == F->r2)); } + + +/*************************************************************** + * Singleton encoding + ***************************************************************/ +/* When there is only a single item, we don't allocate a block + * for it, but code it in the directory entry directly, if it + * fits. + */ + +#define DEC_SYSBITS 15 +#define DEC_SEQBITS 15 +#define DEC_MASK(n) ((1<<(n))-1) + +int is_singleton(ISAMD_P ipos) +{ + return ( ipos != 0 ) && ( ipos & 1 ); +} + + +int singleton_encode(struct it_key *k) +/* encodes the key into one int. If it does not fit, returns 0 */ +{ + if ( (k->sysno & DEC_MASK(DEC_SYSBITS) ) != k->sysno ) + return 0; /* no room dor sysno */ + if ( (k->seqno & DEC_MASK(DEC_SYSBITS) ) != k->seqno ) + return 0; /* no room dor sysno */ + return ( (k->sysno | (k->seqno << DEC_SYSBITS) ) << 1) | 1; +} + +void singleton_decode (int code, struct it_key *k) +{ + assert ((code & 1) == 1 ); + code = code >> 1; + k->sysno = code & DEC_MASK(DEC_SYSBITS); + k->seqno = code >> DEC_SYSBITS ; +} + + /*************************************************************** * General support routines ***************************************************************/ @@ -387,7 +416,7 @@ static void getDiffInfo(ISAMD_PP pp ) if ( (pp->is->method->debug > 0) && (pp->diffinfo[i].maxidx > pp->is->method->filecat[pp->cat].bsize) ) - { /* bug-hunting, this fails on some long runs that log too much */ + { logf(LOG_LOG,"Bad MaxIx!!! %s:%d: diffidx=%d", __FILE__,__LINE__, diffidx); logf(LOG_LOG,"i=%d maxix=%d bsz=%d", i, pp->diffinfo[i].maxidx, @@ -1024,8 +1053,40 @@ static int append_diffs( ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data) { FILTER F = filter_open(is,data); - return append_diffs(is,ipos,F); + ISAMD_P rc=0; + + if ( filter_isempty(F) ) /* can be, if del-ins of the same */ + { + if (is->method->debug >9) + logf(LOG_LOG,"isamd_appd: nothing to do"); + filter_close(F); + return ipos; /* without doing anything at all */ + } + + if ( ( 0==ipos) && filter_only_one(F) ) + { + struct it_key k; + int mode; + filter_read(F,&k,&mode); + assert(mode); + rc = singleton_encode(&k); + if (is->method->debug >9) + logf(LOG_LOG,"isamd_appd: singleton %d (%x)", + rc,rc); + assert ( (rc==0) || is_singleton(rc) ); + } + if ( 0==rc) /* either not single, or it did not fit */ + { + rc = append_diffs(is,ipos,F)<<1; /* not singleton */ + assert ( ! is_singleton(rc) ); /*!*/ + } filter_close(F); + + if (is->method->debug >9) + logf(LOG_LOG,"isamd_appd: ret %d=%x (%d=%x)", + rc,rc,ipos,ipos); + + return rc; } /* isamd_append */ @@ -1036,7 +1097,10 @@ ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data) /* * $Log: merge-d.c,v $ - * Revision 1.21 1999-09-21 17:36:43 heikki + * Revision 1.22 1999-09-23 18:01:18 heikki + * singleton optimising + * + * Revision 1.21 1999/09/21 17:36:43 heikki * Added filter function. Not much of effect on the small test set... * * Revision 1.20 1999/09/20 15:48:06 heikki -- 1.7.10.4