X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=isamc%2Fmerge-d.c;h=29fc63025d949a5c03bbce2063659f15885855d7;hb=e150e51a7e20a902e9fd2f11f00811f94f67d529;hp=a55741f58567974f2641313d3d748a696f9c29ed;hpb=a47d9b179296ab7f1fbc2921531c1514b1dc9530;p=idzebra-moved-to-github.git diff --git a/isamc/merge-d.c b/isamc/merge-d.c index a55741f..29fc630 100644 --- a/isamc/merge-d.c +++ b/isamc/merge-d.c @@ -3,16 +3,15 @@ * See the file LICENSE for details. * Heikki Levanto * - * $Id: merge-d.c,v 1.21 1999-09-21 17:36:43 heikki Exp $ + * $Id: merge-d.c,v 1.25 1999-11-30 13:48:04 adam Exp $ * * bugs - * (none) + * sinleton-bit has to be in the high end, not low, so as not to confuse + * ordinary small numbers, like in the next pointer.. * * missing * * optimize - * - Input filter: Eliminate del-ins pairs, tell if only one entry (or none) - * - single-entry optimizing (keep the one entry in the dict, no block) * - study and optimize block sizes (later) * - find a way to decide the size of an empty diffblock (after merge) * - On allocating more blocks (in append and merge), check the order of @@ -101,7 +100,7 @@ #include #include #include -#include +#include #include "../index/index.h" #include "isamd-p.h" @@ -196,6 +195,7 @@ FILTER filter_open( ISAMD is, ISAMD_I data ) F->m1 = F->m2 = 0; F->r1 = F->r2 = FILTER_NOTYET; filter_fill(F); + return F; } static void filter_close (FILTER F) @@ -220,28 +220,60 @@ static int filter_read( FILTER F, } F->r1 = FILTER_NOTYET; return res; - -#ifdef SKIPTHIS - char *k_ptr = (char*) k; - int res = (F->data->read_item)(F->data->clientData, &k_ptr, mode); - if (F->is->method->debug > 9) - logf(LOG_LOG,"filt_read: start %d.%d m=%d r=%d", - k->sysno, k->seqno, *mode, res); - return res; -#endif } -static int filter_empty(FILTER F) +static int filter_isempty(FILTER F) { - return 0; + return ( (0 == F->r1) && (0 == F->r2)) ; } static int filter_only_one(FILTER F) { - return 0; + return ( (0 != F->r1) && (0 == F->r2)); +} + + + + +/*************************************************************** + * Singleton encoding + ***************************************************************/ +/* When there is only a single item, we don't allocate a block + * for it, but code it in the directory entry directly, if it + * fits. + */ + +#define DEC_SYSBITS 15 +#define DEC_SEQBITS 15 +#define DEC_MASK(n) ((1<<(n))-1) + +#define SINGLETON_BIT (1<<(DEC_SYSBITS+DEC_SEQBITS+1)) + +int is_singleton(ISAMD_P ipos) +{ + return ( ipos != 0 ) && ( ipos & SINGLETON_BIT ); } +int singleton_encode(struct it_key *k) +/* encodes the key into one int. If it does not fit, returns 0 */ +{ + if ( (k->sysno & DEC_MASK(DEC_SYSBITS) ) != k->sysno ) + return 0; /* no room dor sysno */ + if ( (k->seqno & DEC_MASK(DEC_SYSBITS) ) != k->seqno ) + return 0; /* no room dor sysno */ + return (k->sysno | (k->seqno << DEC_SYSBITS) ) | SINGLETON_BIT; +} + +void singleton_decode (int code, struct it_key *k) +{ + assert (code & SINGLETON_BIT); + k->sysno = code & DEC_MASK(DEC_SYSBITS); + code = code >> DEC_SYSBITS; + k->seqno = code & DEC_MASK(DEC_SEQBITS); +} + + /*************************************************************** * General support routines ***************************************************************/ @@ -387,7 +419,7 @@ static void getDiffInfo(ISAMD_PP pp ) if ( (pp->is->method->debug > 0) && (pp->diffinfo[i].maxidx > pp->is->method->filecat[pp->cat].bsize) ) - { /* bug-hunting, this fails on some long runs that log too much */ + { logf(LOG_LOG,"Bad MaxIx!!! %s:%d: diffidx=%d", __FILE__,__LINE__, diffidx); logf(LOG_LOG,"i=%d maxix=%d bsz=%d", i, pp->diffinfo[i].maxidx, @@ -719,8 +751,11 @@ int isamd_read_item_merge ( assert(winner==0); /* if nothing found, nothing comes from a diff */ cmp= 0; /* eof */ } - if (pp->is->method->debug >9) - logf(LOG_LOG,"mergeDB4: sysno[1]=%d", pp->diffinfo[1].key.sysno); /*!*/ + if (cmp) + ++(pp->is->no_read_keys); + else + ++(pp->is->no_read_eof); + return cmp; } /* isamd_read_item */ @@ -755,7 +790,7 @@ static int merge ( ISAMD_PP firstpp, /* first pp (with diffs) */ /* resize later. Saves disk, but will lead */ /* into bad seeks. */ - ++(readpp->is->files[0].no_merges); + ++(readpp->is->no_merges); /* set up diffs as they should be for reading */ diffidx = ISAMD_BLOCK_OFFSET_1; @@ -790,7 +825,6 @@ static int merge ( ISAMD_PP firstpp, /* first pp (with diffs) */ if (readpp->is->method->debug >5) logf(LOG_LOG,"isamd_merge:all data has been deleted (nk=%d) ", readpp->numKeys); - //assert (readpp->numKeys == 0); /* no longer true! */ } @@ -898,12 +932,12 @@ static int append_diffs( firstpp=isamd_pp_open(is, isamd_addr(0,0) ); firstpp->size=firstpp->offset=ISAMD_BLOCK_OFFSET_1; /* create in smallest category, will expand later */ - ++(is->files[0].no_fbuilds); + ++(is->no_fbuilds); } else { firstpp=isamd_pp_open(is, ipos); - ++(is->files[0].no_appds); + ++(is->no_appds); } if (is->method->debug >2) @@ -948,21 +982,23 @@ static int append_diffs( if (diffidx + codelen > maxsize ) { /* block full */ - if (firstpp->cat < firstpp->is->max_cat) - { /* just increase the block size */ + while ( (firstpp->cat < firstpp->is->max_cat) && + (diffidx + codelen > maxsize) ) + { /* try to increase the block size */ if (firstpp->pos > 0) /* free the old block if allocated */ isamd_release_block(is, firstpp->cat, firstpp->pos); ++firstpp->cat; maxsize = is->method->filecat[firstpp->cat].bsize; firstpp->pos=0; /* need to allocate it when saving */ if (is->method->debug >3) - logf(LOG_LOG,"isamd_appd: increased diff block to %d (%d)", + logf(LOG_LOG,"isamd_appd: increased diff block sz to %d (%d)", firstpp->cat, maxsize); } - else - { /* max size already - can't help, need to merge it */ + if ((firstpp->cat >= firstpp->is->max_cat) && + (diffidx + codelen > maxsize) ) + { /* max size - can't help, need to merge it */ if (is->method->debug >7) - logf(LOG_LOG,"isamd_appd: block full"); + logf(LOG_LOG,"isamd_appd: need to merge"); if (is->method->debug >9) //!!!!! logf(LOG_LOG,"isamd_appd: going to merge with m=%d %d.%d", i_mode, i_key.sysno, i_key.seqno); @@ -972,7 +1008,16 @@ static int append_diffs( assert(!"merge returned zero ??"); } /* need to merge */ } /* block full */ - + + if (!( diffidx+codelen <= maxsize )) + { /* bug hunting */ + logf(LOG_LOG,"OOPS, diffidx problem: d=%d c=%d s=%d > m=%d", + diffidx, codelen, diffidx+codelen, maxsize); + logf(LOG_LOG,"ipos=%d f=%d=%d:%d", + ipos, + isamd_addr(firstpp->pos, firstpp->cat), + firstpp->cat, firstpp->pos ); + } assert ( diffidx+codelen <= maxsize ); /* save the diff */ @@ -1024,8 +1069,49 @@ static int append_diffs( ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data) { FILTER F = filter_open(is,data); - return append_diffs(is,ipos,F); + ISAMD_P rc=0; + + int olddebug= is->method->debug; + if (ipos == 7320) + is->method->debug = 99; /*!*/ + + if ( filter_isempty(F) ) /* can be, if del-ins of the same */ + { + if (is->method->debug >3) + logf(LOG_LOG,"isamd_appd: nothing to do for %d=",ipos); + filter_close(F); + ++(is->no_non); + return ipos; /* without doing anything at all */ + } + + if ( ( 0==ipos) && filter_only_one(F) ) + { + struct it_key k; + int mode; + filter_read(F,&k,&mode); + assert(mode); + rc = singleton_encode(&k); + if (is->method->debug >9) + logf(LOG_LOG,"isamd_appd: singleton %d (%x)", + rc,rc); + if (rc) + is->no_singles++; + assert ( (rc==0) || is_singleton(rc) ); + } + if ( 0==rc) /* either not single, or it did not fit */ + { + rc = append_diffs(is,ipos,F); + assert ( ! is_singleton(rc) ); + /* can happen if we run out of bits, so that block numbers overflow */ + /* to SINGLETON_BIT */ + } filter_close(F); + + if (is->method->debug >2) + logf(LOG_LOG,"isamd_appd: ret %d=%x (%d=%x)", + rc,rc,ipos,ipos); + is->method->debug=olddebug; /*!*/ + return rc; } /* isamd_append */ @@ -1036,7 +1122,19 @@ ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data) /* * $Log: merge-d.c,v $ - * Revision 1.21 1999-09-21 17:36:43 heikki + * Revision 1.25 1999-11-30 13:48:04 adam + * Improved installation. Updated for inclusion of YAZ header files. + * + * Revision 1.24 1999/10/05 09:57:40 heikki + * Tuning the isam-d (and fixed a small "detail") + * + * Revision 1.23 1999/09/27 14:36:36 heikki + * singletons + * + * Revision 1.22 1999/09/23 18:01:18 heikki + * singleton optimising + * + * Revision 1.21 1999/09/21 17:36:43 heikki * Added filter function. Not much of effect on the small test set... * * Revision 1.20 1999/09/20 15:48:06 heikki