X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=isamc%2Fmerge-d.c;h=29fc63025d949a5c03bbce2063659f15885855d7;hb=e150e51a7e20a902e9fd2f11f00811f94f67d529;hp=317818fec327af9d2f9c204757370065d1d8b60a;hpb=f6fb80f1dfd9b1a486595496a0f43aaeb16f7b40;p=idzebra-moved-to-github.git diff --git a/isamc/merge-d.c b/isamc/merge-d.c index 317818f..29fc630 100644 --- a/isamc/merge-d.c +++ b/isamc/merge-d.c @@ -3,16 +3,15 @@ * See the file LICENSE for details. * Heikki Levanto * - * $Id: merge-d.c,v 1.20 1999-09-20 15:48:06 heikki Exp $ + * $Id: merge-d.c,v 1.25 1999-11-30 13:48:04 adam Exp $ * * bugs - * (none) + * sinleton-bit has to be in the high end, not low, so as not to confuse + * ordinary small numbers, like in the next pointer.. * * missing * * optimize - * - Input filter: Eliminate del-ins pairs, tell if only one entry (or none) - * - single-entry optimizing (keep the one entry in the dict, no block) * - study and optimize block sizes (later) * - find a way to decide the size of an empty diffblock (after merge) * - On allocating more blocks (in append and merge), check the order of @@ -101,7 +100,7 @@ #include #include #include -#include +#include #include "../index/index.h" #include "isamd-p.h" @@ -127,36 +126,154 @@ struct ISAMD_DIFF_s { * Input preprocess filter ***************************************************************/ + +#define FILTER_NOTYET -1 /* no data read in yet, to be done */ + struct ISAMD_FILTER_s { ISAMD_I data; /* where the data comes from */ + ISAMD is; /* for debug flags */ struct it_key k1; /* the next item to be returned */ + int m1; /* mode for k1 */ + int r1; /* result for read of k1, or NOTYET */ struct it_key k2; /* the one after that */ + int m2; + int r2; }; -static void init_filter( ISAMD_I data ) +typedef struct ISAMD_FILTER_s *FILTER; + + +void filter_fill(FILTER F) +{ + while ( (F->r1 == FILTER_NOTYET) || (F->r2 == FILTER_NOTYET) ) + { + if (F->r1==FILTER_NOTYET) + { /* move data forward in the filter */ + F->k1 = F->k2; + F->m1 = F->m2; + F->r1 = F->r2; + if ( 0 != F->r1 ) /* not eof */ + F->r2 = FILTER_NOTYET; /* say we want more */ + if (F->is->method->debug > 9) + logf(LOG_LOG,"filt_fill: shift %d.%d m=%d r=%d", + F->k1.sysno, + F->k1.seqno, + F->m1, F->r1); + } + if (F->r2==FILTER_NOTYET) + { /* read new bottom value */ + char *k_ptr = (char*) &F->k2; + F->r2 = (F->data->read_item)(F->data->clientData, &k_ptr, &F->m2); + if (F->is->method->debug > 9) + logf(LOG_LOG,"filt_fill: read %d.%d m=%d r=%d", + F->k2.sysno, F->k2.seqno, F->m2, F->r2); + } + if ( (F->k1.sysno == F->k2.sysno) && + (F->k1.seqno == F->k2.seqno) && + (F->m1 != F->m2) && + (F->r1 >0 ) && (F->r2 >0) ) + { /* del-ins pair of same key (not eof) , ignore both */ + if (F->is->method->debug > 9) + logf(LOG_LOG,"filt_fill: skipped %d.%d m=%d/%d r=%d/%d", + F->k1.sysno, F->k1.seqno, + F->m1,F->m2, F->r1,F->r2); + F->r1 = FILTER_NOTYET; + F->r2 = FILTER_NOTYET; + } + } /* while */ +} /* filter_fill */ + + +FILTER filter_open( ISAMD is, ISAMD_I data ) { + FILTER F = (FILTER) xmalloc(sizeof(struct ISAMD_FILTER_s)); + F->is = is; + F->data = data; + F->k1.sysno=0; + F->k1.seqno=0; + F->k2=F->k1; + F->m1 = F->m2 = 0; + F->r1 = F->r2 = FILTER_NOTYET; + filter_fill(F); + return F; } -static void close_filter () +static void filter_close (FILTER F) { + xfree(F); } -static int filter_read( struct it_key *k) +static int filter_read( FILTER F, + struct it_key *k, + int *mode) { - return 0; + int res; + filter_fill(F); + if (F->is->method->debug > 9) + logf(LOG_LOG,"filt_read: reading %d.%d m=%d r=%d", + F->k1.sysno, F->k1.seqno, F->m1, F->r1); + res = F->r1; + if(res) + { + *k = F->k1; + *mode= F->m1; + } + F->r1 = FILTER_NOTYET; + return res; } -static int filter_empty() +static int filter_isempty(FILTER F) { - return 0; + return ( (0 == F->r1) && (0 == F->r2)) ; } -static int filter_only_one() +static int filter_only_one(FILTER F) { - return 0; + return ( (0 != F->r1) && (0 == F->r2)); } + + +/*************************************************************** + * Singleton encoding + ***************************************************************/ +/* When there is only a single item, we don't allocate a block + * for it, but code it in the directory entry directly, if it + * fits. + */ + +#define DEC_SYSBITS 15 +#define DEC_SEQBITS 15 +#define DEC_MASK(n) ((1<<(n))-1) + +#define SINGLETON_BIT (1<<(DEC_SYSBITS+DEC_SEQBITS+1)) + +int is_singleton(ISAMD_P ipos) +{ + return ( ipos != 0 ) && ( ipos & SINGLETON_BIT ); +} + + +int singleton_encode(struct it_key *k) +/* encodes the key into one int. If it does not fit, returns 0 */ +{ + if ( (k->sysno & DEC_MASK(DEC_SYSBITS) ) != k->sysno ) + return 0; /* no room dor sysno */ + if ( (k->seqno & DEC_MASK(DEC_SYSBITS) ) != k->seqno ) + return 0; /* no room dor sysno */ + return (k->sysno | (k->seqno << DEC_SYSBITS) ) | SINGLETON_BIT; +} + +void singleton_decode (int code, struct it_key *k) +{ + assert (code & SINGLETON_BIT); + k->sysno = code & DEC_MASK(DEC_SYSBITS); + code = code >> DEC_SYSBITS; + k->seqno = code & DEC_MASK(DEC_SEQBITS); +} + + /*************************************************************** * General support routines ***************************************************************/ @@ -302,7 +419,7 @@ static void getDiffInfo(ISAMD_PP pp ) if ( (pp->is->method->debug > 0) && (pp->diffinfo[i].maxidx > pp->is->method->filecat[pp->cat].bsize) ) - { /* bug-hunting, this fails on some long runs that log too much */ + { logf(LOG_LOG,"Bad MaxIx!!! %s:%d: diffidx=%d", __FILE__,__LINE__, diffidx); logf(LOG_LOG,"i=%d maxix=%d bsz=%d", i, pp->diffinfo[i].maxidx, @@ -434,7 +551,8 @@ int isamd_read_item_merge ( ISAMD_PP pp, char **dst, struct it_key *p_key, /* the data item that didn't fit*/ - ISAMD_I data) /* more input data comes here */ + /* ISAMD_I data) */ /* more input data comes here */ + FILTER filt) /* more input data comes here */ { /* The last two args can be null for ordinary reads */ char *keyptr; char *codeptr; @@ -470,7 +588,7 @@ int isamd_read_item_merge ( p_key->sysno=p_key->seqno=0; /* used it up */ } - if (data) + if (filt) { /* we have a whole input stream to inject */ pp->diffinfo[i].difftype=DT_INPU; } @@ -479,41 +597,7 @@ int isamd_read_item_merge ( while (retry) { - retry=0; - -#ifdef SKIPTHIS - - if (0==pp->diffinfo[0].key.sysno) - { /* 0 is special case, main data. */ - oldoffs=pp->offset; - keyptr=(char*) &(pp->diffinfo[0].key); - pp->diffinfo[0].mode = ! isamd_read_main_item(pp,&keyptr); - if (pp->is->method->debug > 7) - logf(LOG_LOG,"isamd_read_item: read main at %d-%d %d.%d (%x.%x)", - oldoffs,pp->offset, - pp->diffinfo[0].key.sysno, pp->diffinfo[0].key.seqno, - pp->diffinfo[0].key.sysno, pp->diffinfo[0].key.seqno); - } /* get main data */ - - if ( (0==pp->diffinfo[1].key.sysno) && (-1==pp->diffinfo[1].maxidx) ) - { /* 1 is another special case, the input data at merge */ - keyptr = (char *) &pp->diffinfo[1].key; - i = (*data->read_item)(data->clientData, &keyptr, &pp->diffinfo[1].mode); - if (!i) - { /* did not get it */ - pp->diffinfo[1].key.sysno=0; - pp->diffinfo[1].maxidx=0; /* signal the end */ - } - if (pp->is->method->debug >7) - logf(LOG_LOG,"merge: read diff m=%d %d.%d (%x.%x)", - pp->diffinfo[1].mode, - pp->diffinfo[1].key.sysno, pp->diffinfo[1].key.seqno, - pp->diffinfo[1].key.sysno, pp->diffinfo[1].key.seqno ); - } /* get input data */ - -#endif // SKIPTHIS - - + retry=0; winner = 0; for (i=0; (!retry) && (pp->diffinfo[i].difftype); i++) { @@ -570,7 +654,9 @@ int isamd_read_item_merge ( else if (pp->diffinfo[i].difftype==DT_INPU) { keyptr = (char *) &pp->diffinfo[i].key; - rc = (*data->read_item)(data->clientData, &keyptr, &pp->diffinfo[i].mode); + /* rc = (*data->read_item)(data->clientData, &keyptr, &pp->diffinfo[i].mode); */ + rc = filter_read(filt, &pp->diffinfo[i].key, + &pp->diffinfo[i].mode); if (!rc) { /* did not get it */ pp->diffinfo[i].key.sysno=0; @@ -665,8 +751,11 @@ int isamd_read_item_merge ( assert(winner==0); /* if nothing found, nothing comes from a diff */ cmp= 0; /* eof */ } - if (pp->is->method->debug >9) - logf(LOG_LOG,"mergeDB4: sysno[1]=%d", pp->diffinfo[1].key.sysno); /*!*/ + if (cmp) + ++(pp->is->no_read_keys); + else + ++(pp->is->no_read_eof); + return cmp; } /* isamd_read_item */ @@ -684,7 +773,8 @@ int isamd_read_item (ISAMD_PP pp, char **dst) static int merge ( ISAMD_PP firstpp, /* first pp (with diffs) */ struct it_key *p_key, /* the data item that didn't fit*/ - ISAMD_I data) /* more input data comes here */ + /* ISAMD_I data) */ /* more input data comes here */ + FILTER filt) /* more input data arriving here */ { int diffidx; int killblk=0; @@ -700,7 +790,7 @@ static int merge ( ISAMD_PP firstpp, /* first pp (with diffs) */ /* resize later. Saves disk, but will lead */ /* into bad seeks. */ - ++(readpp->is->files[0].no_merges); + ++(readpp->is->no_merges); /* set up diffs as they should be for reading */ diffidx = ISAMD_BLOCK_OFFSET_1; @@ -724,7 +814,8 @@ static int merge ( ISAMD_PP firstpp, /* first pp (with diffs) */ r_ptr= (char *) &r_key; - r_more = isamd_read_item_merge( readpp, &r_ptr, p_key, data); +/* r_more = isamd_read_item_merge( readpp, &r_ptr, p_key, data); */ + r_more = isamd_read_item_merge( readpp, &r_ptr, p_key, filt); if (!r_more) { /* oops, all data has been deleted! what to do??? */ /* never mind, we have at least one more delta to add to the block */ @@ -734,7 +825,6 @@ static int merge ( ISAMD_PP firstpp, /* first pp (with diffs) */ if (readpp->is->method->debug >5) logf(LOG_LOG,"isamd_merge:all data has been deleted (nk=%d) ", readpp->numKeys); - //assert (readpp->numKeys == 0); /* no longer true! */ } @@ -767,7 +857,7 @@ static int merge ( ISAMD_PP firstpp, /* first pp (with diffs) */ /* (try to) read next item */ r_ptr= (char *) &r_key; - r_more = isamd_read_item_merge( readpp, &r_ptr,0,data); + r_more = isamd_read_item_merge( readpp, &r_ptr,0,filt); } /* while read */ @@ -814,7 +904,11 @@ static int merge ( ISAMD_PP firstpp, /* first pp (with diffs) */ -static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data) +static int append_diffs( + ISAMD is, + ISAMD_P ipos, + /*ISAMD_I data)*/ + FILTER filt) { struct it_key i_key; /* one input item */ char *i_item = (char *) &i_key; /* same as chars */ @@ -838,12 +932,12 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data) firstpp=isamd_pp_open(is, isamd_addr(0,0) ); firstpp->size=firstpp->offset=ISAMD_BLOCK_OFFSET_1; /* create in smallest category, will expand later */ - ++(is->files[0].no_fbuilds); + ++(is->no_fbuilds); } else { firstpp=isamd_pp_open(is, ipos); - ++(is->files[0].no_appds); + ++(is->no_appds); } if (is->method->debug >2) @@ -858,8 +952,9 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data) diffidx+=sizeof(int); /* difflen will be stored here */ /* read first input */ - i_ptr = i_item; - i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode); + //i_ptr = i_item; //!!! + i_more = filter_read(filt, &i_key, &i_mode); + /* i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode); */ if (is->method->debug >6) logf(LOG_LOG,"isamd_appd: start m=%d %d.%d=%x.%x: %d", @@ -875,7 +970,7 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data) i_key.seqno = i_key.seqno * 2 + i_mode; c_ptr=codebuff; - i_ptr=i_item; + i_ptr=i_item; (*is->method->code_item)(ISAMD_ENCODE, firstpp->decodeClientData, &c_ptr, &i_ptr); codelen = c_ptr - codebuff; @@ -887,31 +982,42 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data) if (diffidx + codelen > maxsize ) { /* block full */ - if (firstpp->cat < firstpp->is->max_cat) - { /* just increase the block size */ + while ( (firstpp->cat < firstpp->is->max_cat) && + (diffidx + codelen > maxsize) ) + { /* try to increase the block size */ if (firstpp->pos > 0) /* free the old block if allocated */ isamd_release_block(is, firstpp->cat, firstpp->pos); ++firstpp->cat; maxsize = is->method->filecat[firstpp->cat].bsize; firstpp->pos=0; /* need to allocate it when saving */ if (is->method->debug >3) - logf(LOG_LOG,"isamd_appd: increased diff block to %d (%d)", + logf(LOG_LOG,"isamd_appd: increased diff block sz to %d (%d)", firstpp->cat, maxsize); } - else - { /* max size already - can't help, need to merge it */ + if ((firstpp->cat >= firstpp->is->max_cat) && + (diffidx + codelen > maxsize) ) + { /* max size - can't help, need to merge it */ if (is->method->debug >7) - logf(LOG_LOG,"isamd_appd: block full"); + logf(LOG_LOG,"isamd_appd: need to merge"); if (is->method->debug >9) //!!!!! logf(LOG_LOG,"isamd_appd: going to merge with m=%d %d.%d", i_mode, i_key.sysno, i_key.seqno); - merge_rc = merge (firstpp, &i_key, data); + merge_rc = merge (firstpp, &i_key, filt); if (0!=merge_rc) return merge_rc; /* merge handled them all ! */ assert(!"merge returned zero ??"); } /* need to merge */ } /* block full */ - + + if (!( diffidx+codelen <= maxsize )) + { /* bug hunting */ + logf(LOG_LOG,"OOPS, diffidx problem: d=%d c=%d s=%d > m=%d", + diffidx, codelen, diffidx+codelen, maxsize); + logf(LOG_LOG,"ipos=%d f=%d=%d:%d", + ipos, + isamd_addr(firstpp->pos, firstpp->cat), + firstpp->cat, firstpp->pos ); + } assert ( diffidx+codelen <= maxsize ); /* save the diff */ @@ -929,7 +1035,8 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data) /* (try to) read the next input */ i_ptr = i_item; - i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode); + i_more = filter_read(filt, &i_key, &i_mode); + /* i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode); */ if ( (i_more) && (is->method->debug >6) ) logf(LOG_LOG,"isamd_appd: got m=%d %d.%d=%x.%x: %d", i_mode, @@ -956,12 +1063,55 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data) /************************************************************* - * isamd_append itself, Sweet, isn't it + * isamd_append itself *************************************************************/ ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data) { - return append_diffs(is,ipos,data); + FILTER F = filter_open(is,data); + ISAMD_P rc=0; + + int olddebug= is->method->debug; + if (ipos == 7320) + is->method->debug = 99; /*!*/ + + if ( filter_isempty(F) ) /* can be, if del-ins of the same */ + { + if (is->method->debug >3) + logf(LOG_LOG,"isamd_appd: nothing to do for %d=",ipos); + filter_close(F); + ++(is->no_non); + return ipos; /* without doing anything at all */ + } + + if ( ( 0==ipos) && filter_only_one(F) ) + { + struct it_key k; + int mode; + filter_read(F,&k,&mode); + assert(mode); + rc = singleton_encode(&k); + if (is->method->debug >9) + logf(LOG_LOG,"isamd_appd: singleton %d (%x)", + rc,rc); + if (rc) + is->no_singles++; + assert ( (rc==0) || is_singleton(rc) ); + } + if ( 0==rc) /* either not single, or it did not fit */ + { + rc = append_diffs(is,ipos,F); + assert ( ! is_singleton(rc) ); + /* can happen if we run out of bits, so that block numbers overflow */ + /* to SINGLETON_BIT */ + } + filter_close(F); + + if (is->method->debug >2) + logf(LOG_LOG,"isamd_appd: ret %d=%x (%d=%x)", + rc,rc,ipos,ipos); + is->method->debug=olddebug; /*!*/ + return rc; } /* isamd_append */ @@ -972,7 +1122,22 @@ ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data) /* * $Log: merge-d.c,v $ - * Revision 1.20 1999-09-20 15:48:06 heikki + * Revision 1.25 1999-11-30 13:48:04 adam + * Improved installation. Updated for inclusion of YAZ header files. + * + * Revision 1.24 1999/10/05 09:57:40 heikki + * Tuning the isam-d (and fixed a small "detail") + * + * Revision 1.23 1999/09/27 14:36:36 heikki + * singletons + * + * Revision 1.22 1999/09/23 18:01:18 heikki + * singleton optimising + * + * Revision 1.21 1999/09/21 17:36:43 heikki + * Added filter function. Not much of effect on the small test set... + * + * Revision 1.20 1999/09/20 15:48:06 heikki * Small changes * * Revision 1.19 1999/09/13 13:28:28 heikki @@ -1031,5 +1196,3 @@ ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data) */ - -