X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=isamc%2Fmerge-d.c;h=29fc63025d949a5c03bbce2063659f15885855d7;hp=8c41ebdb1301876711ffa0f86dd13536fa088907;hb=2b33b395b50c562323ea2b0251f9b798cf5241d2;hpb=544e8fcc65541951ba92d0212838a0bebbc3084c diff --git a/isamc/merge-d.c b/isamc/merge-d.c index 8c41ebd..29fc630 100644 --- a/isamc/merge-d.c +++ b/isamc/merge-d.c @@ -3,20 +3,22 @@ * See the file LICENSE for details. * Heikki Levanto * - * $Id: merge-d.c,v 1.13 1999-08-18 13:59:19 heikki Exp $ + * $Id: merge-d.c,v 1.25 1999-11-30 13:48:04 adam Exp $ * - * todo - * - Clean up log levels - * - Input filter: Eliminate del-ins pairs, tell if only one entry (or none) - * - single-entry optimizing + * bugs + * sinleton-bit has to be in the high end, not low, so as not to confuse + * ordinary small numbers, like in the next pointer.. + * + * missing + * + * optimize * - study and optimize block sizes (later) - * - Clean up the different ways diffs are handled in writing and reading - * - Keep a merge-count in the firstpp, and if the block has already been - * merged, reduce it to a larger size even if it could fit in a small one! - * - Keep minimum freespace in the category table, and use that in reduce! + * - find a way to decide the size of an empty diffblock (after merge) + * - On allocating more blocks (in append and merge), check the order of + * blocks, and if needed, swap them. + * - Write a routine to save/load indexes into a block, save only as many + * bytes as needed (size, diff, diffindexes) * - * bugs - * - Still has not been able to run a complete long test on bagel! * * caveat * There is a confusion about the block addresses. cat or type is the category, @@ -32,7 +34,7 @@ * Needs cleaning! The way diff blocks are handled in append and reading is * quite different, and likely to give maintenance problems. 
* - * log levels (set isamd=x in zebra.cfg (or what ever cfg file you use) ) + * log levels (set isamddebug=x in zebra.cfg (or what ever cfg file you use) ) * 0 = no logging. Default * 1 = no logging here. isamd logs overall statistics * 2 = Each call to isamd_append with start address and no more @@ -43,15 +45,62 @@ * 7 = Log each record as it passes the system (once) * 8 = Log raw and (de)coded data * 9 = Anything else that may be useful - * .. = Anything needed toi hunt a specific bug + * .. = Anything needed to hunt a specific bug * (note that all tests in the code are like debug>3, which means 4 or above!) + * + * Design for the new and improved isamd + * Key points: + * - The first block is only diffs, no straight data + * - Additional blocks are straight data + * - When a diff block gets filled up, a data block is created by + * merging the diffs with the data + * + * Structure + * - Isamd_pp: buffer for diffs and for data + * keep both pos, type, and combined address + * routine to set the address + * - diffbuf: lengths as short ints, or bytes for small blocks + * - keys are of key_struct, not just a number of bytes. + * + * Routines + * - isamd_append + * - create_new_block if needed + * - append_diffs + * - load_diffs + * - get diffend, start encoding + * - while input data + * - encode it + * - if no room, then realloc block in larger size + * - if still no room, merge and exit + * - append in the block + * + * - merge + * - just as before, except that merges also input data directly + * - writes into new data blocks + * + * + * - isamd.c: load firstpp, load datablock + * save firstpp, save datablock + * - Readlength, writelength - handling right size of len fields + * - isamd_read_main_item: take also a merge input structure, and merge it too + * - prefilter: cache two inputs, and check if they cancel. + * - single-item optimization + * + * questions: Should we realloc firstblocks in a different size as the main + * blocks. 
Makes a sideways seek, which is bound to be slower. But saves some + * update time. Compromise: alloc the first one in the size of the datablock, + * but increase if necessary. Large blocks get a large diff, ok. Small ones + * may get an extra seek in read, but save merges. */ + +#define NEW_ISAM_D 1 /* not yet ready to delete the old one! */ + #include #include #include #include -#include +#include #include "../index/index.h" #include "isamd-p.h" @@ -62,8 +111,173 @@ struct ISAMD_DIFF_s { struct it_key key; void *decodeData; int mode; + int difftype; +}; + +#define DT_NONE 0 // no diff, marks end of sequence +#define DT_DIFF 1 // ordinary diff +#define DT_MAIN 2 // main data +#define DT_INPU 3 // input data to be merged +#define DT_DONE 4 // done with all input here + + + +/*************************************************************** + * Input preprocess filter + ***************************************************************/ + + +#define FILTER_NOTYET -1 /* no data read in yet, to be done */ + +struct ISAMD_FILTER_s { + ISAMD_I data; /* where the data comes from */ + ISAMD is; /* for debug flags */ + struct it_key k1; /* the next item to be returned */ + int m1; /* mode for k1 */ + int r1; /* result for read of k1, or NOTYET */ + struct it_key k2; /* the one after that */ + int m2; + int r2; }; +typedef struct ISAMD_FILTER_s *FILTER; + + +void filter_fill(FILTER F) +{ + while ( (F->r1 == FILTER_NOTYET) || (F->r2 == FILTER_NOTYET) ) + { + if (F->r1==FILTER_NOTYET) + { /* move data forward in the filter */ + F->k1 = F->k2; + F->m1 = F->m2; + F->r1 = F->r2; + if ( 0 != F->r1 ) /* not eof */ + F->r2 = FILTER_NOTYET; /* say we want more */ + if (F->is->method->debug > 9) + logf(LOG_LOG,"filt_fill: shift %d.%d m=%d r=%d", + F->k1.sysno, + F->k1.seqno, + F->m1, F->r1); + } + if (F->r2==FILTER_NOTYET) + { /* read new bottom value */ + char *k_ptr = (char*) &F->k2; + F->r2 = (F->data->read_item)(F->data->clientData, &k_ptr, &F->m2); + if (F->is->method->debug > 9) + 
logf(LOG_LOG,"filt_fill: read %d.%d m=%d r=%d", + F->k2.sysno, F->k2.seqno, F->m2, F->r2); + } + if ( (F->k1.sysno == F->k2.sysno) && + (F->k1.seqno == F->k2.seqno) && + (F->m1 != F->m2) && + (F->r1 >0 ) && (F->r2 >0) ) + { /* del-ins pair of same key (not eof) , ignore both */ + if (F->is->method->debug > 9) + logf(LOG_LOG,"filt_fill: skipped %d.%d m=%d/%d r=%d/%d", + F->k1.sysno, F->k1.seqno, + F->m1,F->m2, F->r1,F->r2); + F->r1 = FILTER_NOTYET; + F->r2 = FILTER_NOTYET; + } + } /* while */ +} /* filter_fill */ + + +FILTER filter_open( ISAMD is, ISAMD_I data ) +{ + FILTER F = (FILTER) xmalloc(sizeof(struct ISAMD_FILTER_s)); + F->is = is; + F->data = data; + F->k1.sysno=0; + F->k1.seqno=0; + F->k2=F->k1; + F->m1 = F->m2 = 0; + F->r1 = F->r2 = FILTER_NOTYET; + filter_fill(F); + return F; +} + +static void filter_close (FILTER F) +{ + xfree(F); +} + +static int filter_read( FILTER F, + struct it_key *k, + int *mode) +{ + int res; + filter_fill(F); + if (F->is->method->debug > 9) + logf(LOG_LOG,"filt_read: reading %d.%d m=%d r=%d", + F->k1.sysno, F->k1.seqno, F->m1, F->r1); + res = F->r1; + if(res) + { + *k = F->k1; + *mode= F->m1; + } + F->r1 = FILTER_NOTYET; + return res; +} + +static int filter_isempty(FILTER F) +{ + return ( (0 == F->r1) && (0 == F->r2)) ; +} + +static int filter_only_one(FILTER F) +{ + return ( (0 != F->r1) && (0 == F->r2)); +} + + + + +/*************************************************************** + * Singleton encoding + ***************************************************************/ +/* When there is only a single item, we don't allocate a block + * for it, but code it in the directory entry directly, if it + * fits. + */ + +#define DEC_SYSBITS 15 +#define DEC_SEQBITS 15 +#define DEC_MASK(n) ((1<<(n))-1) + +#define SINGLETON_BIT (1<<(DEC_SYSBITS+DEC_SEQBITS+1)) + +int is_singleton(ISAMD_P ipos) +{ + return ( ipos != 0 ) && ( ipos & SINGLETON_BIT ); +} + + +int singleton_encode(struct it_key *k) +/* encodes the key into one int. 
If it does not fit, returns 0 */ +{ + if ( (k->sysno & DEC_MASK(DEC_SYSBITS) ) != k->sysno ) + return 0; /* no room for sysno */ + if ( (k->seqno & DEC_MASK(DEC_SYSBITS) ) != k->seqno ) + return 0; /* no room for seqno */ + return (k->sysno | (k->seqno << DEC_SYSBITS) ) | SINGLETON_BIT; +} + +void singleton_decode (int code, struct it_key *k) +{ + assert (code & SINGLETON_BIT); + k->sysno = code & DEC_MASK(DEC_SYSBITS); + code = code >> DEC_SYSBITS; + k->seqno = code & DEC_MASK(DEC_SEQBITS); +} + + +/*************************************************************** + * General support routines + ***************************************************************/ + static char *hexdump(unsigned char *p, int len, char *buff) { @@ -81,38 +295,112 @@ static char *hexdump(unsigned char *p, int len, char *buff) { } -static int separateDiffBlock(ISAMD_PP pp) + +static void isamd_reduceblock(ISAMD_PP pp) +/* takes a large block, and reduces its category if possible */ +/* Presumably the first block in an isam-list */ { - int limit = sizeof(int) + 8; - if (pp->next) - return 1; /* multi-block chains always have a separate diff block */ - return ( pp->size + limit >= pp->is->method->filecat[pp->cat].bsize); - /* make sure there is at least room for the length and one diff. if not, */ - /* it goes to a separate block. Assumes max diff is 8 bytes. Not */ - /* unreaalistic in large data sets, where first sysno may be very large, */ - /* and even the first seqno may be quite something. */ - - /* todo: Make the limit adjustable in the filecat table ! */ + if (pp->pos) + return; /* existing block, do not touch */ + /* TODO: Probably we may touch anyway? 
*/ + if (pp->is->method->debug > 5) + logf(LOG_LOG,"isamd_reduce: start p=%d c=%d sz=%d", + pp->pos, pp->cat, pp->size); + while ( ( pp->cat > 0 ) && (!pp->next) && + (pp->offset < pp->is->method->filecat[pp->cat-1].bsize ) ) + pp->cat--; + pp->pos = isamd_alloc_block(pp->is, pp->cat); + if (pp->is->method->debug > 5) + logf(LOG_LOG,"isamd_reduce: got p=%d c=%d sz=%d", + pp->pos, pp->cat, pp->size); +} /* reduceblock */ + + +static int save_first_pp ( ISAMD_PP firstpp) +{ + isamd_buildfirstblock(firstpp); + isamd_write_block(firstpp->is,firstpp->cat,firstpp->pos,firstpp->buf); + return isamd_addr(firstpp->pos,firstpp->cat); } + +static void save_last_pp (ISAMD_PP pp) +{ + pp->next = 0;/* just to be sure */ + isamd_buildlaterblock(pp); + isamd_write_block(pp->is,pp->cat,pp->pos,pp->buf); +} + +#ifdef UNUSED +static int save_both_pps (ISAMD_PP firstpp, ISAMD_PP pp) +{ + /* order of things: Better to save firstpp first, if there are just two */ + /* blocks, but last if there are blocks in between, as these have already */ + /* been saved... 
optimise later (that's why this is in its own func...*/ + int retval = save_first_pp(firstpp); + if (firstpp!=pp){ + save_last_pp(pp); + isamd_pp_close(pp); + } + isamd_pp_close(firstpp); + return retval; +} /* save_both_pps */ +#endif + + + +/*************************************************************** + * Diffblock handling + ***************************************************************/ + +void isamd_free_diffs(ISAMD_PP pp) +{ + int i; + if (pp->is->method->debug > 5) + logf(LOG_LOG,"isamd_free_diffs: pp=%p di=%p", pp, pp->diffinfo); + if (!pp->diffinfo) + return; + for (i=0;pp->diffinfo[i].difftype!=DT_NONE;i++) + if(pp->diffinfo[i].decodeData) + { + if (pp->is->method->debug > 8) + logf(LOG_LOG,"isamd_free_diffs [%d]=%p",i, + pp->diffinfo[i].decodeData); + (*pp->is->method->code_stop)(ISAMD_DECODE,pp->diffinfo[i].decodeData); + } + xfree(pp->diffinfo); + if (pp->diffbuf != pp->buf) + xfree (pp->diffbuf); + pp->diffbuf=0; + pp->diffinfo=0; +} /* isamd_free_diffs */ -/************************************************************** - * Reading - **************************************************************/ -static void getDiffInfo(ISAMD_PP pp, int diffidx) +static void getDiffInfo(ISAMD_PP pp ) { /* builds the diff info structures from a diffblock */ - int maxinfos = pp->is->method->filecat[pp->cat].bsize / 5 +1; + int maxinfos = pp->is->method->filecat[pp->cat].bsize / 5 +2; /* Each diff takes at least 5 bytes. 
Probably more, but this is safe */ - int i=1; /* [0] is used for the main data */ + int i=1; /* [0] is used for the main data, [n+1] for merge inputs */ int diffsz= maxinfos * sizeof(struct ISAMD_DIFF_s); + int maxsz = pp->is->method->filecat[pp->is->max_cat].bsize; + int diffidx = ISAMD_BLOCK_OFFSET_1; pp->diffinfo = xmalloc( diffsz ); + pp->offset = pp->size+1; /* used this block up */ memset(pp->diffinfo,'\0',diffsz); if (pp->is->method->debug > 5) - logf(LOG_LOG,"isamd_getDiffInfo: %d (%d:%d), ix=%d mx=%d", - isamd_addr(pp->pos, pp->cat), pp->cat, pp->pos, diffidx,maxinfos); - assert(pp->diffbuf); + logf(LOG_LOG,"isamd_getDiffInfo: %d=%d:%d->%d, ix=%d mx=%d", + isamd_addr(pp->pos, pp->cat), pp->cat, pp->pos, pp->next, + diffidx,maxinfos); + + /* duplicate the buffer for diffs */ + /* (so that we can read the next real buffer(s) */ + assert(0==pp->diffbuf); + pp->diffbuf=xmalloc(maxsz); + memcpy(pp->diffbuf, pp->buf, maxsz); + + pp->diffinfo[0].maxidx=-1; /* mark as special */ + pp->diffinfo[0].difftype=DT_MAIN; while (idiffinfo[i].maxidx, &pp->diffbuf[diffidx], sizeof(int) ); - + pp->diffinfo[i].difftype=DT_DIFF; if (pp->is->method->debug > 5) logf(LOG_LOG,"isamd_getDiffInfo: max=%d ix=%d dbuf=%p", pp->diffinfo[i].maxidx, diffidx, pp->diffbuf); if ( (pp->is->method->debug > 0) && (pp->diffinfo[i].maxidx > pp->is->method->filecat[pp->cat].bsize) ) - { /* bug-hunting, this fails on some long runs that log too much */ + { logf(LOG_LOG,"Bad MaxIx!!! %s:%d: diffidx=%d", __FILE__,__LINE__, diffidx); logf(LOG_LOG,"i=%d maxix=%d bsz=%d", i, pp->diffinfo[i].maxidx, @@ -164,60 +452,108 @@ static void getDiffInfo(ISAMD_PP pp, int diffidx) assert (!"too many diff sequences in the block"); } -static void loadDiffs(ISAMD_PP pp) -{ /* assumes pp is a firstblock! 
*/ - int diffidx; - int diffaddr; - if (0==pp->diffs) - return; /* no diffs to talk about */ - if (pp->diffs & 1 ) - { /* separate diff block, load it */ - pp->diffbuf= xmalloc( pp->is->method->filecat[pp->cat].bsize); - diffaddr=isamd_addr(pp->diffs/2, pp->cat); - isamd_read_block (pp->is, isamd_type(diffaddr), - isamd_block(diffaddr), pp->diffbuf ); - diffidx= ISAMD_BLOCK_OFFSET_N; - if (pp->is->method->debug > 4) - logf(LOG_LOG,"isamd_LoadDiffs: loaded block %d=%d:%d, d=%d ix=%d", - diffaddr, isamd_type(diffaddr),isamd_block(diffaddr), - pp->diffs,diffidx); - } - else - { /* integrated block, just set the pointers */ - pp->diffbuf = pp->buf; - diffidx = pp->size; /* size is the beginning of diffs, diffidx the end*/ - if (pp->is->method->debug > 4) - logf(LOG_LOG,"isamd_LoadDiffs: within %d=%d:%d, d=%d ix=%d ", - isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos, pp->diffs, diffidx); - } - getDiffInfo(pp,diffidx); -} /* loadDiffs */ +/*************************************************************** + * Main block operations + ***************************************************************/ -void isamd_free_diffs(ISAMD_PP pp) -{ - int i; - if (pp->is->method->debug > 5) - logf(LOG_LOG,"isamd_free_diffs: pp=%p di=%p", pp, pp->diffinfo); - if (!pp->diffinfo) - return; - for (i=1;pp->diffinfo[i].decodeData;i++) - { - if (pp->is->method->debug > 8) - logf(LOG_LOG,"isamd_free_diffs [%d]=%p",i, - pp->diffinfo[i].decodeData); - (*pp->is->method->code_stop)(ISAMD_DECODE,pp->diffinfo[i].decodeData); - } - xfree(pp->diffinfo); - if (pp->diffbuf != pp->buf) - xfree (pp->diffbuf); -} /* isamd_free_diffs */ +static ISAMD_PP get_new_main_block( ISAMD_PP firstpp, ISAMD_PP pp) +{ /* allocates a new block for the main data, and links it in */ + int newblock; + if (0 == firstpp->next) + { /* special case, pp not yet allocated. 
*/ + /*Started as largest size, that's fine */ + pp->pos = isamd_alloc_block(pp->is,pp->cat); + firstpp->next = isamd_addr(pp->pos,pp->cat); + if (pp->is->method->debug >3) + logf(LOG_LOG,"isamd_build: Alloc 1. dblock p=%d=%d:%d", + isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos); + } + newblock=isamd_alloc_block(pp->is,pp->cat); + pp->next=isamd_addr(newblock,pp->cat); + isamd_buildlaterblock(pp); + isamd_write_block(pp->is,pp->cat,pp->pos,pp->buf); + if (pp->is->method->debug >3) + logf(LOG_LOG,"isamd_build: Alloc nxt %d=%d:%d -> %d=%d:%d", + isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos, + isamd_addr(newblock,pp->cat), pp->cat, newblock); + pp->next=0; + pp->pos=newblock; + pp->size=pp->offset=ISAMD_BLOCK_OFFSET_N; + return pp; +} /* get_new_main_block */ + + +static ISAMD_PP append_main_item(ISAMD_PP firstpp, + ISAMD_PP pp, + struct it_key *i_key) +{ /* appends one item in the main data block, allocates new if needed */ + char *i_item= (char *) i_key; /* same as char */ + char *i_ptr=i_item; + char codebuff[128]; + char *c_ptr = codebuff; + int codelen; + char hexbuff[64]; + + int maxsize = pp->is->method->filecat[pp->is->max_cat].bsize; + + c_ptr=codebuff; + i_ptr=i_item; + (*pp->is->method->code_item)(ISAMD_ENCODE, pp->decodeClientData, + &c_ptr, &i_ptr); + codelen = c_ptr - codebuff; + assert ( (codelen<128) && (codelen>0)); + if (pp->is->method->debug >7) + logf(LOG_LOG,"isamd:build: coded %s nk=%d,ofs=%d-%d", + hexdump(codebuff, c_ptr-codebuff,hexbuff), firstpp->numKeys+1, + pp->offset, pp->offset+codelen); + if (pp->offset + codelen > maxsize ) + { /* oops, block full - get a new one */ + pp = get_new_main_block( firstpp, pp ); + /* reset encoging and code again */ + (*pp->is->method->code_reset)(pp->decodeClientData); + c_ptr=codebuff; + i_ptr=i_item; + (*pp->is->method->code_item)(ISAMD_ENCODE, pp->decodeClientData, + &c_ptr, &i_ptr); + codelen = c_ptr - codebuff; + assert ( (codelen<128) && (codelen>0)); + if (pp->is->method->debug >7) + 
logf(LOG_LOG,"isamd:build: recoded into %s (nk=%d)", + hexdump(codebuff, c_ptr-codebuff,hexbuff), firstpp->numKeys+1); + } /* block full */ + + assert (pp->offset + codelen <= maxsize ); + + /* write the data into pp, now we must have room */ + memcpy(&(pp->buf[pp->offset]),codebuff,codelen); + pp->offset += codelen; + pp->size += codelen; + firstpp->numKeys++; + /* clear the next 4 bytes in block, to avoid confusions with diff lens */ + /* dirty, it should not be done here, but something slips somewhere, and */ + /* I hope this fixes it... - Heikki */ + codelen = pp->offset; + while ( (codelen < maxsize ) && (codelen <= pp->offset+4) ) + pp->buf[codelen++] = '\0'; + return pp; +} /* append_main_item */ + + +/*************************************************************** + * Read with merge + ***************************************************************/ /* Reads one item and corrects for the diffs, if any */ /* return 1 for ok, 0 for eof */ -int isamd_read_item (ISAMD_PP pp, char **dst) -{ +int isamd_read_item_merge ( + ISAMD_PP pp, + char **dst, + struct it_key *p_key, /* the data item that didn't fit*/ + /* ISAMD_I data) */ /* more input data comes here */ + FILTER filt) /* more input data comes here */ +{ /* The last two args can be null for ordinary reads */ char *keyptr; char *codeptr; char *codestart; @@ -225,65 +561,146 @@ int isamd_read_item (ISAMD_PP pp, char **dst) int i; /* looping diffs */ int cmp; int retry=1; - if (pp->diffs==0) /* no diffs, just read the thing */ - return isamd_read_main_item(pp,dst); + int oldoffs; + int rc; + + if (!pp->diffinfo) + { /* first time */ + getDiffInfo(pp); + + for(i=1; pp->diffinfo[i].difftype!=DT_NONE; i++) + ; /* find last diff */ + if (p_key) + { /* we have an extra item to inject into the merge */ + if (pp->is->method->debug >9) //!!!!! 
+ logf(LOG_LOG,"isamd_read_item: going to merge with %d.%d", + p_key->sysno, p_key->seqno); + pp->diffinfo[i].key = *p_key; /* the key merge could not handle */ + pp->diffinfo[i].mode = pp->diffinfo[i].key.seqno & 1; + pp->diffinfo[i].key.seqno >>= 1; + pp->diffinfo[i].difftype=DT_INPU; + if (pp->is->method->debug > 7) + logf(LOG_LOG,"isamd_read_item: inpu key %d sys=%d seq=%d=2*%d+%d", + i, p_key->sysno, + pp->diffinfo[i].key.seqno*2 + pp->diffinfo[1].mode, + pp->diffinfo[i].key.seqno, + pp->diffinfo[i].mode); + p_key->sysno=p_key->seqno=0; /* used it up */ + } - if (!pp->diffinfo) - loadDiffs(pp); + if (filt) + { /* we have a whole input stream to inject */ + pp->diffinfo[i].difftype=DT_INPU; + } + } /* first time */ + while (retry) + { - retry=0; - if (0==pp->diffinfo[0].key.sysno) - { /* 0 is special case, main data. */ - keyptr=(char*) &(pp->diffinfo[0].key); - pp->diffinfo[0].mode = ! isamd_read_main_item(pp,&keyptr); - if (pp->is->method->debug > 7) - logf(LOG_LOG,"isamd_read_item: read main %d.%d (%x.%x)", - pp->diffinfo[0].key.sysno, pp->diffinfo[0].key.seqno, - pp->diffinfo[0].key.sysno, pp->diffinfo[0].key.seqno); - } /* get main data */ + retry=0; winner = 0; - for (i=1; (!retry) && (pp->diffinfo[i].decodeData); i++) + for (i=0; (!retry) && (pp->diffinfo[i].difftype); i++) { + if (0==pp->diffinfo[i].key.sysno) + {/* read a new one, if possible */ + if ((pp->diffinfo[i].difftype==DT_DIFF) && + (pp->diffinfo[i].diffidx < pp->diffinfo[i].maxidx)) + { /* a normal kind of diff */ + oldoffs=pp->diffinfo[i].diffidx; + codeptr= codestart = &(pp->diffbuf[pp->diffinfo[i].diffidx]); + keyptr=(char *)&(pp->diffinfo[i].key); + (*pp->is->method->code_item)(ISAMD_DECODE, + pp->diffinfo[i].decodeData, &keyptr, &codeptr); + pp->diffinfo[i].diffidx += codeptr-codestart; + pp->diffinfo[i].mode = pp->diffinfo[i].key.seqno & 1; + pp->diffinfo[i].key.seqno = pp->diffinfo[i].key.seqno >>1 ; + if (pp->is->method->debug > 9) + logf(LOG_LOG,"isamd_read_item: dif[%d] at %d-%d: 
%s", + i,oldoffs, pp->diffinfo[i].diffidx, + hexdump(pp->buf+oldoffs, pp->diffinfo[i].diffidx-oldoffs,0)); + if (pp->is->method->debug > 7) + logf(LOG_LOG,"isamd_read_item: rd dif[%d] %d.%d (%x.%x)", + i, + pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno, + pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno); + } + else if ( pp->diffinfo[i].difftype==DT_MAIN) + { /* read a main item */ + assert(i==0); /* main data goes before any diffs */ + oldoffs=pp->offset; + keyptr=(char*) &(pp->diffinfo[0].key); + rc= isamd_read_main_item(pp,&keyptr); + if (0==rc) + { /* eof */ + if (pp->is->method->debug > 7) + logf(LOG_LOG,"isamd_read_item: eof (rc=%d) main ", + rc); + pp->diffinfo[i].maxidx=-1; + pp->diffinfo[i].key.sysno=0; + pp->diffinfo[i].key.seqno=0; + pp->diffinfo[i].difftype= DT_DONE; + } + else + { /* not eof */ + pp->diffinfo[i].mode = 1; + if (pp->is->method->debug > 7) + logf(LOG_LOG,"isamd_read_item: rd main %d-%d %d.%d (%x.%x) m=%d", + oldoffs,pp->offset, + pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno, + pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno, + pp->diffinfo[i].mode); + } /* not eof */ + } + else if (pp->diffinfo[i].difftype==DT_INPU) + { + keyptr = (char *) &pp->diffinfo[i].key; + /* rc = (*data->read_item)(data->clientData, &keyptr, &pp->diffinfo[i].mode); */ + rc = filter_read(filt, &pp->diffinfo[i].key, + &pp->diffinfo[i].mode); + if (!rc) + { /* did not get it */ + pp->diffinfo[i].key.sysno=0; + pp->diffinfo[i].maxidx=0; /* signal the end */ + pp->diffinfo[i].difftype=DT_DONE; + } + if (pp->is->method->debug >7) + logf(LOG_LOG,"merge: read inpu m=%d %d.%d (%x.%x)", + pp->diffinfo[i].mode, + pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno, + pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno ); + } /* read an input item */ + } /* read a new one */ + if (pp->is->method->debug > 8) logf(LOG_LOG,"isamd_read_item: considering d%d %d.%d ix=%d mx=%d", i, pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno, 
pp->diffinfo[i].diffidx, pp->diffinfo[i].maxidx); - if ( (0==pp->diffinfo[i].key.sysno) && - (pp->diffinfo[i].diffidx < pp->diffinfo[i].maxidx)) - {/* read a new one, if possible */ - codeptr= codestart = &(pp->diffbuf[pp->diffinfo[i].diffidx]); - keyptr=(char *)&(pp->diffinfo[i].key); - (*pp->is->method->code_item)(ISAMD_DECODE, - pp->diffinfo[i].decodeData, &keyptr, &codeptr); - pp->diffinfo[i].diffidx += codeptr-codestart; - pp->diffinfo[i].mode = pp->diffinfo[i].key.seqno & 1; - pp->diffinfo[i].key.seqno = pp->diffinfo[i].key.seqno >>1 ; - if (pp->is->method->debug > 7) - logf(LOG_LOG,"isamd_read_item: read diff[%d] %d.%d (%x.%x)",i, - pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno, - pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno); - } if ( 0!= pp->diffinfo[i].key.sysno) { /* got a key, compare */ - cmp=key_compare(&pp->diffinfo[i].key, &pp->diffinfo[winner].key); + if (i!=winner) + cmp=key_compare(&pp->diffinfo[i].key, &pp->diffinfo[winner].key); + else + cmp=-1; if (0==pp->diffinfo[winner].key.sysno) cmp=-1; /* end of main sequence, take all diffs */ if (cmp<0) { if (pp->is->method->debug > 8) - logf(LOG_LOG,"isamd_read_item: ins %d<%d %d.%d (%x.%x) < %d.%d (%x.%x)", - i, winner, + logf(LOG_LOG,"isamd_read_item: ins [%d]%d.%d < [%d]%d.%d", + i, pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno, - pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno, - pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno, + winner, pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno); if (pp->diffinfo[i].mode) /* insert diff, should always be */ winner = i; else + { + if (pp->is->method->debug > 1) + logf(LOG_LOG,"delete diff for nonexisting item"); assert(!"delete diff for nonexisting item"); /* is an assert too steep here? 
Not really.*/ + } } /* earlier key */ else if (cmp==0) { @@ -334,384 +751,99 @@ int isamd_read_item (ISAMD_PP pp, char **dst) assert(winner==0); /* if nothing found, nothing comes from a diff */ cmp= 0; /* eof */ } + if (cmp) + ++(pp->is->no_read_keys); + else + ++(pp->is->no_read_eof); + return cmp; } /* isamd_read_item */ -/***************************************************************** - * Support routines - *****************************************************************/ - -static void isamd_reduceblock(ISAMD_PP pp) -/* takes a large block, and reduces its category if possible */ -/* Presumably the first block in an isam-list */ -{ - if (pp->pos) - return; /* existing block, do not touch */ - if (pp->is->method->debug > 5) - logf(LOG_LOG,"isamd_reduce: start p=%d c=%d sz=%d", - pp->pos, pp->cat, pp->size); - while ( ( pp->cat > 0 ) && (!pp->next) && - (pp->offset < pp->is->method->filecat[pp->cat-1].bsize ) ) - pp->cat--; - pp->pos = isamd_alloc_block(pp->is, pp->cat); - if (pp->is->method->debug > 5) - logf(LOG_LOG,"isamd_reduce: got p=%d c=%d sz=%d", - pp->pos, pp->cat, pp->size); -} /* reduceblock */ - - - -static int save_first_pp ( ISAMD_PP firstpp) -{ - isamd_reduceblock(firstpp); - isamd_buildfirstblock(firstpp); - isamd_write_block(firstpp->is,firstpp->cat,firstpp->pos,firstpp->buf); - return isamd_addr(firstpp->pos,firstpp->cat); -} - -static void save_last_pp (ISAMD_PP pp) +int isamd_read_item (ISAMD_PP pp, char **dst) { - pp->next = 0;/* just to be sure */ - isamd_buildlaterblock(pp); - isamd_write_block(pp->is,pp->cat,pp->pos,pp->buf); + return isamd_read_item_merge(pp,dst,0,0); } -static int save_both_pps (ISAMD_PP firstpp, ISAMD_PP pp) -{ - /* order of things: Better to save firstpp first, if there are just two */ - /* blocks, but last if there are blocks in between, as these have already */ - /* been saved... 
optimise later (that's why this is in its own func...*/ - int retval = save_first_pp(firstpp); - if (firstpp!=pp){ - save_last_pp(pp); - isamd_pp_close(pp); - } - isamd_pp_close(firstpp); - return retval; -} /* save_both_pps */ - -static ISAMD_PP read_diff_block(ISAMD_PP firstpp, int* p_diffidx) -{ /* reads the diff block (if separate) and sets diffidx right */ - ISAMD_PP pp=firstpp; - int i; - int diffidx; - if (pp->diffs == 0) - { /* no diffs yet, create room for them */ - if (separateDiffBlock(firstpp)) - { /* create a new block */ - pp=isamd_pp_open(pp->is,isamd_addr(0,firstpp->cat)); - pp->pos = isamd_alloc_block(pp->is, pp->cat); - firstpp->diffs = pp->pos*2 +1; - diffidx = pp->size = pp->offset = ISAMD_BLOCK_OFFSET_N; - if (pp->is->method->debug >5) - logf(LOG_LOG,"isamd_appd: alloc diff (d=%d) %d=%d:%d ix=%d", - firstpp->diffs, - isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos, - diffidx); - } - else - { /* prepare to append diffs in head */ - diffidx = pp->size; - pp->diffs = diffidx *2 +0; - i=diffidx; /* make an end marker */ - while ( ( i < pp->is->method->filecat[pp->cat].bsize) && - ( i <= diffidx + sizeof(int))) - pp->buf[i++]='\0'; - if (pp->is->method->debug >5) - logf(LOG_LOG,"isamd_appd: set up diffhead (d=%d) %d=%d:%d ix=%d", - firstpp->diffs, - isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos, - diffidx); - } - } /* new */ - else - { /* existing diffs */ - if (pp->diffs & 1) - { /* diffs in a separate block, load it */ - pp=isamd_pp_open(pp->is, isamd_addr(firstpp->diffs/2,pp->cat)); - diffidx = pp->offset= pp->size; - if (pp->is->method->debug >5) - logf(LOG_LOG,"isamd_appd: loaded diff (d=%d) %d=%d:%d ix=%d", - firstpp->diffs, - isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos, - diffidx); - } - else - { /* diffs within the nead */ - diffidx= pp->diffs/2; - if (pp->is->method->debug >5) - logf(LOG_LOG,"isamd_appd: diffs in head d=%d %d=%d:%d ix=%d sz=%d", - pp->diffs, - isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos, - diffidx, pp->size); - } - } /* 
diffs exist already */ - *p_diffidx = diffidx; - return pp; -} /* read_diff_block */ - - - - -/******************************************************************* - * Building main blocks (no diffs) - *******************************************************************/ - - - -static ISAMD_PP get_new_main_block( ISAMD_PP firstpp, ISAMD_PP pp) -{ /* allocates a new block for the main data, and links it in */ - int newblock; - if (firstpp==pp) - { /* special case: it was the first block. Save much later */ - if (0==firstpp->pos) - { /* firstpp not allocated yet, do so now, */ - /* to keep blocks in order. Don't save yet, though */ - firstpp->pos = isamd_alloc_block(pp->is, firstpp->cat); - } - newblock = isamd_alloc_block(pp->is, firstpp->cat); - firstpp->next = isamd_addr(newblock,firstpp->cat); - /* keep the largest category */ - pp=isamd_pp_open(pp->is,isamd_addr(0,firstpp->cat));/*don't load*/ - pp->pos=newblock; - pp->size = pp->offset = ISAMD_BLOCK_OFFSET_N; - pp->next=0; - if (pp->is->method->debug >3) - logf(LOG_LOG,"isamd_g_mainblk: Alloc2 f=%d=%d:%d n=%d=%d:%d", - isamd_addr(firstpp->pos,firstpp->cat), - firstpp->cat, firstpp->pos, - isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos ); - } - else - { /* it was not the first block */ - newblock = isamd_alloc_block(pp->is, firstpp->cat); - pp->next = isamd_addr(newblock,firstpp->cat); - if (pp->is->method->debug >3) - logf(LOG_LOG,"isamd_build: Alloc1 after p=%d=%d:%d->%d=%d:%d", - isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos, - isamd_addr(newblock,pp->cat), pp->cat, newblock ); - isamd_buildlaterblock(pp); - isamd_write_block(pp->is,pp->cat,pp->pos,pp->buf); - pp->size = pp->offset = ISAMD_BLOCK_OFFSET_N; - pp->next=0; - pp->cat = firstpp->cat; - pp->pos = newblock; - pp->cat = firstpp->cat; /* is already, never mind */ - } - return pp; -} /* get_new_main_block */ - - -static ISAMD_PP append_main_item(ISAMD_PP firstpp, - ISAMD_PP pp, - struct it_key *i_key, - void *encoder_data) -{ /* appends one item in the 
main data block, allocates new if needed */ - char *i_item= (char *) i_key; /* same as char */ - char *i_ptr=i_item; - char codebuff[128]; - char *c_ptr = codebuff; - int codelen; - char hexbuff[64]; - - int maxsize = pp->is->method->filecat[pp->is->max_cat].bsize; - - c_ptr=codebuff; - i_ptr=i_item; - (*pp->is->method->code_item)(ISAMD_ENCODE, encoder_data, &c_ptr, &i_ptr); - codelen = c_ptr - codebuff; - assert ( (codelen<128) && (codelen>0)); - if (pp->is->method->debug >7) - logf(LOG_LOG,"isamd:build: coded into %s (nk=%d)", - hexdump(codebuff, c_ptr-codebuff,hexbuff), firstpp->numKeys+1); - - if (pp->offset + codelen > maxsize ) - { /* oops, block full - get a new one */ - pp = get_new_main_block( firstpp, pp ); - /* reset encoging and code again */ - (*pp->is->method->code_reset)(encoder_data); - c_ptr=codebuff; - i_ptr=i_item; - (*pp->is->method->code_item)(ISAMD_ENCODE, encoder_data, &c_ptr, &i_ptr); - codelen = c_ptr - codebuff; - assert ( (codelen<128) && (codelen>0)); - if (pp->is->method->debug >7) - logf(LOG_LOG,"isamd:build: recoded into %s (nk=%d)", - hexdump(codebuff, c_ptr-codebuff,hexbuff), firstpp->numKeys+1); - } /* block full */ - - /* write the data into pp, now we must have room */ - memcpy(&(pp->buf[pp->offset]),codebuff,codelen); - pp->offset += codelen; - pp->size += codelen; - firstpp->numKeys++; - /* clear the next 4 bytes in block, to avoid confusions with diff lens */ - /* dirty, it should not be done here, but something slips somewhere, and */ - /* I hope this fixes it... 
- Heikki */ - codelen = pp->offset; - while ( (codelen < maxsize ) && (codelen <= pp->offset+4) ) - pp->buf[codelen++] = '\0'; - return pp; -} /* append_main_item */ - - -static int isamd_build_first_block(ISAMD is, ISAMD_I data) -{ - struct it_key i_key; /* input key */ - char *i_item= (char *) &i_key; /* same as char */ - char *i_ptr=i_item; - int i_more =1; - int i_mode; /* 0 for delete, 1 for insert */ - - ISAMD_PP firstpp; - ISAMD_PP pp; - void *encoder_data; - - char hexbuff[64]; - - firstpp=pp=isamd_pp_open(is, isamd_addr(0,is->max_cat)); - firstpp->size = firstpp->offset = ISAMD_BLOCK_OFFSET_1; - - encoder_data=(*is->method->code_start)(ISAMD_ENCODE); - - if (is->method->debug >2) - logf(LOG_LOG,"isamd_bld start: p=%d=%d:%d sz=%d maxsz=%d ", - isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos, - pp->size, pp->is->method->filecat[pp->is->max_cat].bsize); - - /* read first input */ - i_ptr = i_item; - i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode); - if (i_more) - assert( i_ptr-i_item == sizeof(i_key) ); - - if (pp->is->method->debug >7) - logf(LOG_LOG,"isamd: build_fi start: m=%d %s", - i_mode, hexdump(i_item,i_ptr-i_item,hexbuff) ); - - while (i_more) - { - if (i_mode!=0) - { /* ignore deletes here, should not happen */ - pp= append_main_item(firstpp, pp, &i_key, encoder_data); - } /* not delete */ - - /* (try to) read the next item */ - i_ptr = i_item; - i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode); - - if ( (i_more) && (pp->is->method->debug >7) ) - logf(LOG_LOG,"isamd: build_fi read: m=%d %s", - i_mode, hexdump(i_item,i_ptr-i_item,hexbuff) ); - - } /* i_more */ - (*is->method->code_stop)(ISAMD_ENCODE, encoder_data); - - return save_both_pps( firstpp, pp ); - -} /* build_first_block */ - - /*************************************************************** - * Merging diffs + * Merge ***************************************************************/ - -static int merge ( ISAMD_PP *p_firstpp, /* first pp of the chain */ - ISAMD_PP 
*p_pp, /* diff block */ - struct it_key *p_key ) /* not used yet */ +static int merge ( ISAMD_PP firstpp, /* first pp (with diffs) */ + struct it_key *p_key, /* the data item that didn't fit*/ + /* ISAMD_I data) */ /* more input data comes here */ + FILTER filt) /* more input data arriving here */ { - ISAMD_PP readpp = *p_firstpp; int diffidx; int killblk=0; struct it_key r_key; char * r_ptr; int r_more = 1; - ISAMD_PP firstpp; /* the new first, the one we write into */ ISAMD_PP pp; - void *encoder_data; + ISAMD_PP readpp=firstpp; + int retval=0; + int diffcat = firstpp->cat; /* keep the category of the diffblock even */ + /* if it is going to be empty now. */ + /* Alternative: Make it the minimal, and */ + /* resize later. Saves disk, but will lead */ + /* into bad seeks. */ + + ++(readpp->is->no_merges); /* set up diffs as they should be for reading */ - readpp->offset= ISAMD_BLOCK_OFFSET_1; - - if ( (*p_firstpp)->diffs & 1 ) - { /* separate diff block in *p_pp */ - killblk = readpp->diffs/2; - diffidx /*size*/ = readpp->is->method->filecat[readpp->cat].bsize; - readpp->diffbuf= xmalloc( diffidx); /* copy diffs to where read wants*/ - memcpy( readpp->diffbuf, &((*p_pp)->buf[0]), diffidx); - diffidx = ISAMD_BLOCK_OFFSET_N; - if (readpp->is->method->debug >2) - { - logf(LOG_LOG,"isamd_merge:separate diffs at ix=%d", - diffidx); - logf(LOG_LOG,"isamd_merge: dbuf=%p (from %p) pp=%p", - readpp->diffbuf, &((*p_pp)->buf[0]), (*p_pp) ); - } - } - else - { /* integrated diffs */ - assert ( *p_pp == *p_firstpp ); /* can only be in the first block */ - diffidx=readpp->size; - readpp->diffs = diffidx*2+0; - readpp->diffbuf=readpp->buf; - if (readpp->is->method->debug >2) - logf(LOG_LOG,"isamd_merge:local diffs at %d: %s", - diffidx,hexdump(&(readpp->diffbuf[diffidx]),8,0)); - } - - getDiffInfo(readpp,diffidx); - if (readpp->is->method->debug >8) - logf(LOG_LOG,"isamd_merge: diffinfo=%p", readpp->diffinfo); + diffidx = ISAMD_BLOCK_OFFSET_1; + //readpp->diffbuf=readpp->buf; // 
diffinfo has to duplicate it! + //getDiffInfo(readpp); // first read will make the diffinfo, at init - - if (killblk) - { /* we had a separate diff block, release it, we have copied the data */ - isamd_release_block(readpp->is, readpp->cat, killblk); - isamd_pp_close (*p_pp); - if (readpp->is->method->debug >3) - logf(LOG_LOG,"isamd_merge: released diff block %d=%d:%d", - isamd_addr(killblk,readpp->cat), readpp->cat, killblk ); - } - + if (readpp->is->method->debug >4) + logf(LOG_LOG,"isamd_merge: f=%d=%d:%d n=%d=%d:%d", + isamd_addr(firstpp->pos,firstpp->cat), firstpp->cat, firstpp->pos, + firstpp->next, isamd_type(firstpp->next), isamd_block(firstpp->next)); /* release our data block. Do before reading, when pos is stable ! */ - killblk=readpp->pos; - assert(killblk); - isamd_release_block(readpp->is, readpp->cat, killblk); - if (readpp->is->method->debug >3) - logf(LOG_LOG,"isamd_merge: released old firstblock %d (%d:%d)", - isamd_addr(killblk,readpp->cat), readpp->cat, killblk ); + killblk=firstpp->pos; + if (killblk) + { + isamd_release_block(firstpp->is, firstpp->cat, killblk); + if (readpp->is->method->debug >3) + logf(LOG_LOG,"isamd_merge: released old firstblock %d (%d:%d)", + isamd_addr(killblk,firstpp->cat), firstpp->cat, killblk ); + } + r_ptr= (char *) &r_key; - r_more = isamd_read_item( readpp, &r_ptr); +/* r_more = isamd_read_item_merge( readpp, &r_ptr, p_key, data); */ + r_more = isamd_read_item_merge( readpp, &r_ptr, p_key, filt); if (!r_more) { /* oops, all data has been deleted! what to do??? */ /* never mind, we have at least one more delta to add to the block */ /* pray that is not a delete as well... 
*/ r_key.sysno = 0; r_key.seqno = 0; - if (readpp->is->method->debug >3) + if (readpp->is->method->debug >5) logf(LOG_LOG,"isamd_merge:all data has been deleted (nk=%d) ", readpp->numKeys); - assert (readpp->numKeys == 0); } /* set up the new blocks for simple writing */ - firstpp=pp=isamd_pp_open(readpp->is,isamd_addr(0, readpp->is->max_cat)); - firstpp->size = firstpp->offset = ISAMD_BLOCK_OFFSET_1; - encoder_data = (*pp->is->method->code_start)(ISAMD_ENCODE); + firstpp=isamd_pp_open(readpp->is,isamd_addr(0, diffcat)); + firstpp->pos=isamd_alloc_block(firstpp->is,diffcat); + if (readpp->is->method->debug >3) + logf(LOG_LOG,"isamd_merge: allocated new firstpp %d=%d:%d", + isamd_addr(firstpp->pos,firstpp->cat), firstpp->cat, firstpp->pos ); + + pp=isamd_pp_open(readpp->is,isamd_addr(0,readpp->is->max_cat) ); + pp->offset=pp->size=ISAMD_BLOCK_OFFSET_N; while (r_more) { if (readpp->is->method->debug >6) logf(LOG_LOG,"isamd_merge: got key %d.%d", r_key.sysno, r_key.seqno ); - pp= append_main_item(firstpp, pp, &r_key, encoder_data); + pp= append_main_item(firstpp, pp, &r_key); if ( (readpp->pos != killblk ) && (0!=readpp->pos) ) { /* pos can get to 0 at end of main seq, if still diffs left...*/ @@ -725,25 +857,22 @@ static int merge ( ISAMD_PP *p_firstpp, /* first pp of the chain */ /* (try to) read next item */ r_ptr= (char *) &r_key; - r_more = isamd_read_item( readpp, &r_ptr); + r_more = isamd_read_item_merge( readpp, &r_ptr,0,filt); } /* while read */ - /* TODO: while pkey is an insert, and after last key inserted, append it */ - /* will prevent multiple merges on large insert runs */ - - /* set things up so that append can continue */ - isamd_reduceblock(firstpp); - firstpp->diffs=0; - - if (firstpp!=pp) - { /* the last data block is of no interest any more */ - save_last_pp(pp); - if (readpp->is->method->debug >4) - logf(LOG_LOG,"isamd_merge: saved last block %d=%d:%d", - isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos); - isamd_pp_close(pp); - } + +// 
firstpp->diffs=0; + + + isamd_reduceblock(pp); /* reduce size if possible */ + if (0==firstpp->next) + firstpp->next = isamd_addr(pp->pos,pp->cat); + save_last_pp(pp); + if (readpp->is->method->debug >4) + logf(LOG_LOG,"isamd_merge: saved last block %d=%d:%d", + isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos); + isamd_pp_close(pp); if (readpp->is->method->debug >5) logf(LOG_LOG,"isamd_merge: closing readpp %d=%d:%d di=%p", @@ -751,28 +880,35 @@ static int merge ( ISAMD_PP *p_firstpp, /* first pp of the chain */ readpp->diffinfo); isamd_pp_close(readpp); /* pos is 0 by now, at eof. close works anyway */ - (*firstpp->is->method->code_stop)(ISAMD_ENCODE, encoder_data); - - *p_firstpp = firstpp; - if (readpp->is->method->debug >2) - logf(LOG_LOG,"isamd_merge: merge ret %d=%d:%d nx=%d=%d:%d d=%d=2*%d+%d", - isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos, - pp->next, isamd_type(pp->next), isamd_block(pp->next), - pp->diffs, pp->diffs/2, pp->diffs &1 ); - return 0; + logf(LOG_LOG,"isamd_merge: merge ret f=%d=%d:%d pp=%d=%d:%d", + isamd_addr(firstpp->pos,pp->cat), firstpp->cat, firstpp->pos, + isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos); + + firstpp->size = firstpp->offset = ISAMD_BLOCK_OFFSET_1; /* nothing there */ + memset(firstpp->buf,'\0',firstpp->is->method->filecat[firstpp->cat].bsize); + save_first_pp(firstpp); + retval = isamd_addr(firstpp->pos, firstpp->cat); + isamd_pp_close(firstpp); + + return retval; } /* merge */ + /*************************************************************** * Appending diffs ***************************************************************/ -static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data) +static int append_diffs( + ISAMD is, + ISAMD_P ipos, + /*ISAMD_I data)*/ + FILTER filt) { struct it_key i_key; /* one input item */ char *i_item = (char *) &i_key; /* same as chars */ @@ -781,8 +917,6 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data) int i_mode; /* 0 for delete, 1 for insert */ ISAMD_PP firstpp; - ISAMD_PP 
pp; - void *encoder_data; char hexbuff[64]; int diffidx=0; int maxsize=0; @@ -791,26 +925,43 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data) char *c_ptr = codebuff; int codelen; int merge_rc; + int retval=0; + + if (0==ipos) + { + firstpp=isamd_pp_open(is, isamd_addr(0,0) ); + firstpp->size=firstpp->offset=ISAMD_BLOCK_OFFSET_1; + /* create in smallest category, will expand later */ + ++(is->no_fbuilds); + } + else + { + firstpp=isamd_pp_open(is, ipos); + ++(is->no_appds); + } - firstpp=isamd_pp_open(is, ipos); if (is->method->debug >2) - logf(LOG_LOG,"isamd_appd: Start ipos=%d=%d:%d d=%d=%d*2+%d nk=%d", + logf(LOG_LOG,"isamd_appd: Start ipos=%d=%d:%d n=%d=%d:%d nk=%d", ipos, isamd_type(ipos), isamd_block(ipos), - firstpp->diffs, firstpp->diffs/2, firstpp->diffs & 1, firstpp->numKeys); - pp=read_diff_block(firstpp, &diffidx); - encoder_data=(*is->method->code_start)(ISAMD_ENCODE); - maxsize = is->method->filecat[pp->cat].bsize; + firstpp->next, isamd_type(firstpp->next), isamd_block(firstpp->next), + firstpp->numKeys); + maxsize = is->method->filecat[firstpp->cat].bsize; + + difflenidx = diffidx = firstpp->size; - difflenidx = diffidx; diffidx+=sizeof(int); /* difflen will be stored here */ /* read first input */ - i_ptr = i_item; - i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode); + //i_ptr = i_item; //!!! 
+ i_more = filter_read(filt, &i_key, &i_mode); + /* i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode); */ if (is->method->debug >6) - logf(LOG_LOG,"isamd_appd: start with m=%d %s", - i_mode, hexdump(i_item,i_ptr-i_item,hexbuff) ); + logf(LOG_LOG,"isamd_appd: start m=%d %d.%d=%x.%x: %d", + i_mode, + i_key.sysno, i_key.seqno, + i_key.sysno, i_key.seqno, + i_key.sysno*2+i_mode); while (i_more) { @@ -819,120 +970,195 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data) i_key.seqno = i_key.seqno * 2 + i_mode; c_ptr=codebuff; - i_ptr=i_item; - (*is->method->code_item)(ISAMD_ENCODE, encoder_data, &c_ptr, &i_ptr); + i_ptr=i_item; + (*is->method->code_item)(ISAMD_ENCODE, firstpp->decodeClientData, + &c_ptr, &i_ptr); codelen = c_ptr - codebuff; assert ( (codelen<128) && (codelen>0)); if (is->method->debug >7) - logf(LOG_LOG,"isamd_appd: coded into %d: %s (nk=%d) (ix=%d)", + logf(LOG_LOG,"isamd_appd: coded %d: %s (nk=%d) (ix=%d)", codelen, hexdump(codebuff, codelen,hexbuff), firstpp->numKeys,diffidx); if (diffidx + codelen > maxsize ) { /* block full */ - if (is->method->debug >3) - logf(LOG_LOG,"isamd_appd: block full (ix=%d mx=%d lix=%d)", - diffidx, maxsize, difflenidx); - if (is->method->debug >8) - logf(LOG_LOG,"isamd_appd: block pp=%p buf=%p [%d]:%s", - pp, pp->buf, - difflenidx, hexdump(&pp->buf[difflenidx],8,0)); - merge_rc = merge (&firstpp, &pp, &i_key); - if (0!=merge_rc) - return merge_rc; /* merge handled them all ! 
*/ - - /* set things up so we can continue */ - pp = read_diff_block(firstpp, &diffidx); - (*is->method->code_reset)(encoder_data); - maxsize = is->method->filecat[pp->cat].bsize; - difflenidx=diffidx; - diffidx+=sizeof(int); - - /* code the current input key again */ - c_ptr=codebuff; - i_ptr=i_item; - (*is->method->code_item)(ISAMD_ENCODE, encoder_data, &c_ptr, &i_ptr); - codelen = c_ptr - codebuff; - assert ( (codelen<128) && (codelen>0)); - if (is->method->debug >7) - logf(LOG_LOG,"isamd_appd: recoded into %d: %s (nk=%d) (ix=%d)", - codelen, hexdump(codebuff, codelen,hexbuff), - firstpp->numKeys,diffidx); - + while ( (firstpp->cat < firstpp->is->max_cat) && + (diffidx + codelen > maxsize) ) + { /* try to increase the block size */ + if (firstpp->pos > 0) /* free the old block if allocated */ + isamd_release_block(is, firstpp->cat, firstpp->pos); + ++firstpp->cat; + maxsize = is->method->filecat[firstpp->cat].bsize; + firstpp->pos=0; /* need to allocate it when saving */ + if (is->method->debug >3) + logf(LOG_LOG,"isamd_appd: increased diff block sz to %d (%d)", + firstpp->cat, maxsize); + } + if ((firstpp->cat >= firstpp->is->max_cat) && + (diffidx + codelen > maxsize) ) + { /* max size - can't help, need to merge it */ + if (is->method->debug >7) + logf(LOG_LOG,"isamd_appd: need to merge"); + if (is->method->debug >9) //!!!!! + logf(LOG_LOG,"isamd_appd: going to merge with m=%d %d.%d", + i_mode, i_key.sysno, i_key.seqno); + merge_rc = merge (firstpp, &i_key, filt); + if (0!=merge_rc) + return merge_rc; /* merge handled them all ! */ + assert(!"merge returned zero ??"); + } /* need to merge */ } /* block full */ - - /* Note: this goes horribly wrong if there is no room for the diff */ - /* after the merge! 
The solution is to increase the limit in */ - /* separateDiffBlock, to force a separate diff block earlier, and not */ - /* to have absurdly small blocks */ + + if (!( diffidx+codelen <= maxsize )) + { /* bug hunting */ + logf(LOG_LOG,"OOPS, diffidx problem: d=%d c=%d s=%d > m=%d", + diffidx, codelen, diffidx+codelen, maxsize); + logf(LOG_LOG,"ipos=%d f=%d=%d:%d", + ipos, + isamd_addr(firstpp->pos, firstpp->cat), + firstpp->cat, firstpp->pos ); + } assert ( diffidx+codelen <= maxsize ); /* save the diff */ - memcpy(&(pp->buf[diffidx]),codebuff,codelen); + memcpy(&(firstpp->buf[diffidx]),codebuff,codelen); diffidx += codelen; + firstpp->size = firstpp->offset = diffidx; + if (i_mode) firstpp->numKeys++; /* insert diff */ else firstpp->numKeys--; /* delete diff */ /* update length of this diff run */ - memcpy(&(pp->buf[difflenidx]),&diffidx,sizeof(diffidx)); - if (firstpp==pp) - firstpp->diffs =diffidx*2+0; - else - pp->size =diffidx; + memcpy(&(firstpp->buf[difflenidx]),&diffidx,sizeof(diffidx)); /* (try to) read the next input */ i_ptr = i_item; - i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode); + i_more = filter_read(filt, &i_key, &i_mode); + /* i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode); */ if ( (i_more) && (is->method->debug >6) ) - logf(LOG_LOG,"isamd_appd: got m=%d %s", - i_mode, hexdump(i_item,i_ptr-i_item,hexbuff) ); + logf(LOG_LOG,"isamd_appd: got m=%d %d.%d=%x.%x: %d", + i_mode, + i_key.sysno, i_key.seqno, + i_key.sysno, i_key.seqno, + i_key.sysno*2+i_mode); } /* more loop */ /* clear the next difflen, if room for such */ difflenidx = diffidx; - while ( (difflenidx-diffidx<=sizeof(int)) && (difflenidxbuf[difflenidx++]='\0'; - + while ( (difflenidx-diffidx<=sizeof(int)+1) && (difflenidxbuf[difflenidx++]='\0'; - (*firstpp->is->method->code_stop)(ISAMD_ENCODE, encoder_data); - return save_both_pps( firstpp, pp ); + if (0==firstpp->pos) /* need to (re)alloc the block */ + firstpp->pos = isamd_alloc_block(is, firstpp->cat); + 
retval = save_first_pp( firstpp ); + isamd_pp_close(firstpp); + + return retval; } /* append_diffs */ + /************************************************************* - * isamd_append itself, Sweet, isn't it + * isamd_append itself *************************************************************/ ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data) { - int retval=0; + FILTER F = filter_open(is,data); + ISAMD_P rc=0; - if (0==ipos) - retval = isamd_build_first_block(is,data); - else - retval = append_diffs(is,ipos,data); + int olddebug= is->method->debug; + if (ipos == 7320) + is->method->debug = 99; /*!*/ + + if ( filter_isempty(F) ) /* can be, if del-ins of the same */ + { + if (is->method->debug >3) + logf(LOG_LOG,"isamd_appd: nothing to do for %d=",ipos); + filter_close(F); + ++(is->no_non); + return ipos; /* without doing anything at all */ + } - if (0) /*!*/ + if ( ( 0==ipos) && filter_only_one(F) ) { - void *p1=xmalloc(100); - void *p2=xmalloc(100); - void *p3=xmalloc(100); - logf(LOG_LOG,"Traversing xmalloc stuff. p1=%p p2=%p p3=%p",p1,p2,p3); - xmalloc_trav("end of append"); /*!*/ - assert(!"foo"); + struct it_key k; + int mode; + filter_read(F,&k,&mode); + assert(mode); + rc = singleton_encode(&k); + if (is->method->debug >9) + logf(LOG_LOG,"isamd_appd: singleton %d (%x)", + rc,rc); + if (rc) + is->no_singles++; + assert ( (rc==0) || is_singleton(rc) ); } - - return retval; + if ( 0==rc) /* either not single, or it did not fit */ + { + rc = append_diffs(is,ipos,F); + assert ( ! is_singleton(rc) ); + /* can happen if we run out of bits, so that block numbers overflow */ + /* to SINGLETON_BIT */ + } + filter_close(F); + + if (is->method->debug >2) + logf(LOG_LOG,"isamd_appd: ret %d=%x (%d=%x)", + rc,rc,ipos,ipos); + is->method->debug=olddebug; /*!*/ + return rc; } /* isamd_append */ + + + + + /* * $Log: merge-d.c,v $ - * Revision 1.13 1999-08-18 13:59:19 heikki + * Revision 1.25 1999-11-30 13:48:04 adam + * Improved installation. 
Updated for inclusion of YAZ header files. + * + * Revision 1.24 1999/10/05 09:57:40 heikki + * Tuning the isam-d (and fixed a small "detail") + * + * Revision 1.23 1999/09/27 14:36:36 heikki + * singletons + * + * Revision 1.22 1999/09/23 18:01:18 heikki + * singleton optimising + * + * Revision 1.21 1999/09/21 17:36:43 heikki + * Added filter function. Not much of effect on the small test set... + * + * Revision 1.20 1999/09/20 15:48:06 heikki + * Small changes + * + * Revision 1.19 1999/09/13 13:28:28 heikki + * isam-d optimizing: merging input data in the same go + * + * Revision 1.18 1999/08/25 18:09:24 heikki + * Starting to optimize + * + * Revision 1.17 1999/08/24 13:17:42 heikki + * Block sizes, comments + * + * Revision 1.16 1999/08/24 10:12:02 heikki + * Comments about optimising + * + * Revision 1.15 1999/08/22 08:26:34 heikki + * COmments + * + * Revision 1.14 1999/08/20 12:25:58 heikki + * Statistics in isamd + * + * Revision 1.13 1999/08/18 13:59:19 heikki * Fixed another unlikely difflen bug * * Revision 1.12 1999/08/18 13:28:17 heikki @@ -970,5 +1196,3 @@ ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data) */ - -