X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=isamc%2Fmerge-d.c;h=3f12488133262cfbe497d24ed0f295831fd2062d;hb=6d1c06c091a197d911a8e15737ff01ec7ab92166;hp=9f54c5453eb4f0a54ecc190b97caff1abdba986c;hpb=cb4bd60bc882597a3232f8098bd755385bfca9f2;p=idzebra-moved-to-github.git diff --git a/isamc/merge-d.c b/isamc/merge-d.c index 9f54c54..3f12488 100644 --- a/isamc/merge-d.c +++ b/isamc/merge-d.c @@ -3,18 +3,32 @@ * See the file LICENSE for details. * Heikki Levanto * - * $Id: merge-d.c,v 1.6 1999-07-23 15:43:05 heikki Exp $ + * $Id: merge-d.c,v 1.16 1999-08-24 10:12:02 heikki Exp $ * - * todo - * - merge when needed - * - single-entry optimizing + * missing + * + * optimize + * - Input filter: Eliminate del-ins pairs, tell if only one entry (or none) + * - single-entry optimizing (keep the one entry in the dict, no block) * - study and optimize block sizes (later) + * - Clean up the different ways diffs are handled in writing and reading + * - Keep a merge-count in the firstpp, and if the block has already been + * merged, reduce it to a larger size even if it could fit in a small one! + * - Keep minimum freespace in the category table, and use that in reduce! + * - pass a space-needed for separateDiffBlock and reduce to be able to + * reserve more room for diffs, or to force a separate (larger?) block + * - Idea: Simplify the structure, so that the first block is always diffs. + * On small blocks, that is all we have. Once a block has been merged, we + * allocate the first main block and a (new) firstblock ffor diffs. From + * that point on the word has two blocks for it. + * - On allocating more blocks (in append), check the order of blocks, and + * if needed, swap them. + * - In merge, merge also with the input data. * * bugs - * not yet ready * * caveat - * There is aconfusion about the block addresses. cat or type is the category, + * There is a confusion about the block addresses. cat or type is the category, * pos or block is the block number. pp structures keep these two separate, * and combine when saving the pp. The next pointer in the pp structure is * also a combined address, but needs to be combined every time it is needed, @@ -23,6 +37,23 @@ * in that order. This conflicts with the order these are often mentioned in * the debug log calls, and other places, leading to small mistakes here * and there. + * + * Needs cleaning! The way diff blocks are handled in append and reading is + * quite different, and likely to give maintenance problems. + * + * log levels (set isamddebug=x in zebra.cfg (or what ever cfg file you use) ) + * 0 = no logging. Default + * 1 = no logging here. isamd logs overall statistics + * 2 = Each call to isamd_append with start address and no more + * 3 = Start and type of append, start of merge, and result of append + * 4 = Block allocations + * 5 = Block-level operations (read/write) + * 6 = Details about diff blocks etc. + * 7 = Log each record as it passes the system (once) + * 8 = Log raw and (de)coded data + * 9 = Anything else that may be useful + * .. = Anything needed to hunt a specific bug + * (note that all tests in the code are like debug>3, which means 4 or above!) */ #include @@ -61,11 +92,14 @@ static char *hexdump(unsigned char *p, int len, char *buff) { static int separateDiffBlock(ISAMD_PP pp) { + int limit = sizeof(int) + 8; if (pp->next) return 1; /* multi-block chains always have a separate diff block */ - return ( pp->size + 2*sizeof(int) > pp->is->method->filecat[pp->cat].bsize); + return ( pp->size + limit >= pp->is->method->filecat[pp->cat].bsize); /* make sure there is at least room for the length and one diff. if not, */ - /* it goes to a separate block */ + /* it goes to a separate block. Assumes max diff is 8 bytes. Not */ + /* unreaalistic in large data sets, where first sysno may be very large, */ + /* and even the first seqno may be quite something. */ /* todo: Make the limit adjustable in the filecat table ! */ } @@ -82,10 +116,10 @@ static void getDiffInfo(ISAMD_PP pp, int diffidx) int i=1; /* [0] is used for the main data */ int diffsz= maxinfos * sizeof(struct ISAMD_DIFF_s); - pp->diffinfo = xmalloc( diffsz ); + pp->diffinfo = xmalloc( diffsz ); memset(pp->diffinfo,'\0',diffsz); - if (pp->is->method->debug > 4) - logf(LOG_LOG,"isamd_getDiffInfo: %d (%d:%d), ix=%d mx=%d", + if (pp->is->method->debug > 5) + logf(LOG_LOG,"isamd_getDiffInfo: %d (%d:%d), ix=%d mx=%d", isamd_addr(pp->pos, pp->cat), pp->cat, pp->pos, diffidx,maxinfos); assert(pp->diffbuf); @@ -93,7 +127,7 @@ static void getDiffInfo(ISAMD_PP pp, int diffidx) { if ( diffidx+sizeof(int) > pp->is->method->filecat[pp->cat].bsize ) { - if (pp->is->method->debug > 4) + if (pp->is->method->debug > 5) logf(LOG_LOG,"isamd_getDiffInfo:Near end (no room for len) at ix=%d n=%d", diffidx, i); return; /* whole block done */ @@ -103,11 +137,23 @@ static void getDiffInfo(ISAMD_PP pp, int diffidx) if (pp->is->method->debug > 5) logf(LOG_LOG,"isamd_getDiffInfo: max=%d ix=%d dbuf=%p", pp->diffinfo[i].maxidx, diffidx, pp->diffbuf); - assert(pp->diffinfo[i].maxidx <= pp->is->method->filecat[pp->cat].bsize); + + if ( (pp->is->method->debug > 0) && + (pp->diffinfo[i].maxidx > pp->is->method->filecat[pp->cat].bsize) ) + { /* bug-hunting, this fails on some long runs that log too much */ + logf(LOG_LOG,"Bad MaxIx!!! %s:%d: diffidx=%d", + __FILE__,__LINE__, diffidx); + logf(LOG_LOG,"i=%d maxix=%d bsz=%d", i, pp->diffinfo[i].maxidx, + pp->is->method->filecat[pp->cat].bsize); + logf(LOG_LOG,"pp=%d=%d:%d pp->nx=%d=%d:%d", + isamd_addr(pp->pos,pp->cat), pp->pos, pp->cat, + pp->next, isamd_type(pp->next), isamd_block(pp->next) ); + } + assert(pp->diffinfo[i].maxidx <= pp->is->method->filecat[pp->cat].bsize+1); if (0==pp->diffinfo[i].maxidx) { - if (pp->is->method->debug > 4) + if (pp->is->method->debug > 5) //!!! 4 logf(LOG_LOG,"isamd_getDiffInfo:End mark at ix=%d n=%d", diffidx, i); return; /* end marker */ @@ -115,7 +161,7 @@ static void getDiffInfo(ISAMD_PP pp, int diffidx) diffidx += sizeof(int); pp->diffinfo[i].decodeData = (*pp->is->method->code_start)(ISAMD_DECODE); pp->diffinfo[i].diffidx = diffidx; - if (pp->is->method->debug > 4) + if (pp->is->method->debug > 5) logf(LOG_LOG,"isamd_getDiff[%d]:%d-%d %s", i,diffidx-sizeof(int),pp->diffinfo[i].maxidx, hexdump((char *)&pp->diffbuf[diffidx-4],8,0) ); @@ -160,10 +206,17 @@ static void loadDiffs(ISAMD_PP pp) void isamd_free_diffs(ISAMD_PP pp) { int i; + if (pp->is->method->debug > 5) + logf(LOG_LOG,"isamd_free_diffs: pp=%p di=%p", pp, pp->diffinfo); if (!pp->diffinfo) return; - for (i=0;pp->diffinfo[i].decodeData;i++) - (*pp->is->method->code_stop)(ISAMD_DECODE,pp->diffinfo[i].decodeData); + for (i=1;pp->diffinfo[i].decodeData;i++) + { + if (pp->is->method->debug > 8) + logf(LOG_LOG,"isamd_free_diffs [%d]=%p",i, + pp->diffinfo[i].decodeData); + (*pp->is->method->code_stop)(ISAMD_DECODE,pp->diffinfo[i].decodeData); + } xfree(pp->diffinfo); if (pp->diffbuf != pp->buf) xfree (pp->diffbuf); @@ -193,7 +246,7 @@ int isamd_read_item (ISAMD_PP pp, char **dst) { /* 0 is special case, main data. */ keyptr=(char*) &(pp->diffinfo[0].key); pp->diffinfo[0].mode = ! isamd_read_main_item(pp,&keyptr); - if (pp->is->method->debug > 4) + if (pp->is->method->debug > 7) logf(LOG_LOG,"isamd_read_item: read main %d.%d (%x.%x)", pp->diffinfo[0].key.sysno, pp->diffinfo[0].key.seqno, pp->diffinfo[0].key.sysno, pp->diffinfo[0].key.seqno); @@ -201,7 +254,7 @@ int isamd_read_item (ISAMD_PP pp, char **dst) winner = 0; for (i=1; (!retry) && (pp->diffinfo[i].decodeData); i++) { - if (pp->is->method->debug > 4) + if (pp->is->method->debug > 8) logf(LOG_LOG,"isamd_read_item: considering d%d %d.%d ix=%d mx=%d", i, pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno, pp->diffinfo[i].diffidx, pp->diffinfo[i].maxidx); @@ -216,7 +269,7 @@ int isamd_read_item (ISAMD_PP pp, char **dst) pp->diffinfo[i].diffidx += codeptr-codestart; pp->diffinfo[i].mode = pp->diffinfo[i].key.seqno & 1; pp->diffinfo[i].key.seqno = pp->diffinfo[i].key.seqno >>1 ; - if (pp->is->method->debug > 4) + if (pp->is->method->debug > 7) logf(LOG_LOG,"isamd_read_item: read diff[%d] %d.%d (%x.%x)",i, pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno, pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno); @@ -228,7 +281,7 @@ int isamd_read_item (ISAMD_PP pp, char **dst) cmp=-1; /* end of main sequence, take all diffs */ if (cmp<0) { - if (pp->is->method->debug > 4) + if (pp->is->method->debug > 8) logf(LOG_LOG,"isamd_read_item: ins %d<%d %d.%d (%x.%x) < %d.%d (%x.%x)", i, winner, pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno, @@ -239,13 +292,13 @@ int isamd_read_item (ISAMD_PP pp, char **dst) winner = i; else assert(!"delete diff for nonexisting item"); - /* is an assert too steep here?*/ + /* is an assert too steep here? Not really.*/ } /* earlier key */ else if (cmp==0) { if (!pp->diffinfo[i].mode) /* delete diff. should always be */ { - if (pp->is->method->debug > 4) + if (pp->is->method->debug > 8) logf(LOG_LOG,"isamd_read_item: del %d at%d %d.%d (%x.%x)", i, winner, pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno, @@ -253,12 +306,13 @@ int isamd_read_item (ISAMD_PP pp, char **dst) pp->diffinfo[winner].key.sysno=0; /* delete it */ } else - if (pp->is->method->debug > 4) + if (pp->is->method->debug > 2) logf(LOG_LOG,"isamd_read_item: duplicate ins %d at%d %d.%d (%x.%x)", i, winner, pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno, pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno); /* skip the insert, since we already have it in the base */ + /* Should we fail an assertion here??? */ pp->diffinfo[i].key.sysno=0; /* done with the delete */ retry=1; /* start all over again */ } /* matching key */ @@ -269,7 +323,7 @@ int isamd_read_item (ISAMD_PP pp, char **dst) if ( pp->diffinfo[winner].key.sysno) { - if (pp->is->method->debug > 4) + if (pp->is->method->debug > 7) logf(LOG_LOG,"isamd_read_item: got %d %d.%d (%x.%x)", winner, pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno, @@ -281,7 +335,7 @@ int isamd_read_item (ISAMD_PP pp, char **dst) } else { - if (pp->is->method->debug > 4) + if (pp->is->method->debug > 7) logf(LOG_LOG,"isamd_read_item: eof w=%d %d.%d (%x.%x)", winner, pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno, @@ -303,14 +357,14 @@ static void isamd_reduceblock(ISAMD_PP pp) { if (pp->pos) return; /* existing block, do not touch */ - if (pp->is->method->debug > 2) + if (pp->is->method->debug > 5) logf(LOG_LOG,"isamd_reduce: start p=%d c=%d sz=%d", pp->pos, pp->cat, pp->size); while ( ( pp->cat > 0 ) && (!pp->next) && (pp->offset < pp->is->method->filecat[pp->cat-1].bsize ) ) pp->cat--; pp->pos = isamd_alloc_block(pp->is, pp->cat); - if (pp->is->method->debug > 2) + if (pp->is->method->debug > 5) logf(LOG_LOG,"isamd_reduce: got p=%d c=%d sz=%d", pp->pos, pp->cat, pp->size); } /* reduceblock */ @@ -351,6 +405,7 @@ static int save_both_pps (ISAMD_PP firstpp, ISAMD_PP pp) static ISAMD_PP read_diff_block(ISAMD_PP firstpp, int* p_diffidx) { /* reads the diff block (if separate) and sets diffidx right */ ISAMD_PP pp=firstpp; + int i; int diffidx; if (pp->diffs == 0) { /* no diffs yet, create room for them */ @@ -360,7 +415,7 @@ static ISAMD_PP read_diff_block(ISAMD_PP firstpp, int* p_diffidx) pp->pos = isamd_alloc_block(pp->is, pp->cat); firstpp->diffs = pp->pos*2 +1; diffidx = pp->size = pp->offset = ISAMD_BLOCK_OFFSET_N; - if (pp->is->method->debug >3) + if (pp->is->method->debug >5) logf(LOG_LOG,"isamd_appd: alloc diff (d=%d) %d=%d:%d ix=%d", firstpp->diffs, isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos, @@ -369,8 +424,12 @@ static ISAMD_PP read_diff_block(ISAMD_PP firstpp, int* p_diffidx) else { /* prepare to append diffs in head */ diffidx = pp->size; - pp->diffs = diffidx *2 +0; - if (pp->is->method->debug >3) + pp->diffs = diffidx *2 +0; + i=diffidx; /* make an end marker */ + while ( ( i < pp->is->method->filecat[pp->cat].bsize) && + ( i <= diffidx + sizeof(int))) + pp->buf[i++]='\0'; + if (pp->is->method->debug >5) logf(LOG_LOG,"isamd_appd: set up diffhead (d=%d) %d=%d:%d ix=%d", firstpp->diffs, isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos, @@ -383,7 +442,7 @@ static ISAMD_PP read_diff_block(ISAMD_PP firstpp, int* p_diffidx) { /* diffs in a separate block, load it */ pp=isamd_pp_open(pp->is, isamd_addr(firstpp->diffs/2,pp->cat)); diffidx = pp->offset= pp->size; - if (pp->is->method->debug >3) + if (pp->is->method->debug >5) logf(LOG_LOG,"isamd_appd: loaded diff (d=%d) %d=%d:%d ix=%d", firstpp->diffs, isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos, @@ -392,7 +451,7 @@ static ISAMD_PP read_diff_block(ISAMD_PP firstpp, int* p_diffidx) else { /* diffs within the nead */ diffidx= pp->diffs/2; - if (pp->is->method->debug >3) + if (pp->is->method->debug >5) logf(LOG_LOG,"isamd_appd: diffs in head d=%d %d=%d:%d ix=%d sz=%d", pp->diffs, isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos, @@ -429,8 +488,8 @@ static ISAMD_PP get_new_main_block( ISAMD_PP firstpp, ISAMD_PP pp) pp->pos=newblock; pp->size = pp->offset = ISAMD_BLOCK_OFFSET_N; pp->next=0; - if (pp->is->method->debug >3) - logf(LOG_LOG,"isamd_build: Alloc2 f=%d (%d:%d) n=%d(%d:%d)", + if (pp->is->method->debug >3) + logf(LOG_LOG,"isamd_g_mainblk: Alloc2 f=%d=%d:%d n=%d=%d:%d", isamd_addr(firstpp->pos,firstpp->cat), firstpp->cat, firstpp->pos, isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos ); @@ -439,12 +498,17 @@ static ISAMD_PP get_new_main_block( ISAMD_PP firstpp, ISAMD_PP pp) { /* it was not the first block */ newblock = isamd_alloc_block(pp->is, firstpp->cat); pp->next = isamd_addr(newblock,firstpp->cat); + if (pp->is->method->debug >3) + logf(LOG_LOG,"isamd_build: Alloc1 after p=%d=%d:%d->%d=%d:%d", + isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos, + isamd_addr(newblock,pp->cat), pp->cat, newblock ); isamd_buildlaterblock(pp); isamd_write_block(pp->is,pp->cat,pp->pos,pp->buf); pp->size = pp->offset = ISAMD_BLOCK_OFFSET_N; pp->next=0; pp->cat = firstpp->cat; - pp->pos = isamd_block(firstpp->next); + pp->pos = newblock; + pp->cat = firstpp->cat; /* is already, never mind */ } return pp; } /* get_new_main_block */ @@ -469,7 +533,7 @@ static ISAMD_PP append_main_item(ISAMD_PP firstpp, (*pp->is->method->code_item)(ISAMD_ENCODE, encoder_data, &c_ptr, &i_ptr); codelen = c_ptr - codebuff; assert ( (codelen<128) && (codelen>0)); - if (pp->is->method->debug >3) + if (pp->is->method->debug >7) logf(LOG_LOG,"isamd:build: coded into %s (nk=%d)", hexdump(codebuff, c_ptr-codebuff,hexbuff), firstpp->numKeys+1); @@ -483,7 +547,7 @@ static ISAMD_PP append_main_item(ISAMD_PP firstpp, (*pp->is->method->code_item)(ISAMD_ENCODE, encoder_data, &c_ptr, &i_ptr); codelen = c_ptr - codebuff; assert ( (codelen<128) && (codelen>0)); - if (pp->is->method->debug >3) + if (pp->is->method->debug >7) logf(LOG_LOG,"isamd:build: recoded into %s (nk=%d)", hexdump(codebuff, c_ptr-codebuff,hexbuff), firstpp->numKeys+1); } /* block full */ @@ -493,6 +557,12 @@ static ISAMD_PP append_main_item(ISAMD_PP firstpp, pp->offset += codelen; pp->size += codelen; firstpp->numKeys++; + /* clear the next 4 bytes in block, to avoid confusions with diff lens */ + /* dirty, it should not be done here, but something slips somewhere, and */ + /* I hope this fixes it... - Heikki */ + codelen = pp->offset; + while ( (codelen < maxsize ) && (codelen <= pp->offset+4) ) + pp->buf[codelen++] = '\0'; return pp; } /* append_main_item */ @@ -511,14 +581,17 @@ static int isamd_build_first_block(ISAMD is, ISAMD_I data) char hexbuff[64]; + ++(is->files[0].no_fbuilds); + firstpp=pp=isamd_pp_open(is, isamd_addr(0,is->max_cat)); firstpp->size = firstpp->offset = ISAMD_BLOCK_OFFSET_1; + encoder_data=(*is->method->code_start)(ISAMD_ENCODE); - if (is->method->debug >3) - logf(LOG_LOG,"isamd_bld start: p=%d c=%d sz=%d maxsz=%d ", - pp->pos, pp->cat, pp->size, - pp->is->method->filecat[pp->is->max_cat].bsize); + if (is->method->debug >2) + logf(LOG_LOG,"isamd_bld start: p=%d=%d:%d sz=%d maxsz=%d ", + isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos, + pp->size, pp->is->method->filecat[pp->is->max_cat].bsize); /* read first input */ i_ptr = i_item; @@ -526,7 +599,7 @@ static int isamd_build_first_block(ISAMD is, ISAMD_I data) if (i_more) assert( i_ptr-i_item == sizeof(i_key) ); - if (pp->is->method->debug >3) + if (pp->is->method->debug >7) logf(LOG_LOG,"isamd: build_fi start: m=%d %s", i_mode, hexdump(i_item,i_ptr-i_item,hexbuff) ); @@ -541,11 +614,12 @@ static int isamd_build_first_block(ISAMD is, ISAMD_I data) i_ptr = i_item; i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode); - if ( (i_more) && (pp->is->method->debug >3) ) - logf(LOG_LOG,"isamd: build_fi start: m=%d %s", + if ( (i_more) && (pp->is->method->debug >7) ) + logf(LOG_LOG,"isamd: build_fi read: m=%d %s", i_mode, hexdump(i_item,i_ptr-i_item,hexbuff) ); } /* i_more */ + (*is->method->code_stop)(ISAMD_ENCODE, encoder_data); return save_both_pps( firstpp, pp ); @@ -570,34 +644,60 @@ static int merge ( ISAMD_PP *p_firstpp, /* first pp of the chain */ ISAMD_PP firstpp; /* the new first, the one we write into */ ISAMD_PP pp; void *encoder_data; + + ++(readpp->is->files[0].no_merges); /* set up diffs as they should be for reading */ readpp->offset= ISAMD_BLOCK_OFFSET_1; - if (*p_pp == *p_firstpp) - { /* integrated diffs */ - diffidx=readpp->size; - readpp->diffs = diffidx*2+0; - readpp->diffbuf=readpp->buf; /*? does this get freed right ??? */ - if (readpp->is->method->debug >3) - logf(LOG_LOG,"isamd_merge:local diffs at %d: %s", - diffidx,hexdump(&(readpp->diffbuf[diffidx]),8,0)); - } - else + if ( (*p_firstpp)->diffs & 1 ) { /* separate diff block in *p_pp */ killblk = readpp->diffs/2; - diffidx = readpp->is->method->filecat[readpp->cat].bsize; + diffidx /*size*/ = readpp->is->method->filecat[readpp->cat].bsize; readpp->diffbuf= xmalloc( diffidx); /* copy diffs to where read wants*/ memcpy( readpp->diffbuf, &((*p_pp)->buf[0]), diffidx); diffidx = ISAMD_BLOCK_OFFSET_N; - if (readpp->is->method->debug >3) { - logf(LOG_LOG,"isamd_merge:separate diffs at ix=%d", - diffidx); - logf(LOG_LOG,"isamd_merge: dbuf=%p (from %p) pp=%p", - readpp->diffbuf, &((*p_pp)->buf[0]), (*p_pp) ); + if (readpp->is->method->debug >2) + { + logf(LOG_LOG,"isamd_merge:separate diffs at ix=%d", + diffidx); + logf(LOG_LOG,"isamd_merge: dbuf=%p (from %p) pp=%p", + readpp->diffbuf, &((*p_pp)->buf[0]), (*p_pp) ); } } + else + { /* integrated diffs */ + assert ( *p_pp == *p_firstpp ); /* can only be in the first block */ + diffidx=readpp->size; + readpp->diffs = diffidx*2+0; + readpp->diffbuf=readpp->buf; + if (readpp->is->method->debug >2) + logf(LOG_LOG,"isamd_merge:local diffs at %d: %s", + diffidx,hexdump(&(readpp->diffbuf[diffidx]),8,0)); + } + getDiffInfo(readpp,diffidx); + if (readpp->is->method->debug >8) + logf(LOG_LOG,"isamd_merge: diffinfo=%p", readpp->diffinfo); + + + if (killblk) + { /* we had a separate diff block, release it, we have copied the data */ + isamd_release_block(readpp->is, readpp->cat, killblk); + isamd_pp_close (*p_pp); + if (readpp->is->method->debug >3) + logf(LOG_LOG,"isamd_merge: released diff block %d=%d:%d", + isamd_addr(killblk,readpp->cat), readpp->cat, killblk ); + } + + + /* release our data block. Do before reading, when pos is stable ! */ + killblk=readpp->pos; + assert(killblk); + isamd_release_block(readpp->is, readpp->cat, killblk); + if (readpp->is->method->debug >3) + logf(LOG_LOG,"isamd_merge: released old firstblock %d (%d:%d)", + isamd_addr(killblk,readpp->cat), readpp->cat, killblk ); r_ptr= (char *) &r_key; r_more = isamd_read_item( readpp, &r_ptr); @@ -613,19 +713,7 @@ static int merge ( ISAMD_PP *p_firstpp, /* first pp of the chain */ assert (readpp->numKeys == 0); } - if (killblk) - { /* we had a separate diff block, release it, we have the data */ - isamd_release_block(readpp->is, readpp->cat, killblk); - if (readpp->is->method->debug >3) - logf(LOG_LOG,"isamd_merge: released diff block %d=%d:%d", - isamd_addr(killblk,readpp->cat), readpp->cat, killblk ); - } - killblk=readpp->pos; - isamd_release_block(readpp->is, readpp->cat, killblk); - if (readpp->is->method->debug >3) - logf(LOG_LOG,"isamd_merge: released old firstblock %d (%d:%d)", - isamd_addr(killblk,readpp->cat), readpp->cat, killblk ); - + /* set up the new blocks for simple writing */ firstpp=pp=isamd_pp_open(readpp->is,isamd_addr(0, readpp->is->max_cat)); firstpp->size = firstpp->offset = ISAMD_BLOCK_OFFSET_1; @@ -633,17 +721,18 @@ static int merge ( ISAMD_PP *p_firstpp, /* first pp of the chain */ while (r_more) { - if (readpp->is->method->debug >4) + if (readpp->is->method->debug >6) logf(LOG_LOG,"isamd_merge: got key %d.%d", r_key.sysno, r_key.seqno ); pp= append_main_item(firstpp, pp, &r_key, encoder_data); - if ( readpp->pos != killblk ) - { + if ( (readpp->pos != killblk ) && (0!=readpp->pos) ) + { /* pos can get to 0 at end of main seq, if still diffs left...*/ + if (readpp->is->method->debug >3) + logf(LOG_LOG,"isamd_merge: released block %d (%d:%d) now %d=%d:%d", + isamd_addr(killblk,readpp->cat), readpp->cat, killblk, + isamd_addr(readpp->pos,readpp->cat),readpp->cat, readpp->pos ); isamd_release_block(readpp->is, readpp->cat, readpp->pos); - if (readpp->is->method->debug >3) - logf(LOG_LOG,"isamd_merge: released data block %d (%d:%d)", - killblk, isamd_type(killblk), isamd_block(killblk) ); killblk=readpp->pos; } @@ -653,24 +742,34 @@ static int merge ( ISAMD_PP *p_firstpp, /* first pp of the chain */ } /* while read */ - /* TODO: while pkey is an insert, and after last key inserted, append it */ - /* will prevent multiple merges on large insert runs */ - - /* set things up so that merge can continue */ + /* set things up so that append can continue */ isamd_reduceblock(firstpp); firstpp->diffs=0; if (firstpp!=pp) { /* the last data block is of no interest any more */ save_last_pp(pp); - if (readpp->is->method->debug >3) + if (readpp->is->method->debug >4) logf(LOG_LOG,"isamd_merge: saved last block %d=%d:%d", isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos); isamd_pp_close(pp); } + + if (readpp->is->method->debug >5) + logf(LOG_LOG,"isamd_merge: closing readpp %d=%d:%d di=%p", + isamd_addr(readpp->pos,readpp->cat), readpp->cat, readpp->pos, + readpp->diffinfo); + isamd_pp_close(readpp); /* pos is 0 by now, at eof. close works anyway */ + + (*firstpp->is->method->code_stop)(ISAMD_ENCODE, encoder_data); *p_firstpp = firstpp; - + + if (readpp->is->method->debug >2) + logf(LOG_LOG,"isamd_merge: merge ret %d=%d:%d nx=%d=%d:%d d=%d=2*%d+%d", + isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos, + pp->next, isamd_type(pp->next), isamd_block(pp->next), + pp->diffs, pp->diffs/2, pp->diffs &1 ); return 0; } /* merge */ @@ -702,12 +801,15 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data) char *c_ptr = codebuff; int codelen; int merge_rc; + int mergecount=0; + + ++(is->files[0].no_appds); firstpp=isamd_pp_open(is, ipos); - if (is->method->debug >4) - logf(LOG_LOG,"isamd_appd: Start ipos=%d=%d:%d d=%d=%d*2+%d", + if (is->method->debug >2) + logf(LOG_LOG,"isamd_appd: Start ipos=%d=%d:%d d=%d=%d*2+%d nk=%d", ipos, isamd_type(ipos), isamd_block(ipos), - firstpp->diffs, firstpp->diffs/2, firstpp->diffs & 1); + firstpp->diffs, firstpp->diffs/2, firstpp->diffs & 1, firstpp->numKeys); pp=read_diff_block(firstpp, &diffidx); encoder_data=(*is->method->code_start)(ISAMD_ENCODE); maxsize = is->method->filecat[pp->cat].bsize; @@ -719,7 +821,7 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data) i_ptr = i_item; i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode); - if (is->method->debug >3) + if (is->method->debug >6) logf(LOG_LOG,"isamd_appd: start with m=%d %s", i_mode, hexdump(i_item,i_ptr-i_item,hexbuff) ); @@ -734,7 +836,7 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data) (*is->method->code_item)(ISAMD_ENCODE, encoder_data, &c_ptr, &i_ptr); codelen = c_ptr - codebuff; assert ( (codelen<128) && (codelen>0)); - if (is->method->debug >3) + if (is->method->debug >7) logf(LOG_LOG,"isamd_appd: coded into %d: %s (nk=%d) (ix=%d)", codelen, hexdump(codebuff, codelen,hexbuff), firstpp->numKeys,diffidx); @@ -744,23 +846,42 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data) if (is->method->debug >3) logf(LOG_LOG,"isamd_appd: block full (ix=%d mx=%d lix=%d)", diffidx, maxsize, difflenidx); - if (is->method->debug >5) + if (is->method->debug >8) logf(LOG_LOG,"isamd_appd: block pp=%p buf=%p [%d]:%s", pp, pp->buf, difflenidx, hexdump(&pp->buf[difflenidx],8,0)); + if (mergecount++) + ++(is->files[0].no_remerges); merge_rc = merge (&firstpp, &pp, &i_key); - if (merge_rc) + if (0!=merge_rc) return merge_rc; /* merge handled them all ! */ - /* set things up so we can continue */ - pp = read_diff_block(firstpp, &diffidx); - (*is->method->code_reset)(encoder_data); - maxsize = is->method->filecat[pp->cat].bsize; - difflenidx=diffidx; - diffidx+=sizeof(int); + /* set things up so we can continue */ + pp = read_diff_block(firstpp, &diffidx); + (*is->method->code_reset)(encoder_data); + maxsize = is->method->filecat[pp->cat].bsize; + difflenidx=diffidx; + diffidx+=sizeof(int); + + /* code the current input key again */ + c_ptr=codebuff; + i_ptr=i_item; + (*is->method->code_item)(ISAMD_ENCODE, encoder_data, &c_ptr, &i_ptr); + codelen = c_ptr - codebuff; + assert ( (codelen<128) && (codelen>0)); + if (is->method->debug >7) + logf(LOG_LOG,"isamd_appd: recoded into %d: %s (nk=%d) (ix=%d)", + codelen, hexdump(codebuff, codelen,hexbuff), + firstpp->numKeys,diffidx); } /* block full */ + /* Note: this goes horribly wrong if there is no room for the diff */ + /* after the merge! The solution is to increase the limit in */ + /* separateDiffBlock, to force a separate diff block earlier, and not */ + /* to have absurdly small blocks */ + assert ( diffidx+codelen <= maxsize ); + /* save the diff */ memcpy(&(pp->buf[diffidx]),codebuff,codelen); diffidx += codelen; @@ -779,7 +900,7 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data) /* (try to) read the next input */ i_ptr = i_item; i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode); - if ( (i_more) && (is->method->debug >3) ) + if ( (i_more) && (is->method->debug >6) ) logf(LOG_LOG,"isamd_appd: got m=%d %s", i_mode, hexdump(i_item,i_ptr-i_item,hexbuff) ); } /* more loop */ @@ -789,6 +910,8 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data) while ( (difflenidx-diffidx<=sizeof(int)) && (difflenidxbuf[difflenidx++]='\0'; + + (*firstpp->is->method->code_stop)(ISAMD_ENCODE, encoder_data); return save_both_pps( firstpp, pp ); } /* append_diffs */ @@ -814,7 +937,37 @@ ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data) /* * $Log: merge-d.c,v $ - * Revision 1.6 1999-07-23 15:43:05 heikki + * Revision 1.16 1999-08-24 10:12:02 heikki + * Comments about optimising + * + * Revision 1.15 1999/08/22 08:26:34 heikki + * COmments + * + * Revision 1.14 1999/08/20 12:25:58 heikki + * Statistics in isamd + * + * Revision 1.13 1999/08/18 13:59:19 heikki + * Fixed another unlikely difflen bug + * + * Revision 1.12 1999/08/18 13:28:17 heikki + * Set log levels to decent values + * + * Revision 1.11 1999/08/18 10:37:11 heikki + * Fixed (another) difflen bug + * + * Revision 1.10 1999/08/18 09:13:31 heikki + * Fixed a detail + * + * Revision 1.9 1999/08/17 19:46:53 heikki + * Fixed a memory leak + * + * Revision 1.8 1999/08/07 11:30:59 heikki + * Bug fixing (still a mem leak somewhere) + * + * Revision 1.7 1999/08/04 14:21:18 heikki + * isam-d seems to be working. + * + * Revision 1.6 1999/07/23 15:43:05 heikki * Hunted a few bugs in isam-d. Still crashes on the long test run * * Revision 1.5 1999/07/23 13:58:52 heikki