From c0a1c79e55f706009113b2de3c11a6f13d4a0e7d Mon Sep 17 00:00:00 2001 From: Heikki Levanto Date: Wed, 25 Aug 1999 18:09:23 +0000 Subject: [PATCH] Starting to optimize --- isamc/isamd-p.h | 14 +- isamc/isamd.c | 9 +- isamc/merge-d.c | 729 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 739 insertions(+), 13 deletions(-) diff --git a/isamc/isamd-p.h b/isamc/isamd-p.h index aeb0a4c..c27a78e 100644 --- a/isamc/isamd-p.h +++ b/isamc/isamd-p.h @@ -1,4 +1,4 @@ -/* $Id: isamd-p.h,v 1.5 1999-08-20 12:25:58 heikki Exp $ +/* $Id: isamd-p.h,v 1.6 1999-08-25 18:09:23 heikki Exp $ * Copyright (c) 1995-1996, Index Data. * See the file LICENSE for details. * Heikki Levanto @@ -68,15 +68,16 @@ typedef struct ISAMD_DIFF_s *ISAMD_DIFF; struct ISAMD_PP_s { char *buf; /* buffer for read/write operations */ ISAMD_BLOCK_SIZE offset; /* position for next read/write */ - ISAMD_BLOCK_SIZE size; /* size of actual pointer data */ + ISAMD_BLOCK_SIZE size; /* size of actual data */ int cat; /* category of this block */ int pos; /* block number of this block */ int next; /* number of the next block */ int diffs; /* either block or offset (in head) of start of diffs */ + /* will not be used in the improved version! */ ISAMD is; - void *decodeClientData; + void *decodeClientData; /* delta-encoder's own data */ ISAMD_DIFF diffinfo; - char *diffbuf; /* buffer for the diff block, only when reading */ + char *diffbuf; /* buffer for the diff block */ int numKeys; }; @@ -104,7 +105,10 @@ void isamd_free_diffs(ISAMD_PP pp); /* * $Log: isamd-p.h,v $ - * Revision 1.5 1999-08-20 12:25:58 heikki + * Revision 1.6 1999-08-25 18:09:23 heikki + * Starting to optimize + * + * Revision 1.5 1999/08/20 12:25:58 heikki * Statistics in isamd * * Revision 1.4 1999/07/21 14:24:50 heikki diff --git a/isamc/isamd.c b/isamc/isamd.c index 6dcf031..af83ebd 100644 --- a/isamc/isamd.c +++ b/isamc/isamd.c @@ -1,7 +1,7 @@ /* * Copyright (c) 1995-1998, Index Data. * See the file LICENSE for details. - * $Id: isamd.c,v 1.10 1999-08-24 13:17:42 heikki Exp $ + * $Id: isamd.c,v 1.11 1999-08-25 18:09:24 heikki Exp $ * * Isamd - isam with diffs * Programmed by: Heikki Levanto @@ -27,7 +27,7 @@ static void init_fc (ISAMD is, int cat); #define ISAMD_FREELIST_CHUNK 1 -#define SMALL_TEST 0 +#define SMALL_TEST 1 ISAMD_M isamd_getmethod (ISAMD_M me) { @@ -717,7 +717,10 @@ void isamd_pp_dump (ISAMD is, ISAMD_P ipos) /* * $Log: isamd.c,v $ - * Revision 1.10 1999-08-24 13:17:42 heikki + * Revision 1.11 1999-08-25 18:09:24 heikki + * Starting to optimize + * + * Revision 1.10 1999/08/24 13:17:42 heikki * Block sizes, comments * * Revision 1.9 1999/08/20 12:25:58 heikki diff --git a/isamc/merge-d.c b/isamc/merge-d.c index 980f6d7..196f0e3 100644 --- a/isamc/merge-d.c +++ b/isamc/merge-d.c @@ -3,7 +3,7 @@ * See the file LICENSE for details. * Heikki Levanto * - * $Id: merge-d.c,v 1.17 1999-08-24 13:17:42 heikki Exp $ + * $Id: merge-d.c,v 1.18 1999-08-25 18:09:24 heikki Exp $ * * missing * @@ -24,6 +24,8 @@ * - On allocating more blocks (in append), check the order of blocks, and * if needed, swap them. * - In merge, merge also with the input data. + * - Write a routine to save/load indexes into a block, save only as many + * bytes as needed (size, diff, diffindexes) * * bugs * @@ -100,6 +102,9 @@ * may get an extra seek in read, but save merges. */ + +#define NEW_ISAM_D 0 /* not yet ready to delete the old one! */ + #include #include #include @@ -133,6 +138,8 @@ static char *hexdump(unsigned char *p, int len, char *buff) { return buff; } +#ifndef NEW_ISAM_D +/* The next many lines are the old ISAM_D. Works, but not optimal */ static int separateDiffBlock(ISAMD_PP pp) { @@ -155,7 +162,7 @@ static int separateDiffBlock(ISAMD_PP pp) static void getDiffInfo(ISAMD_PP pp, int diffidx) { /* builds the diff info structures from a diffblock */ - int maxinfos = pp->is->method->filecat[pp->cat].bsize / 5 +1; + int maxinfos = pp->is->method->filecat[pp->cat].bsize / 5 +2; /* Each diff takes at least 5 bytes. Probably more, but this is safe */ int i=1; /* [0] is used for the main data */ int diffsz= maxinfos * sizeof(struct ISAMD_DIFF_s); @@ -979,9 +986,721 @@ ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data) } /* isamd_append */ -/* - * $Log: merge-d.c,v $ - * Revision 1.17 1999-08-24 13:17:42 heikki + +#else /* NEW_ISAM_D */ +/*************************************************************** + *************************************************************** + *************************************************************** + *************************************************************** + *************************************************************** + *************************************************************** + *************************************************************** + *************************************************************** + *************************************************************** + *************************************************************** + *************************************************************** + *************************************************************** + *************************************************************** + *************************************************************** + *************************************************************** + *************************************************************** + *************************************************************** + ***************************************************************/ + + +/*************************************************************** + * General support routines + ***************************************************************/ + +static void isamd_reduceblock(ISAMD_PP pp) +/* takes a large block, and reduces its category if possible */ +/* Presumably the first block in an isam-list */ +{ + if (pp->pos) + return; /* existing block, do not touch */ + /* TODO: Probably we may touch anyway? */ + if (pp->is->method->debug > 5) + logf(LOG_LOG,"isamd_reduce: start p=%d c=%d sz=%d", + pp->pos, pp->cat, pp->size); + while ( ( pp->cat > 0 ) && (!pp->next) && + (pp->offset < pp->is->method->filecat[pp->cat-1].bsize ) ) + pp->cat--; + pp->pos = isamd_alloc_block(pp->is, pp->cat); + if (pp->is->method->debug > 5) + logf(LOG_LOG,"isamd_reduce: got p=%d c=%d sz=%d", + pp->pos, pp->cat, pp->size); +} /* reduceblock */ + + +static int save_first_pp ( ISAMD_PP firstpp) +{ + isamd_buildfirstblock(firstpp); + isamd_write_block(firstpp->is,firstpp->cat,firstpp->pos,firstpp->buf); + return isamd_addr(firstpp->pos,firstpp->cat); +} + + +static void save_last_pp (ISAMD_PP pp) +{ + pp->next = 0;/* just to be sure */ + isamd_buildlaterblock(pp); + isamd_write_block(pp->is,pp->cat,pp->pos,pp->buf); +} + +#ifdef UNUSED +static int save_both_pps (ISAMD_PP firstpp, ISAMD_PP pp) +{ + /* order of things: Better to save firstpp first, if there are just two */ + /* blocks, but last if there are blocks in between, as these have already */ + /* been saved... optimise later (that's why this is in its own func...*/ + int retval = save_first_pp(firstpp); + if (firstpp!=pp){ + save_last_pp(pp); + isamd_pp_close(pp); + } + isamd_pp_close(firstpp); + return retval; +} /* save_both_pps */ +#endif + + + +/*************************************************************** + * Diffblock handling + ***************************************************************/ + +void isamd_free_diffs(ISAMD_PP pp) +{ + int i; + if (pp->is->method->debug > 5) + logf(LOG_LOG,"isamd_free_diffs: pp=%p di=%p", pp, pp->diffinfo); + if (!pp->diffinfo) + return; + for (i=1;pp->diffinfo[i].decodeData;i++) + { + if (pp->is->method->debug > 8) + logf(LOG_LOG,"isamd_free_diffs [%d]=%p",i, + pp->diffinfo[i].decodeData); + (*pp->is->method->code_stop)(ISAMD_DECODE,pp->diffinfo[i].decodeData); + } + xfree(pp->diffinfo); + if (pp->diffbuf != pp->buf) + xfree (pp->diffbuf); + pp->diffbuf=0; + pp->diffinfo=0; +} /* isamd_free_diffs */ + + +static void getDiffInfo(ISAMD_PP pp, int diffidx) +{ /* builds the diff info structures from a diffblock */ + int maxinfos = pp->is->method->filecat[pp->cat].bsize / 5 +1; + /* Each diff takes at least 5 bytes. Probably more, but this is safe */ + int i=2; /* [0] is used for the main data, [1] for merge inputs */ + int diffsz= maxinfos * sizeof(struct ISAMD_DIFF_s); + + pp->diffinfo = xmalloc( diffsz ); + memset(pp->diffinfo,'\0',diffsz); + if (pp->is->method->debug > 5) + logf(LOG_LOG,"isamd_getDiffInfo: %d (%d:%d), ix=%d mx=%d", + isamd_addr(pp->pos, pp->cat), pp->cat, pp->pos, diffidx,maxinfos); + assert(pp->diffbuf); + + pp->diffinfo[0].maxidx=-1; /* mark as special */ + pp->diffinfo[1].maxidx=-1; /* mark as special */ + + while (i pp->is->method->filecat[pp->cat].bsize ) + { + if (pp->is->method->debug > 5) + logf(LOG_LOG,"isamd_getDiffInfo:Near end (no room for len) at ix=%d n=%d", + diffidx, i); + return; /* whole block done */ + } + memcpy( &pp->diffinfo[i].maxidx, &pp->diffbuf[diffidx], sizeof(int) ); + + if (pp->is->method->debug > 5) + logf(LOG_LOG,"isamd_getDiffInfo: max=%d ix=%d dbuf=%p", + pp->diffinfo[i].maxidx, diffidx, pp->diffbuf); + + if ( (pp->is->method->debug > 0) && + (pp->diffinfo[i].maxidx > pp->is->method->filecat[pp->cat].bsize) ) + { /* bug-hunting, this fails on some long runs that log too much */ + logf(LOG_LOG,"Bad MaxIx!!! %s:%d: diffidx=%d", + __FILE__,__LINE__, diffidx); + logf(LOG_LOG,"i=%d maxix=%d bsz=%d", i, pp->diffinfo[i].maxidx, + pp->is->method->filecat[pp->cat].bsize); + logf(LOG_LOG,"pp=%d=%d:%d pp->nx=%d=%d:%d", + isamd_addr(pp->pos,pp->cat), pp->pos, pp->cat, + pp->next, isamd_type(pp->next), isamd_block(pp->next) ); + } + assert(pp->diffinfo[i].maxidx <= pp->is->method->filecat[pp->cat].bsize+1); + + if (0==pp->diffinfo[i].maxidx) + { + if (pp->is->method->debug > 5) //!!! 4 + logf(LOG_LOG,"isamd_getDiffInfo:End mark at ix=%d n=%d", + diffidx, i); + return; /* end marker */ + } + diffidx += sizeof(int); + pp->diffinfo[i].decodeData = (*pp->is->method->code_start)(ISAMD_DECODE); + pp->diffinfo[i].diffidx = diffidx; + if (pp->is->method->debug > 5) + logf(LOG_LOG,"isamd_getDiff[%d]:%d-%d %s", + i,diffidx-sizeof(int),pp->diffinfo[i].maxidx, + hexdump((char *)&pp->diffbuf[diffidx-4],8,0) ); + diffidx=pp->diffinfo[i].maxidx; + if ( diffidx > pp->is->method->filecat[pp->cat].bsize ) + return; /* whole block done */ + ++i; + } + assert (!"too many diff sequences in the block"); +} + +/*************************************************************** + * Main block operations + ***************************************************************/ + + +static ISAMD_PP get_new_main_block( ISAMD_PP firstpp, ISAMD_PP pp) +{ /* allocates a new block for the main data, and links it in */ + int newblock; + if (0 == firstpp->next) + { /* special case, pp not yet allocated. */ + /*Started as largest size, that's fine */ + pp->pos = isamd_alloc_block(pp->is,pp->cat); + firstpp->next = isamd_addr(pp->pos,pp->cat); + if (pp->is->method->debug >3) + logf(LOG_LOG,"isamd_build: Alloc 1. dblock p=%d=%d:%d", + isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos); + } + newblock=isamd_alloc_block(pp->is,pp->cat); + pp->next=isamd_addr(pp->cat,newblock); + isamd_buildlaterblock(pp); + isamd_write_block(pp->is,pp->cat,pp->pos,pp->buf); + if (pp->is->method->debug >3) + logf(LOG_LOG,"isamd_build: Alloc nxt %d=%d:%d -> %d=%d:%d", + isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos, + isamd_addr(newblock,pp->cat), pp->cat, newblock); + pp->next=0; + pp->pos=newblock; + pp->size=pp->offset=ISAMD_BLOCK_OFFSET_N; + return pp; +} /* get_new_main_block */ + + +static ISAMD_PP append_main_item(ISAMD_PP firstpp, + ISAMD_PP pp, + struct it_key *i_key) +{ /* appends one item in the main data block, allocates new if needed */ + char *i_item= (char *) i_key; /* same as char */ + char *i_ptr=i_item; + char codebuff[128]; + char *c_ptr = codebuff; + int codelen; + char hexbuff[64]; + + int maxsize = pp->is->method->filecat[pp->is->max_cat].bsize; + + c_ptr=codebuff; + i_ptr=i_item; + (*pp->is->method->code_item)(ISAMD_ENCODE, pp->decodeClientData, + &c_ptr, &i_ptr); + codelen = c_ptr - codebuff; + assert ( (codelen<128) && (codelen>0)); + if (pp->is->method->debug >7) + logf(LOG_LOG,"isamd:build: coded into %s (nk=%d)", + hexdump(codebuff, c_ptr-codebuff,hexbuff), firstpp->numKeys+1); + + if (pp->offset + codelen > maxsize ) + { /* oops, block full - get a new one */ + pp = get_new_main_block( firstpp, pp ); + /* reset encoging and code again */ + (*pp->is->method->code_reset)(pp->decodeClientData); + c_ptr=codebuff; + i_ptr=i_item; + (*pp->is->method->code_item)(ISAMD_ENCODE, pp->decodeClientData, + &c_ptr, &i_ptr); + codelen = c_ptr - codebuff; + assert ( (codelen<128) && (codelen>0)); + if (pp->is->method->debug >7) + logf(LOG_LOG,"isamd:build: recoded into %s (nk=%d)", + hexdump(codebuff, c_ptr-codebuff,hexbuff), firstpp->numKeys+1); + } /* block full */ + + assert (pp->offset + codelen <= maxsize ); + + /* write the data into pp, now we must have room */ + memcpy(&(pp->buf[pp->offset]),codebuff,codelen); + pp->offset += codelen; + pp->size += codelen; + firstpp->numKeys++; + /* clear the next 4 bytes in block, to avoid confusions with diff lens */ + /* dirty, it should not be done here, but something slips somewhere, and */ + /* I hope this fixes it... - Heikki */ + codelen = pp->offset; + while ( (codelen < maxsize ) && (codelen <= pp->offset+4) ) + pp->buf[codelen++] = '\0'; + return pp; +} /* append_main_item */ + + +/*************************************************************** + * Merge + ***************************************************************/ + +static int merge ( ISAMD_PP firstpp, /* first pp (with diffs) */ + struct it_key *p_key, /* the data item that didn't fit*/ + ISAMD_I data) /* more input data comes here */ +{ + int diffidx; + int killblk=0; + struct it_key r_key; + char * r_ptr; + int r_more = 1; + ISAMD_PP pp; + ISAMD_PP readpp=firstpp; + int retval=0; + int diffcat = firstpp->cat; /* keep the category of the diffblock even */ + /* if it is going to be empty now. */ + /* Alternative: Make it the minimal, and */ + /* resize later. Saves disk, but will lead */ + /* into bad seeks. */ + + ++(readpp->is->files[0].no_merges); + + /* set up diffs as they should be for reading */ + diffidx = ISAMD_BLOCK_OFFSET_1; + readpp->diffbuf=readpp->buf; + getDiffInfo(readpp,diffidx); + + if (readpp->is->method->debug >4) + logf(LOG_LOG,"isamd_merge: f=%d=%d:%d n=%d=%d:%d", + isamd_addr(firstpp->pos,firstpp->cat), firstpp->cat, firstpp->pos, + firstpp->next, isamd_type(firstpp->next), isamd_block(firstpp->next)); + + /* release our data block. Do before reading, when pos is stable ! */ + killblk=firstpp->pos; + if (killblk) + { + isamd_release_block(firstpp->is, firstpp->cat, killblk); + if (readpp->is->method->debug >3) + logf(LOG_LOG,"isamd_merge: released old firstblock %d (%d:%d)", + isamd_addr(killblk,firstpp->cat), firstpp->cat, killblk ); + } + + /* force the read to reload the first data block at first try */ + readpp->offset=readpp->size+1; + + + r_ptr= (char *) &r_key; + r_more = isamd_read_item( readpp, &r_ptr); + if (!r_more) + { /* oops, all data has been deleted! what to do??? */ + /* never mind, we have at least one more delta to add to the block */ + /* pray that is not a delete as well... */ + r_key.sysno = 0; + r_key.seqno = 0; + if (readpp->is->method->debug >5) + logf(LOG_LOG,"isamd_merge:all data has been deleted (nk=%d) ", + readpp->numKeys); + //assert (readpp->numKeys == 0); /* no longer true! */ + } + + + /* set up the new blocks for simple writing */ + firstpp=isamd_pp_open(readpp->is,isamd_addr(0, diffcat)); + firstpp->pos=isamd_alloc_block(firstpp->is,diffcat); + + pp=isamd_pp_open(readpp->is,isamd_addr(0,readpp->is->max_cat) ); + + while (r_more) + { + if (readpp->is->method->debug >6) + logf(LOG_LOG,"isamd_merge: got key %d.%d", + r_key.sysno, r_key.seqno ); + pp= append_main_item(firstpp, pp, &r_key); + + if ( (readpp->pos != killblk ) && (0!=readpp->pos) ) + { /* pos can get to 0 at end of main seq, if still diffs left...*/ + if (readpp->is->method->debug >3) + logf(LOG_LOG,"isamd_merge: released block %d (%d:%d) now %d=%d:%d", + isamd_addr(killblk,readpp->cat), readpp->cat, killblk, + isamd_addr(readpp->pos,readpp->cat),readpp->cat, readpp->pos ); + isamd_release_block(readpp->is, readpp->cat, readpp->pos); + killblk=readpp->pos; + } + + /* (try to) read next item */ + r_ptr= (char *) &r_key; + r_more = isamd_read_item( readpp, &r_ptr); + + } /* while read */ + + + firstpp->diffs=0; + + + isamd_reduceblock(pp); /* reduce size if possible */ + save_last_pp(pp); + if (readpp->is->method->debug >4) + logf(LOG_LOG,"isamd_merge: saved last block %d=%d:%d", + isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos); + isamd_pp_close(pp); + + if (readpp->is->method->debug >5) + logf(LOG_LOG,"isamd_merge: closing readpp %d=%d:%d di=%p", + isamd_addr(readpp->pos,readpp->cat), readpp->cat, readpp->pos, + readpp->diffinfo); + isamd_pp_close(readpp); /* pos is 0 by now, at eof. close works anyway */ + + if (readpp->is->method->debug >2) + logf(LOG_LOG,"isamd_merge: merge ret f=%d=%d:%d pp=%d=%d:%d", + isamd_addr(firstpp->pos,pp->cat), firstpp->cat, firstpp->pos, + isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos); + + retval = isamd_addr(firstpp->pos, firstpp->cat); + isamd_pp_close(firstpp); + + return retval; + +} /* merge */ + + + + +/*************************************************************** + * Read with merge + ***************************************************************/ + +/* Reads one item and corrects for the diffs, if any */ +/* return 1 for ok, 0 for eof */ +int isamd_read_item_merge ( + ISAMD_PP pp, + char **dst, + struct it_key *p_key, /* the data item that didn't fit*/ + ISAMD_I data) /* more input data comes here */ +{ /* The last two args can be null for ordinary reads */ + char *keyptr; + char *codeptr; + char *codestart; + int winner=0; /* which diff holds the day */ + int i; /* looping diffs */ + int cmp; + int retry=1; + if (pp->diffs==0) /* no diffs, just read the thing */ + return isamd_read_main_item(pp,dst); + + if (!pp->diffinfo) + getDiffInfo(pp, pp->offset); + + if (p_key) + pp->diffinfo[1].key = *p_key; /* the key merge could not handle */ + else + pp->diffinfo[1].key.sysno=0; + + if (data) + pp->diffinfo[1].maxidx=-1; /* signal we have diffs to read */ + else + pp->diffinfo[1].maxidx=0; + + pp->size=pp->offset=pp->is->method->filecat[pp->cat].bsize; + /* this forces a read of the next block at first read */ + + while (retry) + + { + retry=0; + if (0==pp->diffinfo[0].key.sysno) + { /* 0 is special case, main data. */ + keyptr=(char*) &(pp->diffinfo[0].key); + pp->diffinfo[0].mode = ! isamd_read_main_item(pp,&keyptr); + if (pp->is->method->debug > 7) + logf(LOG_LOG,"isamd_read_item: read main %d.%d (%x.%x)", + pp->diffinfo[0].key.sysno, pp->diffinfo[0].key.seqno, + pp->diffinfo[0].key.sysno, pp->diffinfo[0].key.seqno); + } /* get main data */ + + if ( (0==pp->diffinfo[1].key.sysno) && (-1==pp->diffinfo[1].maxidx) ); + { /* 1 is another special case, the input data at merge */ + keyptr = (char *) &pp->diffinfo[1].key; + i = (*data->read_item)(data->clientData, &keyptr, &pp->diffinfo[1].mode); + if (!i) + { /* did not get it */ + pp->diffinfo[1].key.sysno=0; + pp->diffinfo[1].maxidx=-2; /* stop trying */ + } + if (pp->is->method->debug >6) + logf(LOG_LOG,"merge: read diff m=%d %d.%d (%x.%x)", + pp->diffinfo[1].mode, + pp->diffinfo[1].key.sysno, pp->diffinfo[1].key.seqno, + pp->diffinfo[1].key.sysno, pp->diffinfo[1].key.seqno ); + } /* get input data */ + + winner = 0; + for (i=1; (!retry) && (pp->diffinfo[i].decodeData); i++) + { + if (pp->is->method->debug > 8) + logf(LOG_LOG,"isamd_read_item: considering d%d %d.%d ix=%d mx=%d", + i, pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno, + pp->diffinfo[i].diffidx, pp->diffinfo[i].maxidx); + + if ( (0==pp->diffinfo[i].key.sysno) && + (pp->diffinfo[i].diffidx < pp->diffinfo[i].maxidx)) + {/* read a new one, if possible */ + codeptr= codestart = &(pp->diffbuf[pp->diffinfo[i].diffidx]); + keyptr=(char *)&(pp->diffinfo[i].key); + (*pp->is->method->code_item)(ISAMD_DECODE, + pp->diffinfo[i].decodeData, &keyptr, &codeptr); + pp->diffinfo[i].diffidx += codeptr-codestart; + pp->diffinfo[i].mode = pp->diffinfo[i].key.seqno & 1; + pp->diffinfo[i].key.seqno = pp->diffinfo[i].key.seqno >>1 ; + if (pp->is->method->debug > 7) + logf(LOG_LOG,"isamd_read_item: read diff[%d] %d.%d (%x.%x)",i, + pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno, + pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno); + } + if ( 0!= pp->diffinfo[i].key.sysno) + { /* got a key, compare */ + cmp=key_compare(&pp->diffinfo[i].key, &pp->diffinfo[winner].key); + if (0==pp->diffinfo[winner].key.sysno) + cmp=-1; /* end of main sequence, take all diffs */ + if (cmp<0) + { + if (pp->is->method->debug > 8) + logf(LOG_LOG,"isamd_read_item: ins %d<%d %d.%d (%x.%x) < %d.%d (%x.%x)", + i, winner, + pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno, + pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno, + pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno, + pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno); + if (pp->diffinfo[i].mode) /* insert diff, should always be */ + winner = i; + else + assert(!"delete diff for nonexisting item"); + /* is an assert too steep here? Not really.*/ + } /* earlier key */ + else if (cmp==0) + { + if (!pp->diffinfo[i].mode) /* delete diff. should always be */ + { + if (pp->is->method->debug > 8) + logf(LOG_LOG,"isamd_read_item: del %d at%d %d.%d (%x.%x)", + i, winner, + pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno, + pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno); + pp->diffinfo[winner].key.sysno=0; /* delete it */ + } + else + if (pp->is->method->debug > 2) + logf(LOG_LOG,"isamd_read_item: duplicate ins %d at%d %d.%d (%x.%x)", + i, winner, + pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno, + pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno); + /* skip the insert, since we already have it in the base */ + /* Should we fail an assertion here??? */ + pp->diffinfo[i].key.sysno=0; /* done with the delete */ + retry=1; /* start all over again */ + } /* matching key */ + /* else it is a later key, its turn will come */ + } /* got a key */ + } /* for each diff */ + } /* not retry */ + + if ( pp->diffinfo[winner].key.sysno) + { + if (pp->is->method->debug > 7) + logf(LOG_LOG,"isamd_read_item: got %d %d.%d (%x.%x)", + winner, + pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno, + pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno); + memcpy(*dst, &pp->diffinfo[winner].key, sizeof(struct it_key) ); + *dst += sizeof(struct it_key); + pp->diffinfo[winner].key.sysno=0; /* used that one up */ + cmp= 1; + } + else + { + if (pp->is->method->debug > 7) + logf(LOG_LOG,"isamd_read_item: eof w=%d %d.%d (%x.%x)", + winner, + pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno, + pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno); + assert(winner==0); /* if nothing found, nothing comes from a diff */ + cmp= 0; /* eof */ + } + return cmp; + +} /* isamd_read_item */ + + +int isamd_read_item (ISAMD_PP pp, char **dst) +{ + return isamd_read_item_merge(pp,dst,0,0); +} + + +/*************************************************************** + * Appending diffs + ***************************************************************/ + + + +static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data) +{ + struct it_key i_key; /* one input item */ + char *i_item = (char *) &i_key; /* same as chars */ + char *i_ptr=i_item; + int i_more =1; + int i_mode; /* 0 for delete, 1 for insert */ + + ISAMD_PP firstpp; + char hexbuff[64]; + int diffidx=0; + int maxsize=0; + int difflenidx; + char codebuff[128]; + char *c_ptr = codebuff; + int codelen; + int merge_rc; + int retval=0; + + if (0==ipos) + { + firstpp=isamd_pp_open(is, isamd_addr(0,0) ); + firstpp->size=firstpp->offset=ISAMD_BLOCK_OFFSET_1; + /* create in smallest category, will expand later */ + ++(is->files[0].no_fbuilds); + } + else + { + firstpp=isamd_pp_open(is, ipos); + ++(is->files[0].no_appds); + } + + if (is->method->debug >2) + logf(LOG_LOG,"isamd_appd: Start ipos=%d=%d:%d n=%d=%d:%d nk=%d", + ipos, isamd_type(ipos), isamd_block(ipos), + firstpp->next, isamd_type(firstpp->next), isamd_block(firstpp->diffs), + firstpp->numKeys); + maxsize = is->method->filecat[firstpp->cat].bsize; + + difflenidx = diffidx = firstpp->size; + + diffidx+=sizeof(int); /* difflen will be stored here */ + + /* read first input */ + i_ptr = i_item; + i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode); + + if (is->method->debug >6) + logf(LOG_LOG,"isamd_appd: start with m=%d %s", + i_mode, hexdump(i_item,i_ptr-i_item,hexbuff) ); + + while (i_more) + { + /* store the mode bit inside key */ + assert( ((i_key.seqno<<1)>>1) == i_key.seqno); /* can spare the bit */ + i_key.seqno = i_key.seqno * 2 + i_mode; + + c_ptr=codebuff; + i_ptr=i_item; + (*is->method->code_item)(ISAMD_ENCODE, firstpp->decodeClientData, + &c_ptr, &i_ptr); + codelen = c_ptr - codebuff; + assert ( (codelen<128) && (codelen>0)); + if (is->method->debug >7) + logf(LOG_LOG,"isamd_appd: coded into %d: %s (nk=%d) (ix=%d)", + codelen, hexdump(codebuff, codelen,hexbuff), + firstpp->numKeys,diffidx); + + if (diffidx + codelen > maxsize ) + { /* block full */ + if (firstpp->cat < firstpp->is->max_cat) + { /* just increase the block size */ + if (firstpp->pos > 0) /* free the old block if allocated */ + isamd_release_block(is, firstpp->cat, firstpp->pos); + ++firstpp->cat; + maxsize = is->method->filecat[firstpp->cat].bsize; + firstpp->pos=0; /* need to allocate it when saving */ + if (is->method->debug >3) + logf(LOG_LOG,"isamd_appd: increased diff block to %d (%d)", + firstpp->cat, maxsize); + } + else + { /* max size already - can't help, need to merge it */ + merge_rc = merge (firstpp, &i_key, data); + if (0!=merge_rc) + return merge_rc; /* merge handled them all ! */ + assert(!"merge returned zero ??"); + } /* need to merge */ + } /* block full */ + + assert ( diffidx+codelen <= maxsize ); + + /* save the diff */ + memcpy(&(firstpp->buf[diffidx]),codebuff,codelen); + diffidx += codelen; + firstpp->size += codelen; + firstpp->offset +=codelen; + + if (i_mode) + firstpp->numKeys++; /* insert diff */ + else + firstpp->numKeys--; /* delete diff */ + + /* update length of this diff run */ + memcpy(&(firstpp->buf[difflenidx]),&diffidx,sizeof(diffidx)); + + /* (try to) read the next input */ + i_ptr = i_item; + i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode); + if ( (i_more) && (is->method->debug >6) ) + logf(LOG_LOG,"isamd_appd: got m=%d %s", + i_mode, hexdump(i_item,i_ptr-i_item,hexbuff) ); + } /* more loop */ + + /* clear the next difflen, if room for such */ + difflenidx = diffidx; + while ( (difflenidx-diffidx<=sizeof(int)) && (difflenidxbuf[difflenidx++]='\0'; + + if (0==firstpp->pos) /* need to (re)alloc the block */ + firstpp->pos = isamd_alloc_block(is, firstpp->cat); + + retval = save_first_pp( firstpp ); + isamd_pp_close(firstpp); + + return retval; +} /* append_diffs */ + + + + +/************************************************************* + * isamd_append itself, Sweet, isn't it + *************************************************************/ + +ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data) +{ + return append_diffs(is,ipos,data); +} /* isamd_append */ + + + + + +#endif /* NEW_ISAM_D */ + + + +/* + * $Log: merge-d.c,v $ + * Revision 1.18 1999-08-25 18:09:24 heikki + * Starting to optimize + * + * Revision 1.17 1999/08/24 13:17:42 heikki * Block sizes, comments * * Revision 1.16 1999/08/24 10:12:02 heikki -- 1.7.10.4