X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=isamc%2Fmerge-d.c;h=d73a459c4242688d63b8350530a5e7561cb6b383;hb=e5e4f1de02396b2cfab488f8e5918853e96d6b53;hp=e31adaa5436a8eeb271981e66cb373acbe5c2409;hpb=b918342ded2c57bcd57bf7af6748c17bce4d1cf6;p=idzebra-moved-to-github.git diff --git a/isamc/merge-d.c b/isamc/merge-d.c index e31adaa..d73a459 100644 --- a/isamc/merge-d.c +++ b/isamc/merge-d.c @@ -1,108 +1,26 @@ -/* - * Copyright (c) 1996-1998, Index Data. - * See the file LICENSE for details. - * Heikki Levanto - * - * $Id: merge-d.c,v 1.19 1999-09-13 13:28:28 heikki Exp $ - * - * missing - * - * optimize - * - Input filter: Eliminate del-ins pairs, tell if only one entry (or none) - * - single-entry optimizing (keep the one entry in the dict, no block) - * - study and optimize block sizes (later) - * - Clean up the different ways diffs are handled in writing and reading - * - Keep a merge-count in the firstpp, and if the block has already been - * merged, reduce it to a larger size even if it could fit in a small one! - * - Keep minimum freespace in the category table, and use that in reduce! - * - pass a space-needed for separateDiffBlock and reduce to be able to - * reserve more room for diffs, or to force a separate (larger?) block - * - Idea: Simplify the structure, so that the first block is always diffs. - * On small blocks, that is all we have. Once a block has been merged, we - * allocate the first main block and a (new) firstblock ffor diffs. From - * that point on the word has two blocks for it. - * - On allocating more blocks (in append), check the order of blocks, and - * if needed, swap them. - * - In merge, merge also with the input data. - * - Write a routine to save/load indexes into a block, save only as many - * bytes as needed (size, diff, diffindexes) - * - * bugs - * - Some confusion about opening pp's, how to set offset etc. Maybe it'd be - * best to load both diffs and first main block? - * - * caveat - * There is a confusion about the block addresses. cat or type is the category, - * pos or block is the block number. pp structures keep these two separate, - * and combine when saving the pp. The next pointer in the pp structure is - * also a combined address, but needs to be combined every time it is needed, - * and separated when the partss are needed... This is done with the isamd_ - * _block, _type, and _addr macros. The _addr takes block and type as args, - * in that order. This conflicts with the order these are often mentioned in - * the debug log calls, and other places, leading to small mistakes here - * and there. - * - * Needs cleaning! The way diff blocks are handled in append and reading is - * quite different, and likely to give maintenance problems. - * - * log levels (set isamddebug=x in zebra.cfg (or what ever cfg file you use) ) - * 0 = no logging. Default - * 1 = no logging here. isamd logs overall statistics - * 2 = Each call to isamd_append with start address and no more - * 3 = Start and type of append, start of merge, and result of append - * 4 = Block allocations - * 5 = Block-level operations (read/write) - * 6 = Details about diff blocks etc. - * 7 = Log each record as it passes the system (once) - * 8 = Log raw and (de)coded data - * 9 = Anything else that may be useful - * .. = Anything needed to hunt a specific bug - * (note that all tests in the code are like debug>3, which means 4 or above!) - * - * Design for the new and improved isamd - * Key points: - * - The first block is only diffs, no straight data - * - Additional blocks are straight data - * - When a diff block gets filled up, a data block is created by - * merging the diffs with the data - * - * Structure - * - Isamd_pp: buffer for diffs and for data - * keep both pos, type, and combined address - * routine to set the address - * - diffbuf: lengths as short ints, or bytes for small blocks - * - keys are of key_struct, not just a number of bytes. - * - * Routines - * - isamd_append - * - create_new_block if needed - * - append_diffs - * - load_diffs - * - get diffend, start encoding - * - while input data - * - encode it - * - if no room, then realloc block in larger size - * - if still no room, merge and exit - * - append in the block - * - * - merge - * - just as before, except that merges also input data directly - * - writes into new data blocks - * - * - * - isamd.c: load firstpp, load datablock - * save firstpp, save datablock - * - Readlength, writelength - handling right size of len fields - * - isamd_read_main_item: take also a merge input structure, and merge it too - * - prefilter: cache two inputs, and check if they cancel. - * - single-item optimization - * - * questions: Should we realloc firstblocks in a different size as the main - * blocks. Makes a sideways seek, which is bound to be slowe. But saves some - * update time. Compromise: alloc the first one in the size of the datablock, - * but increase if necessary. Large blocks get a large diff, ok. Small ones - * may get an extra seek in read, but save merges. - */ +/* $Id: merge-d.c,v 1.28 2002-08-02 19:26:56 adam Exp $ + Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 + Index Data Aps + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Zebra; see the file LICENSE.zebra. If not, write to the +Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. +*/ + + #define NEW_ISAM_D 1 /* not yet ready to delete the old one! */ @@ -111,7 +29,7 @@ #include #include #include -#include +#include #include "../index/index.h" #include "isamd-p.h" @@ -131,6 +49,179 @@ struct ISAMD_DIFF_s { #define DT_INPU 3 // input data to be merged #define DT_DONE 4 // done with all input here + + +/*************************************************************** + * Input preprocess filter + ***************************************************************/ + + +#define FILTER_NOTYET -1 /* no data read in yet, to be done */ + +struct ISAMD_FILTER_s { + ISAMD_I data; /* where the data comes from */ + ISAMD is; /* for debug flags */ + struct it_key k1; /* the next item to be returned */ + int m1; /* mode for k1 */ + int r1; /* result for read of k1, or NOTYET */ + struct it_key k2; /* the one after that */ + int m2; + int r2; +}; + +typedef struct ISAMD_FILTER_s *FILTER; + + +void filter_fill(FILTER F) +{ + while ( (F->r1 == FILTER_NOTYET) || (F->r2 == FILTER_NOTYET) ) + { + if (F->r1==FILTER_NOTYET) + { /* move data forward in the filter */ + F->k1 = F->k2; + F->m1 = F->m2; + F->r1 = F->r2; + if ( 0 != F->r1 ) /* not eof */ + F->r2 = FILTER_NOTYET; /* say we want more */ + if (F->is->method->debug > 9) + logf(LOG_LOG,"filt_fill: shift %d.%d m=%d r=%d", + F->k1.sysno, + F->k1.seqno, + F->m1, F->r1); + } + if (F->r2==FILTER_NOTYET) + { /* read new bottom value */ + char *k_ptr = (char*) &F->k2; + F->r2 = (F->data->read_item)(F->data->clientData, &k_ptr, &F->m2); + if (F->is->method->debug > 9) + logf(LOG_LOG,"filt_fill: read %d.%d m=%d r=%d", + F->k2.sysno, F->k2.seqno, F->m2, F->r2); + } + if ( (F->k1.sysno == F->k2.sysno) && + (F->k1.seqno == F->k2.seqno) && + (F->m1 != F->m2) && + (F->r1 >0 ) && (F->r2 >0) ) + { /* del-ins pair of same key (not eof) , ignore both */ + if (F->is->method->debug > 9) + logf(LOG_LOG,"filt_fill: skipped %d.%d m=%d/%d r=%d/%d", + F->k1.sysno, F->k1.seqno, + F->m1,F->m2, F->r1,F->r2); + F->r1 = FILTER_NOTYET; + F->r2 = FILTER_NOTYET; + } + } /* while */ +} /* filter_fill */ + + +FILTER filter_open( ISAMD is, ISAMD_I data ) +{ + FILTER F = (FILTER) xmalloc(sizeof(struct ISAMD_FILTER_s)); + F->is = is; + F->data = data; + F->k1.sysno=0; + F->k1.seqno=0; + F->k2=F->k1; + F->m1 = F->m2 = 0; + F->r1 = F->r2 = FILTER_NOTYET; + filter_fill(F); + return F; +} + +static void filter_close (FILTER F) +{ + xfree(F); +} + +static int filter_read( FILTER F, + struct it_key *k, + int *mode) +{ + int res; + filter_fill(F); + if (F->is->method->debug > 9) + logf(LOG_LOG,"filt_read: reading %d.%d m=%d r=%d", + F->k1.sysno, F->k1.seqno, F->m1, F->r1); + res = F->r1; + if(res) + { + *k = F->k1; + *mode= F->m1; + } + F->r1 = FILTER_NOTYET; + return res; +} + +static int filter_isempty(FILTER F) +{ + return ( (0 == F->r1) && (0 == F->r2)) ; +} + +static int filter_only_one(FILTER F) +{ + return ( (0 != F->r1) && (0 == F->r2)); +} + +/* We may need backfilling, if we read a lonely key to make */ +/* a singleton, but its bitw will not fit in. Then we need to */ +/* process it normally, which means reading it again. So we */ +/* need to unread it first. Luckily the filter is empty at that */ +/* point */ +static void filter_backfill(FILTER F, struct it_key *k, int mode) +{ + assert(F->r1 == FILTER_NOTYET ); /* not overwriting data! */ + F->k1=*k; + F->m1=mode; + F->r1=1; /* ok read */ +} + + +/*************************************************************** + * Singleton encoding + ***************************************************************/ +/* When there is only a single item, we don't allocate a block + * for it, but code it in the directory entry directly, if it + * fits. + */ + +#define DEC_SYSBITS 15 +#define DEC_SEQBITS 15 +#define DEC_MASK(n) ((1<<(n))-1) + +#define SINGLETON_BIT (1<<(DEC_SYSBITS+DEC_SEQBITS+1)) + +int is_singleton(ISAMD_P ipos) +{ + return 0; /* no singletons any more */ + return ( ipos != 0 ) && ( ipos & SINGLETON_BIT ); +} + + +int singleton_encode(struct it_key *k) +/* encodes the key into one int. If it does not fit, returns 0 */ +{ + return 0; /* no more singletons */ + if ( (k->sysno & DEC_MASK(DEC_SYSBITS) ) != k->sysno ) + return 0; /* no room dor sysno */ + if ( (k->seqno & DEC_MASK(DEC_SYSBITS) ) != k->seqno ) + return 0; /* no room dor sysno */ + return (k->sysno | (k->seqno << DEC_SYSBITS) ) | SINGLETON_BIT; +} + +void singleton_decode (int code, struct it_key *k) +{ + assert (code & SINGLETON_BIT); + k->sysno = code & DEC_MASK(DEC_SYSBITS); + code = code >> DEC_SYSBITS; + k->seqno = code & DEC_MASK(DEC_SEQBITS); +} + + +/*************************************************************** + * General support routines + ***************************************************************/ + + + static char *hexdump(unsigned char *p, int len, char *buff) { static char localbuff[128]; char bytebuff[8]; @@ -145,29 +236,7 @@ static char *hexdump(unsigned char *p, int len, char *buff) { return buff; } -/*************************************************************** - *************************************************************** - *************************************************************** - *************************************************************** - *************************************************************** - *************************************************************** - *************************************************************** - *************************************************************** - *************************************************************** - *************************************************************** - *************************************************************** - *************************************************************** - *************************************************************** - *************************************************************** - *************************************************************** - *************************************************************** - *************************************************************** - ***************************************************************/ - -/*************************************************************** - * General support routines - ***************************************************************/ static void isamd_reduceblock(ISAMD_PP pp) /* takes a large block, and reduces its category if possible */ @@ -292,7 +361,7 @@ static void getDiffInfo(ISAMD_PP pp ) if ( (pp->is->method->debug > 0) && (pp->diffinfo[i].maxidx > pp->is->method->filecat[pp->cat].bsize) ) - { /* bug-hunting, this fails on some long runs that log too much */ + { logf(LOG_LOG,"Bad MaxIx!!! %s:%d: diffidx=%d", __FILE__,__LINE__, diffidx); logf(LOG_LOG,"i=%d maxix=%d bsz=%d", i, pp->diffinfo[i].maxidx, @@ -424,7 +493,8 @@ int isamd_read_item_merge ( ISAMD_PP pp, char **dst, struct it_key *p_key, /* the data item that didn't fit*/ - ISAMD_I data) /* more input data comes here */ + /* ISAMD_I data) */ /* more input data comes here */ + FILTER filt) /* more input data comes here */ { /* The last two args can be null for ordinary reads */ char *keyptr; char *codeptr; @@ -460,7 +530,7 @@ int isamd_read_item_merge ( p_key->sysno=p_key->seqno=0; /* used it up */ } - if (data) + if (filt) { /* we have a whole input stream to inject */ pp->diffinfo[i].difftype=DT_INPU; } @@ -469,41 +539,7 @@ int isamd_read_item_merge ( while (retry) { - retry=0; - -#ifdef SKIPTHIS - - if (0==pp->diffinfo[0].key.sysno) - { /* 0 is special case, main data. */ - oldoffs=pp->offset; - keyptr=(char*) &(pp->diffinfo[0].key); - pp->diffinfo[0].mode = ! isamd_read_main_item(pp,&keyptr); - if (pp->is->method->debug > 7) - logf(LOG_LOG,"isamd_read_item: read main at %d-%d %d.%d (%x.%x)", - oldoffs,pp->offset, - pp->diffinfo[0].key.sysno, pp->diffinfo[0].key.seqno, - pp->diffinfo[0].key.sysno, pp->diffinfo[0].key.seqno); - } /* get main data */ - - if ( (0==pp->diffinfo[1].key.sysno) && (-1==pp->diffinfo[1].maxidx) ) - { /* 1 is another special case, the input data at merge */ - keyptr = (char *) &pp->diffinfo[1].key; - i = (*data->read_item)(data->clientData, &keyptr, &pp->diffinfo[1].mode); - if (!i) - { /* did not get it */ - pp->diffinfo[1].key.sysno=0; - pp->diffinfo[1].maxidx=0; /* signal the end */ - } - if (pp->is->method->debug >7) - logf(LOG_LOG,"merge: read diff m=%d %d.%d (%x.%x)", - pp->diffinfo[1].mode, - pp->diffinfo[1].key.sysno, pp->diffinfo[1].key.seqno, - pp->diffinfo[1].key.sysno, pp->diffinfo[1].key.seqno ); - } /* get input data */ - -#endif // SKIPTHIS - - + retry=0; winner = 0; for (i=0; (!retry) && (pp->diffinfo[i].difftype); i++) { @@ -560,7 +596,9 @@ int isamd_read_item_merge ( else if (pp->diffinfo[i].difftype==DT_INPU) { keyptr = (char *) &pp->diffinfo[i].key; - rc = (*data->read_item)(data->clientData, &keyptr, &pp->diffinfo[i].mode); + /* rc = (*data->read_item)(data->clientData, &keyptr, &pp->diffinfo[i].mode); */ + rc = filter_read(filt, &pp->diffinfo[i].key, + &pp->diffinfo[i].mode); if (!rc) { /* did not get it */ pp->diffinfo[i].key.sysno=0; @@ -655,8 +693,11 @@ int isamd_read_item_merge ( assert(winner==0); /* if nothing found, nothing comes from a diff */ cmp= 0; /* eof */ } - if (pp->is->method->debug >9) - logf(LOG_LOG,"mergeDB4: sysno[1]=%d", pp->diffinfo[1].key.sysno); /*!*/ + if (cmp) + ++(pp->is->no_read_keys); + else + ++(pp->is->no_read_eof); + return cmp; } /* isamd_read_item */ @@ -674,7 +715,9 @@ int isamd_read_item (ISAMD_PP pp, char **dst) static int merge ( ISAMD_PP firstpp, /* first pp (with diffs) */ struct it_key *p_key, /* the data item that didn't fit*/ - ISAMD_I data) /* more input data comes here */ + FILTER filt, /* more input data arriving here */ + char *dictentry, /* the thin in the dictionary */ + int dictlen) /* and its size */ { int diffidx; int killblk=0; @@ -683,14 +726,14 @@ static int merge ( ISAMD_PP firstpp, /* first pp (with diffs) */ int r_more = 1; ISAMD_PP pp; ISAMD_PP readpp=firstpp; - int retval=0; + int retpos=0; int diffcat = firstpp->cat; /* keep the category of the diffblock even */ /* if it is going to be empty now. */ /* Alternative: Make it the minimal, and */ /* resize later. Saves disk, but will lead */ /* into bad seeks. */ - ++(readpp->is->files[0].no_merges); + ++(readpp->is->no_merges); /* set up diffs as they should be for reading */ diffidx = ISAMD_BLOCK_OFFSET_1; @@ -714,7 +757,7 @@ static int merge ( ISAMD_PP firstpp, /* first pp (with diffs) */ r_ptr= (char *) &r_key; - r_more = isamd_read_item_merge( readpp, &r_ptr, p_key, data); + r_more = isamd_read_item_merge( readpp, &r_ptr, p_key, filt); if (!r_more) { /* oops, all data has been deleted! what to do??? */ /* never mind, we have at least one more delta to add to the block */ @@ -724,18 +767,18 @@ static int merge ( ISAMD_PP firstpp, /* first pp (with diffs) */ if (readpp->is->method->debug >5) logf(LOG_LOG,"isamd_merge:all data has been deleted (nk=%d) ", readpp->numKeys); - //assert (readpp->numKeys == 0); /* no longer true! */ } /* set up the new blocks for simple writing */ - firstpp=isamd_pp_open(readpp->is,isamd_addr(0, diffcat)); + /* firstpp=isamd_pp_open(readpp->is,isamd_addr(0, diffcat)); */ + firstpp=isamd_pp_create(readpp->is, diffcat); firstpp->pos=isamd_alloc_block(firstpp->is,diffcat); if (readpp->is->method->debug >3) logf(LOG_LOG,"isamd_merge: allocated new firstpp %d=%d:%d", isamd_addr(firstpp->pos,firstpp->cat), firstpp->cat, firstpp->pos ); - pp=isamd_pp_open(readpp->is,isamd_addr(0,readpp->is->max_cat) ); + pp=isamd_pp_create(readpp->is,readpp->is->max_cat ); pp->offset=pp->size=ISAMD_BLOCK_OFFSET_N; while (r_more) @@ -757,12 +800,12 @@ static int merge ( ISAMD_PP firstpp, /* first pp (with diffs) */ /* (try to) read next item */ r_ptr= (char *) &r_key; - r_more = isamd_read_item_merge( readpp, &r_ptr,0,data); + r_more = isamd_read_item_merge( readpp, &r_ptr,0,filt); } /* while read */ - firstpp->diffs=0; +// firstpp->diffs=0; isamd_reduceblock(pp); /* reduce size if possible */ @@ -788,10 +831,17 @@ static int merge ( ISAMD_PP firstpp, /* first pp (with diffs) */ firstpp->size = firstpp->offset = ISAMD_BLOCK_OFFSET_1; /* nothing there */ memset(firstpp->buf,'\0',firstpp->is->method->filecat[firstpp->cat].bsize); save_first_pp(firstpp); - retval = isamd_addr(firstpp->pos, firstpp->cat); + retpos = isamd_addr(firstpp->pos, firstpp->cat); isamd_pp_close(firstpp); - return retval; + /* Create the dict entry */ + /*!*/ /* it could be this could go in the dict as well, if there's */ + /* been really many deletes. Somehow I suspect that is not the */ + /* case. FIXME: Collect statistics and see if needed */ + dictentry[0]=0; /* mark as a real isam */ + memcpy(dictentry+1, &retpos, sizeof(ISAMD_P)); + dictlen=sizeof(ISAMD_P)+1; + return dictlen; } /* merge */ @@ -804,8 +854,12 @@ static int merge ( ISAMD_PP firstpp, /* first pp (with diffs) */ -static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data) +static int append_diffs( + ISAMD is, + char *dictentry, int dictlen, + FILTER filt) { + ISAMD_P ipos; struct it_key i_key; /* one input item */ char *i_item = (char *) &i_key; /* same as chars */ char *i_ptr=i_item; @@ -821,26 +875,31 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data) char *c_ptr = codebuff; int codelen; int merge_rc; - int retval=0; + ISAMD_P retpos; + int dsize; - if (0==ipos) + if (0==dictlen) { - firstpp=isamd_pp_open(is, isamd_addr(0,0) ); + firstpp=isamd_pp_create(is, 0 ); firstpp->size=firstpp->offset=ISAMD_BLOCK_OFFSET_1; /* create in smallest category, will expand later */ - ++(is->files[0].no_fbuilds); + ++(is->no_fbuilds); } else { - firstpp=isamd_pp_open(is, ipos); - ++(is->files[0].no_appds); + firstpp=isamd_pp_open(is, dictentry, dictlen); + if (dictentry[0] ) + ipos=0; + else + memcpy(&ipos,dictentry+1,sizeof(ISAMD_P)); + ++(is->no_appds); } if (is->method->debug >2) - logf(LOG_LOG,"isamd_appd: Start ipos=%d=%d:%d n=%d=%d:%d nk=%d", + logf(LOG_LOG,"isamd_appd: Start ipos=%d=%d:%d n=%d=%d:%d nk=%d sz=%d", ipos, isamd_type(ipos), isamd_block(ipos), firstpp->next, isamd_type(firstpp->next), isamd_block(firstpp->next), - firstpp->numKeys); + firstpp->numKeys, firstpp->size); maxsize = is->method->filecat[firstpp->cat].bsize; difflenidx = diffidx = firstpp->size; @@ -848,8 +907,9 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data) diffidx+=sizeof(int); /* difflen will be stored here */ /* read first input */ - i_ptr = i_item; - i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode); + //i_ptr = i_item; //!!! + i_more = filter_read(filt, &i_key, &i_mode); + /* i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode); */ if (is->method->debug >6) logf(LOG_LOG,"isamd_appd: start m=%d %d.%d=%x.%x: %d", @@ -865,7 +925,7 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data) i_key.seqno = i_key.seqno * 2 + i_mode; c_ptr=codebuff; - i_ptr=i_item; + i_ptr=i_item; (*is->method->code_item)(ISAMD_ENCODE, firstpp->decodeClientData, &c_ptr, &i_ptr); codelen = c_ptr - codebuff; @@ -877,31 +937,42 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data) if (diffidx + codelen > maxsize ) { /* block full */ - if (firstpp->cat < firstpp->is->max_cat) - { /* just increase the block size */ + while ( (firstpp->cat < firstpp->is->max_cat) && + (diffidx + codelen > maxsize) ) + { /* try to increase the block size */ if (firstpp->pos > 0) /* free the old block if allocated */ isamd_release_block(is, firstpp->cat, firstpp->pos); ++firstpp->cat; maxsize = is->method->filecat[firstpp->cat].bsize; firstpp->pos=0; /* need to allocate it when saving */ if (is->method->debug >3) - logf(LOG_LOG,"isamd_appd: increased diff block to %d (%d)", + logf(LOG_LOG,"isamd_appd: increased diff block sz to %d (%d)", firstpp->cat, maxsize); } - else - { /* max size already - can't help, need to merge it */ + if ((firstpp->cat >= firstpp->is->max_cat) && + (diffidx + codelen > maxsize) ) + { /* max size - can't help, need to merge it */ if (is->method->debug >7) - logf(LOG_LOG,"isamd_appd: block full"); + logf(LOG_LOG,"isamd_appd: need to merge"); if (is->method->debug >9) //!!!!! logf(LOG_LOG,"isamd_appd: going to merge with m=%d %d.%d", i_mode, i_key.sysno, i_key.seqno); - merge_rc = merge (firstpp, &i_key, data); + merge_rc = merge (firstpp, &i_key, filt, dictentry, dictlen); if (0!=merge_rc) return merge_rc; /* merge handled them all ! */ assert(!"merge returned zero ??"); } /* need to merge */ } /* block full */ - + + if (!( diffidx+codelen <= maxsize )) + { /* bug hunting */ + logf(LOG_LOG,"OOPS, diffidx problem: d=%d c=%d s=%d > m=%d", + diffidx, codelen, diffidx+codelen, maxsize); + logf(LOG_LOG,"ipos=%d f=%d=%d:%d", + ipos, + isamd_addr(firstpp->pos, firstpp->cat), + firstpp->cat, firstpp->pos ); + } assert ( diffidx+codelen <= maxsize ); /* save the diff */ @@ -919,7 +990,8 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data) /* (try to) read the next input */ i_ptr = i_item; - i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode); + i_more = filter_read(filt, &i_key, &i_mode); + /* i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode); */ if ( (i_more) && (is->method->debug >6) ) logf(LOG_LOG,"isamd_appd: got m=%d %d.%d=%x.%x: %d", i_mode, @@ -933,25 +1005,94 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data) while ( (difflenidx-diffidx<=sizeof(int)+1) && (difflenidxbuf[difflenidx++]='\0'; - if (0==firstpp->pos) /* need to (re)alloc the block */ - firstpp->pos = isamd_alloc_block(is, firstpp->cat); + if (firstpp->numKeys==0) + { + /* FIXME: Release blocks that may be allocated !!! */ + return 0; /* don't bother storing this! */ + } - retval = save_first_pp( firstpp ); - isamd_pp_close(firstpp); + dsize=diffidx-ISAMD_BLOCK_OFFSET_1; + /* logf(LOG_LOG,"!! nxt=%d diffidx=%d ds=%d", + firstpp->next, diffidx, dsize); */ + + if ( (0==firstpp->next) && (dsize numKeys < 128); + assert(firstpp->numKeys >0); + /* actually, 255 is good enough, but sign mismatches... */ + /* in real life, 4-5 is as much as we can hope for, as long */ + /* as ISAMD_MAX_DICT_LEN is reasonably small (8) */ + dictentry[0]=firstpp->numKeys; + memcpy(dictentry+1, firstpp->buf+ISAMD_BLOCK_OFFSET_1, dsize); + dictlen=dsize+1; + } + else + { + if (0==firstpp->pos) /* need to (re)alloc the block */ + firstpp->pos = isamd_alloc_block(is, firstpp->cat); + retpos = save_first_pp( firstpp ); + isamd_pp_close(firstpp); + dictentry[0]=0; /* mark as a real isam */ + memcpy(dictentry+1, &retpos, sizeof(ISAMD_P)); + dictlen=sizeof(ISAMD_P)+1; + } - return retval; + return dictlen; } /* append_diffs */ /************************************************************* - * isamd_append itself, Sweet, isn't it + * isamd_append itself *************************************************************/ -ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data) +int isamd_append (ISAMD is, char *dictentry, int dictlen, ISAMD_I data) +/*ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data) */ { - return append_diffs(is,ipos,data); + FILTER F = filter_open(is,data); + int newlen=0; + + if ( filter_isempty(F) ) /* can be, if del-ins of the same */ + { + if (is->method->debug >3) + logf(LOG_LOG,"isamd_appd: nothing to do "); + filter_close(F); + ++(is->no_non); + return dictlen; /* without doing anything at all */ + } + +#ifdef SKIPTHIS + /* The old way to handle singletons */ + if ( ( 0==ipos) && filter_only_one(F) ) + { + struct it_key k; + int mode; + filter_read(F,&k,&mode); + assert(mode); + rc = singleton_encode(&k); + if (!rc) + { + if (is->method->debug >9) + logf(LOG_LOG,"isamd_appd: singleton didn't fit, backfilling"); + filter_backfill(F,&k, mode); + } + if (is->method->debug >9) + logf(LOG_LOG,"isamd_appd: singleton %d (%x)", + rc,rc); + if (rc) + is->no_singles++; + assert ( (rc==0) || is_singleton(rc) ); + } + newlen = append_diffs(is,ipos,F); +#endif + newlen = append_diffs(is,dictentry,dictlen,F); + filter_close(F); + + if (is->method->debug >2) + logf(LOG_LOG,"isamd_appd: ret len=%d ", newlen); + return newlen; } /* isamd_append */ @@ -962,7 +1103,36 @@ ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data) /* * $Log: merge-d.c,v $ - * Revision 1.19 1999-09-13 13:28:28 heikki + * Revision 1.28 2002-08-02 19:26:56 adam + * Towards GPL + * + * Revision 1.27 2002/07/12 18:12:21 heikki + * Isam-D now stores small entries directly in the dictionary. + * Needs more tuning and cleaning... + * + * Revision 1.26 2002/07/11 16:16:00 heikki + * Fixed a bug in isamd, failed to store a single key when its bits + * did not fit into a singleton. + * + * Revision 1.25 1999/11/30 13:48:04 adam + * Improved installation. Updated for inclusion of YAZ header files. + * + * Revision 1.24 1999/10/05 09:57:40 heikki + * Tuning the isam-d (and fixed a small "detail") + * + * Revision 1.23 1999/09/27 14:36:36 heikki + * singletons + * + * Revision 1.22 1999/09/23 18:01:18 heikki + * singleton optimising + * + * Revision 1.21 1999/09/21 17:36:43 heikki + * Added filter function. Not much of effect on the small test set... + * + * Revision 1.20 1999/09/20 15:48:06 heikki + * Small changes + * + * Revision 1.19 1999/09/13 13:28:28 heikki * isam-d optimizing: merging input data in the same go * * Revision 1.18 1999/08/25 18:09:24 heikki @@ -1018,5 +1188,3 @@ ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data) */ - -