* See the file LICENSE for details.
* Heikki Levanto
*
- * $Id: merge-d.c,v 1.20 1999-09-20 15:48:06 heikki Exp $
+ * $Id: merge-d.c,v 1.25 1999-11-30 13:48:04 adam Exp $
*
* bugs
- * (none)
+ * sinleton-bit has to be in the high end, not low, so as not to confuse
+ * ordinary small numbers, like in the next pointer..
*
* missing
*
* optimize
- * - Input filter: Eliminate del-ins pairs, tell if only one entry (or none)
- * - single-entry optimizing (keep the one entry in the dict, no block)
* - study and optimize block sizes (later)
* - find a way to decide the size of an empty diffblock (after merge)
* - On allocating more blocks (in append and merge), check the order of
#include <assert.h>
#include <string.h>
#include <stdio.h>
-#include <log.h>
+#include <yaz/log.h>
#include "../index/index.h"
#include "isamd-p.h"
* Input preprocess filter
***************************************************************/
+
+#define FILTER_NOTYET -1 /* no data read in yet, to be done */
+
struct ISAMD_FILTER_s {
ISAMD_I data; /* where the data comes from */
+ ISAMD is; /* for debug flags */
struct it_key k1; /* the next item to be returned */
+ int m1; /* mode for k1 */
+ int r1; /* result for read of k1, or NOTYET */
struct it_key k2; /* the one after that */
+ int m2;
+ int r2;
};
-static void init_filter( ISAMD_I data )
+typedef struct ISAMD_FILTER_s *FILTER;
+
+
+void filter_fill(FILTER F)
+{
+ while ( (F->r1 == FILTER_NOTYET) || (F->r2 == FILTER_NOTYET) )
+ {
+ if (F->r1==FILTER_NOTYET)
+ { /* move data forward in the filter */
+ F->k1 = F->k2;
+ F->m1 = F->m2;
+ F->r1 = F->r2;
+ if ( 0 != F->r1 ) /* not eof */
+ F->r2 = FILTER_NOTYET; /* say we want more */
+ if (F->is->method->debug > 9)
+ logf(LOG_LOG,"filt_fill: shift %d.%d m=%d r=%d",
+ F->k1.sysno,
+ F->k1.seqno,
+ F->m1, F->r1);
+ }
+ if (F->r2==FILTER_NOTYET)
+ { /* read new bottom value */
+ char *k_ptr = (char*) &F->k2;
+ F->r2 = (F->data->read_item)(F->data->clientData, &k_ptr, &F->m2);
+ if (F->is->method->debug > 9)
+ logf(LOG_LOG,"filt_fill: read %d.%d m=%d r=%d",
+ F->k2.sysno, F->k2.seqno, F->m2, F->r2);
+ }
+ if ( (F->k1.sysno == F->k2.sysno) &&
+ (F->k1.seqno == F->k2.seqno) &&
+ (F->m1 != F->m2) &&
+ (F->r1 >0 ) && (F->r2 >0) )
+ { /* del-ins pair of same key (not eof) , ignore both */
+ if (F->is->method->debug > 9)
+ logf(LOG_LOG,"filt_fill: skipped %d.%d m=%d/%d r=%d/%d",
+ F->k1.sysno, F->k1.seqno,
+ F->m1,F->m2, F->r1,F->r2);
+ F->r1 = FILTER_NOTYET;
+ F->r2 = FILTER_NOTYET;
+ }
+ } /* while */
+} /* filter_fill */
+
+
+FILTER filter_open( ISAMD is, ISAMD_I data )
{
+ FILTER F = (FILTER) xmalloc(sizeof(struct ISAMD_FILTER_s));
+ F->is = is;
+ F->data = data;
+ F->k1.sysno=0;
+ F->k1.seqno=0;
+ F->k2=F->k1;
+ F->m1 = F->m2 = 0;
+ F->r1 = F->r2 = FILTER_NOTYET;
+ filter_fill(F);
+ return F;
}
-static void close_filter ()
+static void filter_close (FILTER F)
{
+ xfree(F);
}
-static int filter_read( struct it_key *k)
+static int filter_read( FILTER F,
+ struct it_key *k,
+ int *mode)
{
- return 0;
+ int res;
+ filter_fill(F);
+ if (F->is->method->debug > 9)
+ logf(LOG_LOG,"filt_read: reading %d.%d m=%d r=%d",
+ F->k1.sysno, F->k1.seqno, F->m1, F->r1);
+ res = F->r1;
+ if(res)
+ {
+ *k = F->k1;
+ *mode= F->m1;
+ }
+ F->r1 = FILTER_NOTYET;
+ return res;
}
-static int filter_empty()
+static int filter_isempty(FILTER F)
{
- return 0;
+ return ( (0 == F->r1) && (0 == F->r2)) ;
}
-static int filter_only_one()
+static int filter_only_one(FILTER F)
{
- return 0;
+ return ( (0 != F->r1) && (0 == F->r2));
}
+
+
+/***************************************************************
+ * Singleton encoding
+ ***************************************************************/
+/* When there is only a single item, we don't allocate a block
+ * for it, but code it in the directory entry directly, if it
+ * fits.
+ */
+
+#define DEC_SYSBITS 15
+#define DEC_SEQBITS 15
+#define DEC_MASK(n) ((1<<(n))-1)
+
+#define SINGLETON_BIT (1<<(DEC_SYSBITS+DEC_SEQBITS+1))
+
+int is_singleton(ISAMD_P ipos)
+{
+ return ( ipos != 0 ) && ( ipos & SINGLETON_BIT );
+}
+
+
+int singleton_encode(struct it_key *k)
+/* encodes the key into one int. If it does not fit, returns 0 */
+{
+ if ( (k->sysno & DEC_MASK(DEC_SYSBITS) ) != k->sysno )
+ return 0; /* no room dor sysno */
+ if ( (k->seqno & DEC_MASK(DEC_SYSBITS) ) != k->seqno )
+ return 0; /* no room dor sysno */
+ return (k->sysno | (k->seqno << DEC_SYSBITS) ) | SINGLETON_BIT;
+}
+
+void singleton_decode (int code, struct it_key *k)
+{
+ assert (code & SINGLETON_BIT);
+ k->sysno = code & DEC_MASK(DEC_SYSBITS);
+ code = code >> DEC_SYSBITS;
+ k->seqno = code & DEC_MASK(DEC_SEQBITS);
+}
+
+
/***************************************************************
* General support routines
***************************************************************/
if ( (pp->is->method->debug > 0) &&
(pp->diffinfo[i].maxidx > pp->is->method->filecat[pp->cat].bsize) )
- { /* bug-hunting, this fails on some long runs that log too much */
+ {
logf(LOG_LOG,"Bad MaxIx!!! %s:%d: diffidx=%d",
__FILE__,__LINE__, diffidx);
logf(LOG_LOG,"i=%d maxix=%d bsz=%d", i, pp->diffinfo[i].maxidx,
ISAMD_PP pp,
char **dst,
struct it_key *p_key, /* the data item that didn't fit*/
- ISAMD_I data) /* more input data comes here */
+ /* ISAMD_I data) */ /* more input data comes here */
+ FILTER filt) /* more input data comes here */
{ /* The last two args can be null for ordinary reads */
char *keyptr;
char *codeptr;
p_key->sysno=p_key->seqno=0; /* used it up */
}
- if (data)
+ if (filt)
{ /* we have a whole input stream to inject */
pp->diffinfo[i].difftype=DT_INPU;
}
while (retry)
{
- retry=0;
-
-#ifdef SKIPTHIS
-
- if (0==pp->diffinfo[0].key.sysno)
- { /* 0 is special case, main data. */
- oldoffs=pp->offset;
- keyptr=(char*) &(pp->diffinfo[0].key);
- pp->diffinfo[0].mode = ! isamd_read_main_item(pp,&keyptr);
- if (pp->is->method->debug > 7)
- logf(LOG_LOG,"isamd_read_item: read main at %d-%d %d.%d (%x.%x)",
- oldoffs,pp->offset,
- pp->diffinfo[0].key.sysno, pp->diffinfo[0].key.seqno,
- pp->diffinfo[0].key.sysno, pp->diffinfo[0].key.seqno);
- } /* get main data */
-
- if ( (0==pp->diffinfo[1].key.sysno) && (-1==pp->diffinfo[1].maxidx) )
- { /* 1 is another special case, the input data at merge */
- keyptr = (char *) &pp->diffinfo[1].key;
- i = (*data->read_item)(data->clientData, &keyptr, &pp->diffinfo[1].mode);
- if (!i)
- { /* did not get it */
- pp->diffinfo[1].key.sysno=0;
- pp->diffinfo[1].maxidx=0; /* signal the end */
- }
- if (pp->is->method->debug >7)
- logf(LOG_LOG,"merge: read diff m=%d %d.%d (%x.%x)",
- pp->diffinfo[1].mode,
- pp->diffinfo[1].key.sysno, pp->diffinfo[1].key.seqno,
- pp->diffinfo[1].key.sysno, pp->diffinfo[1].key.seqno );
- } /* get input data */
-
-#endif // SKIPTHIS
-
-
+ retry=0;
winner = 0;
for (i=0; (!retry) && (pp->diffinfo[i].difftype); i++)
{
else if (pp->diffinfo[i].difftype==DT_INPU)
{
keyptr = (char *) &pp->diffinfo[i].key;
- rc = (*data->read_item)(data->clientData, &keyptr, &pp->diffinfo[i].mode);
+ /* rc = (*data->read_item)(data->clientData, &keyptr, &pp->diffinfo[i].mode); */
+ rc = filter_read(filt, &pp->diffinfo[i].key,
+ &pp->diffinfo[i].mode);
if (!rc)
{ /* did not get it */
pp->diffinfo[i].key.sysno=0;
assert(winner==0); /* if nothing found, nothing comes from a diff */
cmp= 0; /* eof */
}
- if (pp->is->method->debug >9)
- logf(LOG_LOG,"mergeDB4: sysno[1]=%d", pp->diffinfo[1].key.sysno); /*!*/
+ if (cmp)
+ ++(pp->is->no_read_keys);
+ else
+ ++(pp->is->no_read_eof);
+
return cmp;
} /* isamd_read_item */
static int merge ( ISAMD_PP firstpp, /* first pp (with diffs) */
struct it_key *p_key, /* the data item that didn't fit*/
- ISAMD_I data) /* more input data comes here */
+ /* ISAMD_I data) */ /* more input data comes here */
+ FILTER filt) /* more input data arriving here */
{
int diffidx;
int killblk=0;
/* resize later. Saves disk, but will lead */
/* into bad seeks. */
- ++(readpp->is->files[0].no_merges);
+ ++(readpp->is->no_merges);
/* set up diffs as they should be for reading */
diffidx = ISAMD_BLOCK_OFFSET_1;
r_ptr= (char *) &r_key;
- r_more = isamd_read_item_merge( readpp, &r_ptr, p_key, data);
+/* r_more = isamd_read_item_merge( readpp, &r_ptr, p_key, data); */
+ r_more = isamd_read_item_merge( readpp, &r_ptr, p_key, filt);
if (!r_more)
{ /* oops, all data has been deleted! what to do??? */
/* never mind, we have at least one more delta to add to the block */
if (readpp->is->method->debug >5)
logf(LOG_LOG,"isamd_merge:all data has been deleted (nk=%d) ",
readpp->numKeys);
- //assert (readpp->numKeys == 0); /* no longer true! */
}
/* (try to) read next item */
r_ptr= (char *) &r_key;
- r_more = isamd_read_item_merge( readpp, &r_ptr,0,data);
+ r_more = isamd_read_item_merge( readpp, &r_ptr,0,filt);
} /* while read */
-static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data)
+static int append_diffs(
+ ISAMD is,
+ ISAMD_P ipos,
+ /*ISAMD_I data)*/
+ FILTER filt)
{
struct it_key i_key; /* one input item */
char *i_item = (char *) &i_key; /* same as chars */
firstpp=isamd_pp_open(is, isamd_addr(0,0) );
firstpp->size=firstpp->offset=ISAMD_BLOCK_OFFSET_1;
/* create in smallest category, will expand later */
- ++(is->files[0].no_fbuilds);
+ ++(is->no_fbuilds);
}
else
{
firstpp=isamd_pp_open(is, ipos);
- ++(is->files[0].no_appds);
+ ++(is->no_appds);
}
if (is->method->debug >2)
diffidx+=sizeof(int); /* difflen will be stored here */
/* read first input */
- i_ptr = i_item;
- i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode);
+ //i_ptr = i_item; //!!!
+ i_more = filter_read(filt, &i_key, &i_mode);
+ /* i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode); */
if (is->method->debug >6)
logf(LOG_LOG,"isamd_appd: start m=%d %d.%d=%x.%x: %d",
i_key.seqno = i_key.seqno * 2 + i_mode;
c_ptr=codebuff;
- i_ptr=i_item;
+ i_ptr=i_item;
(*is->method->code_item)(ISAMD_ENCODE, firstpp->decodeClientData,
&c_ptr, &i_ptr);
codelen = c_ptr - codebuff;
if (diffidx + codelen > maxsize )
{ /* block full */
- if (firstpp->cat < firstpp->is->max_cat)
- { /* just increase the block size */
+ while ( (firstpp->cat < firstpp->is->max_cat) &&
+ (diffidx + codelen > maxsize) )
+ { /* try to increase the block size */
if (firstpp->pos > 0) /* free the old block if allocated */
isamd_release_block(is, firstpp->cat, firstpp->pos);
++firstpp->cat;
maxsize = is->method->filecat[firstpp->cat].bsize;
firstpp->pos=0; /* need to allocate it when saving */
if (is->method->debug >3)
- logf(LOG_LOG,"isamd_appd: increased diff block to %d (%d)",
+ logf(LOG_LOG,"isamd_appd: increased diff block sz to %d (%d)",
firstpp->cat, maxsize);
}
- else
- { /* max size already - can't help, need to merge it */
+ if ((firstpp->cat >= firstpp->is->max_cat) &&
+ (diffidx + codelen > maxsize) )
+ { /* max size - can't help, need to merge it */
if (is->method->debug >7)
- logf(LOG_LOG,"isamd_appd: block full");
+ logf(LOG_LOG,"isamd_appd: need to merge");
if (is->method->debug >9) //!!!!!
logf(LOG_LOG,"isamd_appd: going to merge with m=%d %d.%d",
i_mode, i_key.sysno, i_key.seqno);
- merge_rc = merge (firstpp, &i_key, data);
+ merge_rc = merge (firstpp, &i_key, filt);
if (0!=merge_rc)
return merge_rc; /* merge handled them all ! */
assert(!"merge returned zero ??");
} /* need to merge */
} /* block full */
-
+
+ if (!( diffidx+codelen <= maxsize ))
+ { /* bug hunting */
+ logf(LOG_LOG,"OOPS, diffidx problem: d=%d c=%d s=%d > m=%d",
+ diffidx, codelen, diffidx+codelen, maxsize);
+ logf(LOG_LOG,"ipos=%d f=%d=%d:%d",
+ ipos,
+ isamd_addr(firstpp->pos, firstpp->cat),
+ firstpp->cat, firstpp->pos );
+ }
assert ( diffidx+codelen <= maxsize );
/* save the diff */
/* (try to) read the next input */
i_ptr = i_item;
- i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode);
+ i_more = filter_read(filt, &i_key, &i_mode);
+ /* i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode); */
if ( (i_more) && (is->method->debug >6) )
logf(LOG_LOG,"isamd_appd: got m=%d %d.%d=%x.%x: %d",
i_mode,
/*************************************************************
- * isamd_append itself, Sweet, isn't it
+ * isamd_append itself
*************************************************************/
ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data)
{
- return append_diffs(is,ipos,data);
+ FILTER F = filter_open(is,data);
+ ISAMD_P rc=0;
+
+ int olddebug= is->method->debug;
+ if (ipos == 7320)
+ is->method->debug = 99; /*!*/
+
+ if ( filter_isempty(F) ) /* can be, if del-ins of the same */
+ {
+ if (is->method->debug >3)
+ logf(LOG_LOG,"isamd_appd: nothing to do for %d=",ipos);
+ filter_close(F);
+ ++(is->no_non);
+ return ipos; /* without doing anything at all */
+ }
+
+ if ( ( 0==ipos) && filter_only_one(F) )
+ {
+ struct it_key k;
+ int mode;
+ filter_read(F,&k,&mode);
+ assert(mode);
+ rc = singleton_encode(&k);
+ if (is->method->debug >9)
+ logf(LOG_LOG,"isamd_appd: singleton %d (%x)",
+ rc,rc);
+ if (rc)
+ is->no_singles++;
+ assert ( (rc==0) || is_singleton(rc) );
+ }
+ if ( 0==rc) /* either not single, or it did not fit */
+ {
+ rc = append_diffs(is,ipos,F);
+ assert ( ! is_singleton(rc) );
+ /* can happen if we run out of bits, so that block numbers overflow */
+ /* to SINGLETON_BIT */
+ }
+ filter_close(F);
+
+ if (is->method->debug >2)
+ logf(LOG_LOG,"isamd_appd: ret %d=%x (%d=%x)",
+ rc,rc,ipos,ipos);
+ is->method->debug=olddebug; /*!*/
+ return rc;
} /* isamd_append */
/*
* $Log: merge-d.c,v $
- * Revision 1.20 1999-09-20 15:48:06 heikki
+ * Revision 1.25 1999-11-30 13:48:04 adam
+ * Improved installation. Updated for inclusion of YAZ header files.
+ *
+ * Revision 1.24 1999/10/05 09:57:40 heikki
+ * Tuning the isam-d (and fixed a small "detail")
+ *
+ * Revision 1.23 1999/09/27 14:36:36 heikki
+ * singletons
+ *
+ * Revision 1.22 1999/09/23 18:01:18 heikki
+ * singleton optimising
+ *
+ * Revision 1.21 1999/09/21 17:36:43 heikki
+ * Added filter function. Not much of effect on the small test set...
+ *
+ * Revision 1.20 1999/09/20 15:48:06 heikki
* Small changes
*
* Revision 1.19 1999/09/13 13:28:28 heikki
*/
-
-