Version info to stdout.

[idzebra-moved-to-github.git] / isamc / merge-d.c
diff --git a/isamc/merge-d.c b/isamc/merge-d.c

index 317818f..d73a459 100644 (file)
--- a/isamc/merge-d.c
+++ b/isamc/merge-d.c
@@ -1,98 +1,26 @@
-/*
- * Copyright (c) 1996-1998, Index Data.
- * See the file LICENSE for details.
- * Heikki Levanto
- *
- * $Id: merge-d.c,v 1.20 1999-09-20 15:48:06 heikki Exp $
- *
- * bugs
- *   (none)
- *
- * missing
- *
- * optimize
- *  - Input filter: Eliminate del-ins pairs, tell if only one entry (or none)
- *  - single-entry optimizing (keep the one entry in the dict, no block)
- *  - study and optimize block sizes (later)
- *  - find a way to decide the size of an empty diffblock (after merge)
- *  - On allocating more blocks (in append and merge), check the order of 
- *    blocks, and if needed, swap them. 
- *  - Write a routine to save/load indexes into a block, save only as many 
- *    bytes as needed (size, diff, diffindexes)
- *
- *
- * caveat
- *  There is a confusion about the block addresses. cat or type is the category,
- *  pos or block is the block number. pp structures keep these two separate,
- *  and combine when saving the pp. The next pointer in the pp structure is
- *  also a combined address, but needs to be combined every time it is needed,
- *  and separated when the partss are needed... This is done with the isamd_
- *  _block, _type, and _addr macros. The _addr takes block and type as args,
- *  in that order. This conflicts with the order these are often mentioned in 
- *  the debug log calls, and other places, leading to small mistakes here
- *  and there. 
- *
- *  Needs cleaning! The way diff blocks are handled in append and reading is
- *  quite different, and likely to give maintenance problems.
- *
- *  log levels (set isamddebug=x in zebra.cfg (or what ever cfg file you use) )
- *    0 = no logging. Default
- *    1 = no logging here. isamd logs overall statistics
- *    2 = Each call to isamd_append with start address and no more
- *    3 = Start and type of append, start of merge, and result of append
- *    4 = Block allocations
- *    5 = Block-level operations (read/write)
- *    6 = Details about diff blocks etc.
- *    7 = Log each record as it passes the system (once)
- *    8 = Log raw and (de)coded data
- *    9 = Anything else that may be useful
- *   .. = Anything needed to hunt a specific bug
- *  (note that all tests in the code are like debug>3, which means 4 or above!)
- *
- * Design for the new and improved isamd
- * Key points:
- *  - The first block is only diffs, no straight data
- *  - Additional blocks are straight data
- *  - When a diff block gets filled up, a data block is created by
- *    merging the diffs with the data
- *
- * Structure
- *  - Isamd_pp: buffer for diffs and for data
- *              keep both pos, type, and combined address
- *              routine to set the address
- *  - diffbuf: lengths as short ints, or bytes for small blocks
- *  - keys are of key_struct, not just a number of bytes.
- *
- * Routines
- *  - isamd_append
- *    - create_new_block if needed
- *    - append_diffs
- *      - load_diffs 
- *      - get diffend, start encoding
- *      - while input data
- *        - encode it
- *        - if no room, then realloc block in larger size
- *        - if still no room, merge and exit
- *        - append in the block
- *
- * - merge
- *   - just as before, except that merges also input data directly
- *   - writes into new data blocks
- *       
- *      
- * - isamd.c: load firstpp, load datablock
- *            save firstpp, save datablock
- * - Readlength, writelength - handling right size of len fields
- * - isamd_read_main_item: take also a merge input structure, and merge it too
- * - prefilter: cache two inputs, and check if they cancel.
- * - single-item optimization
- * 
- * questions: Should we realloc firstblocks in a different size as the main
- * blocks. Makes a sideways seek, which is bound to be slowe. But saves some
- * update time. Compromise: alloc the first one in the size of the datablock,
- * but increase if necessary. Large blocks get a large diff, ok. Small ones
- * may get an extra seek in read, but save merges.
- */
+/* $Id: merge-d.c,v 1.28 2002-08-02 19:26:56 adam Exp $
+   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
+   Index Data Aps
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra.  If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+*/
+
+
  
  
  #define NEW_ISAM_D 1  /* not yet ready to delete the old one! */
@@ -101,7 +29,7 @@
  #include <assert.h>
  #include <string.h>
  #include <stdio.h>
-#include <log.h>
+#include <yaz/log.h>
  #include "../index/index.h"
  #include "isamd-p.h"
  
@@ -127,37 +55,168 @@ struct ISAMD_DIFF_s {
   * Input preprocess filter
   ***************************************************************/
  
+
+#define FILTER_NOTYET -1  /* no data read in yet, to be done */
+
  struct ISAMD_FILTER_s {
    ISAMD_I data;          /* where the data comes from */
+  ISAMD is;              /* for debug flags */
    struct it_key k1;      /* the next item to be returned */
+  int           m1;      /* mode for k1 */
+  int           r1;      /* result for read of k1, or NOTYET */
    struct it_key k2;      /* the one after that */
+  int           m2;
+  int           r2;
  };
  
-static void init_filter( ISAMD_I data )
+typedef struct ISAMD_FILTER_s *FILTER;
+
+
+void filter_fill(FILTER F)
+{
+  while ( (F->r1 == FILTER_NOTYET) || (F->r2 == FILTER_NOTYET) )
+  {
+     if (F->r1==FILTER_NOTYET) 
+     { /* move data forward in the filter */
+        F->k1 = F->k2;
+        F->m1 = F->m2;
+        F->r1 = F->r2;
+        if ( 0 != F->r1 ) /* not eof */
+          F->r2 = FILTER_NOTYET; /* say we want more */
+        if (F->is->method->debug > 9)  
+          logf(LOG_LOG,"filt_fill: shift %d.%d m=%d r=%d",
+             F->k1.sysno, 
+             F->k1.seqno, 
+             F->m1, F->r1);
+     }
+     if (F->r2==FILTER_NOTYET)
+     { /* read new bottom value */
+        char *k_ptr = (char*) &F->k2;
+        F->r2 = (F->data->read_item)(F->data->clientData, &k_ptr, &F->m2); 
+        if (F->is->method->debug > 9)
+          logf(LOG_LOG,"filt_fill: read %d.%d m=%d r=%d",
+             F->k2.sysno, F->k2.seqno, F->m2, F->r2);
+     }  
+     if ( (F->k1.sysno == F->k2.sysno) && 
+          (F->k1.seqno == F->k2.seqno) &&
+          (F->m1 != F->m2) &&
+          (F->r1 >0 ) && (F->r2 >0) )
+     { /* del-ins pair of same key (not eof) , ignore both */
+       if (F->is->method->debug > 9)
+         logf(LOG_LOG,"filt_fill: skipped %d.%d m=%d/%d r=%d/%d",
+            F->k1.sysno, F->k1.seqno, 
+            F->m1,F->m2, F->r1,F->r2);
+       F->r1 = FILTER_NOTYET;
+       F->r2 = FILTER_NOTYET;
+     }
+  } /* while */
+} /* filter_fill */
+
+
+FILTER filter_open( ISAMD is, ISAMD_I data )
+{
+  FILTER F = (FILTER) xmalloc(sizeof(struct ISAMD_FILTER_s));
+  F->is = is;
+  F->data = data;
+  F->k1.sysno=0;
+  F->k1.seqno=0;
+  F->k2=F->k1; 
+  F->m1 = F->m2 = 0;
+  F->r1 = F->r2 = FILTER_NOTYET;
+  filter_fill(F);
+  return F;
+}
+
+static void filter_close (FILTER F)
  {
+  xfree(F);
  }
  
-static void close_filter ()
+static int filter_read( FILTER F, 
+                        struct it_key *k,
+                        int *mode)
  {
+  int res;
+  filter_fill(F);
+  if (F->is->method->debug > 9)
+    logf(LOG_LOG,"filt_read: reading %d.%d m=%d r=%d",
+       F->k1.sysno, F->k1.seqno, F->m1, F->r1);
+  res  = F->r1;
+  if(res) 
+  {
+    *k = F->k1;
+    *mode= F->m1;
+  }
+  F->r1 = FILTER_NOTYET;
+  return res;
  }
  
-static int filter_read( struct it_key *k)
+static int filter_isempty(FILTER F)
  {
-  return 0;
+  return ( (0 == F->r1) && (0 == F->r2)) ;
  }
  
-static int filter_empty()
+static int filter_only_one(FILTER F)
  {
-  return 0;
+  return ( (0 != F->r1) && (0 == F->r2));
  }
  
-static int filter_only_one()
+/* We may need backfilling, if we read a lonely key to make */
+/* a singleton, but its bitw will not fit in. Then we need to */
+/* process it normally, which means reading it again. So we  */
+/* need to unread it first. Luckily the filter is empty at that */
+/* point */
+static void filter_backfill(FILTER F, struct it_key *k, int mode)
  {
-  return 0;
+  assert(F->r1 == FILTER_NOTYET ); /* not overwriting data! */
+  F->k1=*k;
+  F->m1=mode;
+  F->r1=1; /* ok read */
  }
  
  
  /***************************************************************
+ * Singleton encoding
+ ***************************************************************/
+/* When there is only a single item, we don't allocate a block
+ * for it, but code it in the directory entry directly, if it
+ * fits.
+ */
+
+#define DEC_SYSBITS 15
+#define DEC_SEQBITS 15
+#define DEC_MASK(n) ((1<<(n))-1)
+
+#define SINGLETON_BIT (1<<(DEC_SYSBITS+DEC_SEQBITS+1))
+
+int is_singleton(ISAMD_P ipos)
+{
+  return 0; /* no singletons any more */
+  return ( ipos != 0 ) && ( ipos & SINGLETON_BIT );
+}
+
+
+int singleton_encode(struct it_key *k)
+/* encodes the key into one int. If it does not fit, returns 0 */
+{
+  return 0; /* no more singletons */
+  if ( (k->sysno & DEC_MASK(DEC_SYSBITS) ) != k->sysno )
+    return 0;  /* no room dor sysno */
+  if ( (k->seqno & DEC_MASK(DEC_SYSBITS) ) != k->seqno )
+    return 0;  /* no room dor sysno */
+  return (k->sysno | (k->seqno << DEC_SYSBITS) ) | SINGLETON_BIT;
+}
+ 
+void singleton_decode (int code, struct it_key *k)
+{
+  assert (code & SINGLETON_BIT);
+  k->sysno = code & DEC_MASK(DEC_SYSBITS);
+  code = code >> DEC_SYSBITS; 
+  k->seqno = code & DEC_MASK(DEC_SEQBITS);
+} 
+ 
+ 
+/***************************************************************
   * General support routines
   ***************************************************************/
  
@@ -302,7 +361,7 @@ static void getDiffInfo(ISAMD_PP pp )
  
        if ( (pp->is->method->debug > 0) &&
           (pp->diffinfo[i].maxidx > pp->is->method->filecat[pp->cat].bsize) )
-      { /* bug-hunting, this fails on some long runs that log too much */
+      { 
           logf(LOG_LOG,"Bad MaxIx!!! %s:%d: diffidx=%d", 
                         __FILE__,__LINE__, diffidx);
           logf(LOG_LOG,"i=%d maxix=%d bsz=%d", i, pp->diffinfo[i].maxidx,
@@ -434,7 +493,8 @@ int isamd_read_item_merge (
                       ISAMD_PP pp, 
                       char **dst,
                       struct it_key *p_key,  /* the data item that didn't fit*/
-                     ISAMD_I data)          /* more input data comes here */
+              /*       ISAMD_I data)  */    /* more input data comes here */
+                     FILTER filt)           /* more input data comes here */
  {                    /* The last two args can be null for ordinary reads */
    char *keyptr;
    char *codeptr;
@@ -470,7 +530,7 @@ int isamd_read_item_merge (
         p_key->sysno=p_key->seqno=0;  /* used it up */
       }
  
-     if (data)
+     if (filt)
       { /* we have a whole input stream to inject */
         pp->diffinfo[i].difftype=DT_INPU;
       }
@@ -479,41 +539,7 @@ int isamd_read_item_merge (
    while (retry)
  
    {
-     retry=0;
-
-#ifdef SKIPTHIS     
-
-     if (0==pp->diffinfo[0].key.sysno) 
-     { /* 0 is special case, main data. */
-        oldoffs=pp->offset;
-        keyptr=(char*) &(pp->diffinfo[0].key);
-        pp->diffinfo[0].mode = ! isamd_read_main_item(pp,&keyptr);
-        if (pp->is->method->debug > 7)
-          logf(LOG_LOG,"isamd_read_item: read main at %d-%d %d.%d (%x.%x)",
-            oldoffs,pp->offset,
-            pp->diffinfo[0].key.sysno, pp->diffinfo[0].key.seqno,
-            pp->diffinfo[0].key.sysno, pp->diffinfo[0].key.seqno);
-     } /* get main data */
-
-     if ( (0==pp->diffinfo[1].key.sysno) && (-1==pp->diffinfo[1].maxidx) )
-     { /* 1 is another special case, the input data at merge */
-        keyptr = (char *) &pp->diffinfo[1].key;
-        i = (*data->read_item)(data->clientData, &keyptr, &pp->diffinfo[1].mode); 
-        if (!i) 
-        {  /* did not get it */
-           pp->diffinfo[1].key.sysno=0;
-           pp->diffinfo[1].maxidx=0; /* signal the end */
-        }
-        if (pp->is->method->debug >7)
-           logf(LOG_LOG,"merge: read diff m=%d %d.%d (%x.%x)",
-              pp->diffinfo[1].mode, 
-              pp->diffinfo[1].key.sysno, pp->diffinfo[1].key.seqno,
-              pp->diffinfo[1].key.sysno, pp->diffinfo[1].key.seqno );        
-     } /* get input data */
-
-#endif // SKIPTHIS
-
-     
+     retry=0;     
       winner = 0;
       for (i=0; (!retry) && (pp->diffinfo[i].difftype); i++)
       {
@@ -570,7 +596,9 @@ int isamd_read_item_merge (
             else if (pp->diffinfo[i].difftype==DT_INPU)
             {
                keyptr = (char *) &pp->diffinfo[i].key;
-              rc = (*data->read_item)(data->clientData, &keyptr, &pp->diffinfo[i].mode); 
+         /*     rc = (*data->read_item)(data->clientData, &keyptr, &pp->diffinfo[i].mode); */
+              rc = filter_read(filt, &pp->diffinfo[i].key, 
+                                     &pp->diffinfo[i].mode); 
                if (!rc) 
                {  /* did not get it */
                   pp->diffinfo[i].key.sysno=0;
@@ -665,8 +693,11 @@ int isamd_read_item_merge (
      assert(winner==0); /* if nothing found, nothing comes from a diff */
      cmp= 0; /* eof */
    }
-  if (pp->is->method->debug >9)
-     logf(LOG_LOG,"mergeDB4: sysno[1]=%d", pp->diffinfo[1].key.sysno); /*!*/
+  if (cmp)
+    ++(pp->is->no_read_keys);
+  else
+    ++(pp->is->no_read_eof);
+
    return cmp;
     
  } /* isamd_read_item */
@@ -684,7 +715,9 @@ int isamd_read_item (ISAMD_PP pp, char **dst)
  
  static int merge ( ISAMD_PP firstpp,      /* first pp (with diffs) */
                     struct it_key *p_key,  /* the data item that didn't fit*/
-                   ISAMD_I data)          /* more input data comes here */
+                   FILTER filt,           /* more input data arriving here */
+                   char *dictentry,       /* the thin in the dictionary */
+                   int dictlen)           /* and its size */
  {
    int diffidx;  
    int killblk=0;
@@ -693,14 +726,14 @@ static int merge ( ISAMD_PP firstpp,      /* first pp (with diffs) */
    int r_more = 1;
    ISAMD_PP pp;
    ISAMD_PP readpp=firstpp;
-  int retval=0;
+  int retpos=0;
    int diffcat = firstpp->cat;  /* keep the category of the diffblock even */
                                 /* if it is going to be empty now. */
                                 /* Alternative: Make it the minimal, and */
                                 /* resize later. Saves disk, but will lead */
                                 /* into bad seeks. */
    
-  ++(readpp->is->files[0].no_merges);
+  ++(readpp->is->no_merges);
       
    /* set up diffs as they should be for reading */
    diffidx = ISAMD_BLOCK_OFFSET_1; 
@@ -724,7 +757,7 @@ static int merge ( ISAMD_PP firstpp,      /* first pp (with diffs) */
    
  
    r_ptr= (char *) &r_key;
-  r_more = isamd_read_item_merge( readpp, &r_ptr, p_key, data);
+  r_more = isamd_read_item_merge( readpp, &r_ptr, p_key, filt);
    if (!r_more)  
    { /* oops, all data has been deleted! what to do??? */
      /* never mind, we have at least one more delta to add to the block */
@@ -734,18 +767,18 @@ static int merge ( ISAMD_PP firstpp,      /* first pp (with diffs) */
       if (readpp->is->method->debug >5) 
           logf(LOG_LOG,"isamd_merge:all data has been deleted (nk=%d) ",
              readpp->numKeys);
-    //assert (readpp->numKeys == 0);  /* no longer true! */
    }
  
  
    /* set up the new blocks for simple writing */
-  firstpp=isamd_pp_open(readpp->is,isamd_addr(0, diffcat));
+  /* firstpp=isamd_pp_open(readpp->is,isamd_addr(0, diffcat)); */
+  firstpp=isamd_pp_create(readpp->is, diffcat);
    firstpp->pos=isamd_alloc_block(firstpp->is,diffcat);
    if (readpp->is->method->debug >3)   
        logf(LOG_LOG,"isamd_merge: allocated new firstpp %d=%d:%d",
            isamd_addr(firstpp->pos,firstpp->cat), firstpp->cat, firstpp->pos );
    
-  pp=isamd_pp_open(readpp->is,isamd_addr(0,readpp->is->max_cat) );
+  pp=isamd_pp_create(readpp->is,readpp->is->max_cat );
    pp->offset=pp->size=ISAMD_BLOCK_OFFSET_N;
    
    while (r_more)
@@ -767,7 +800,7 @@ static int merge ( ISAMD_PP firstpp,      /* first pp (with diffs) */
  
       /* (try to) read next item */
       r_ptr= (char *) &r_key;
-     r_more = isamd_read_item_merge( readpp, &r_ptr,0,data);
+     r_more = isamd_read_item_merge( readpp, &r_ptr,0,filt);
  
    } /* while read */
    
@@ -798,10 +831,17 @@ static int merge ( ISAMD_PP firstpp,      /* first pp (with diffs) */
    firstpp->size = firstpp->offset = ISAMD_BLOCK_OFFSET_1;  /* nothing there */
    memset(firstpp->buf,'\0',firstpp->is->method->filecat[firstpp->cat].bsize);
    save_first_pp(firstpp);
-  retval = isamd_addr(firstpp->pos, firstpp->cat);
+  retpos = isamd_addr(firstpp->pos, firstpp->cat);
    isamd_pp_close(firstpp);
  
-  return retval; 
+  /* Create the dict entry */
+  /*!*/ /* it could be this could go in the dict as well, if there's */
+        /* been really many deletes. Somehow I suspect that is not the */
+        /* case. FIXME: Collect statistics and see if needed */
+  dictentry[0]=0; /* mark as a real isam */
+  memcpy(dictentry+1, &retpos, sizeof(ISAMD_P));
+  dictlen=sizeof(ISAMD_P)+1;
+  return dictlen;
    
  } /* merge */
  
@@ -814,8 +854,12 @@ static int merge ( ISAMD_PP firstpp,      /* first pp (with diffs) */
  
  
  
-static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data)
+static int append_diffs(
+      ISAMD is, 
+      char *dictentry, int dictlen,
+      FILTER filt)
  {
+   ISAMD_P ipos;
     struct it_key i_key;    /* one input item */
     char *i_item = (char *) &i_key;  /* same as chars */
     char *i_ptr=i_item;
@@ -831,26 +875,31 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data)
     char *c_ptr = codebuff;
     int codelen;
     int merge_rc;
-   int retval=0;
+   ISAMD_P retpos;
+   int dsize;
  
-   if (0==ipos)
+   if (0==dictlen)
     {
-       firstpp=isamd_pp_open(is, isamd_addr(0,0) );
+       firstpp=isamd_pp_create(is, 0 );
         firstpp->size=firstpp->offset=ISAMD_BLOCK_OFFSET_1;
           /* create in smallest category, will expand later */
-       ++(is->files[0].no_fbuilds);
+       ++(is->no_fbuilds);
     } 
     else
     {
-       firstpp=isamd_pp_open(is, ipos);
-       ++(is->files[0].no_appds);
+       firstpp=isamd_pp_open(is, dictentry, dictlen);
+       if (dictentry[0] )
+          ipos=0;
+       else 
+           memcpy(&ipos,dictentry+1,sizeof(ISAMD_P));
+       ++(is->no_appds);
     }
  
     if (is->method->debug >2) 
-      logf(LOG_LOG,"isamd_appd: Start ipos=%d=%d:%d n=%d=%d:%d nk=%d",
+      logf(LOG_LOG,"isamd_appd: Start ipos=%d=%d:%d n=%d=%d:%d nk=%d sz=%d",
          ipos, isamd_type(ipos), isamd_block(ipos),
          firstpp->next, isamd_type(firstpp->next), isamd_block(firstpp->next),
-        firstpp->numKeys);
+        firstpp->numKeys, firstpp->size);
     maxsize = is->method->filecat[firstpp->cat].bsize; 
     
     difflenidx = diffidx = firstpp->size;
@@ -858,8 +907,9 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data)
     diffidx+=sizeof(int);  /* difflen will be stored here */
     
     /* read first input */
-   i_ptr = i_item;
-   i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode); 
+   //i_ptr = i_item;   //!!!
+   i_more = filter_read(filt, &i_key, &i_mode); 
+   /* i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode); */
  
     if (is->method->debug >6)
        logf(LOG_LOG,"isamd_appd: start m=%d %d.%d=%x.%x: %d",
@@ -875,7 +925,7 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data)
        i_key.seqno = i_key.seqno * 2 + i_mode;  
  
        c_ptr=codebuff;
-      i_ptr=i_item;
+      i_ptr=i_item; 
        (*is->method->code_item)(ISAMD_ENCODE, firstpp->decodeClientData, 
                                 &c_ptr, &i_ptr);
        codelen = c_ptr - codebuff;
@@ -887,31 +937,42 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data)
  
        if (diffidx + codelen > maxsize )
        { /* block full */
-         if (firstpp->cat < firstpp->is->max_cat)
-         { /* just increase the block size */
+         while ( (firstpp->cat < firstpp->is->max_cat) &&
+                 (diffidx + codelen > maxsize) )
+         { /* try to increase the block size */
               if (firstpp->pos > 0)  /* free the old block if allocated */
                   isamd_release_block(is, firstpp->cat, firstpp->pos);
               ++firstpp->cat;
               maxsize = is->method->filecat[firstpp->cat].bsize; 
               firstpp->pos=0; /* need to allocate it when saving */             
               if (is->method->debug >3)
-                logf(LOG_LOG,"isamd_appd: increased diff block to %d (%d)",
+                logf(LOG_LOG,"isamd_appd: increased diff block sz to %d (%d)",
                     firstpp->cat, maxsize);
           }
-         else 
-         { /* max size already - can't help, need to merge it */
+         if  ((firstpp->cat >= firstpp->is->max_cat) &&
+                 (diffidx + codelen > maxsize) )
+         { /* max size - can't help, need to merge it */
               if (is->method->debug >7)
-                logf(LOG_LOG,"isamd_appd: block full");
+                logf(LOG_LOG,"isamd_appd: need to merge");
               if (is->method->debug >9)  //!!!!!
                  logf(LOG_LOG,"isamd_appd: going to merge with m=%d %d.%d",
                       i_mode, i_key.sysno, i_key.seqno);
-             merge_rc = merge (firstpp, &i_key, data);
+             merge_rc = merge (firstpp, &i_key, filt, dictentry, dictlen);
               if (0!=merge_rc)
                 return merge_rc;  /* merge handled them all ! */
               assert(!"merge returned zero ??");
           } /* need to merge */
        } /* block full */
-      
+
+      if (!( diffidx+codelen <= maxsize )) 
+      { /* bug hunting */
+         logf(LOG_LOG,"OOPS, diffidx problem: d=%d c=%d s=%d > m=%d",
+           diffidx, codelen, diffidx+codelen, maxsize);
+         logf(LOG_LOG,"ipos=%d f=%d=%d:%d",
+           ipos, 
+           isamd_addr(firstpp->pos, firstpp->cat),
+           firstpp->cat, firstpp->pos );
+      }
        assert ( diffidx+codelen <= maxsize );
        
        /* save the diff */ 
@@ -929,7 +990,8 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data)
        
        /* (try to) read the next input */
        i_ptr = i_item;
-      i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode); 
+      i_more = filter_read(filt, &i_key, &i_mode); 
+    /*  i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode); */
        if ( (i_more) && (is->method->debug >6) )
           logf(LOG_LOG,"isamd_appd: got m=%d %d.%d=%x.%x: %d",
              i_mode, 
@@ -943,25 +1005,94 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data)
     while ( (difflenidx-diffidx<=sizeof(int)+1) && (difflenidx<maxsize))
       firstpp->buf[difflenidx++]='\0';
  
-   if (0==firstpp->pos)  /* need to (re)alloc the block */
-      firstpp->pos = isamd_alloc_block(is, firstpp->cat);
+   if (firstpp->numKeys==0)
+   { 
+       /* FIXME: Release blocks that may be allocated !!! */
+       return 0; /* don't bother storing this! */
+   }
  
-   retval = save_first_pp( firstpp );
-   isamd_pp_close(firstpp);
+   dsize=diffidx-ISAMD_BLOCK_OFFSET_1;
+   /* logf(LOG_LOG,"!! nxt=%d diffidx=%d ds=%d", 
+           firstpp->next, diffidx, dsize);  */
+
+   if ( (0==firstpp->next) && (dsize <ISAMD_MAX_DICT_LEN))
+   {
+        /* logf(LOG_LOG,"building a dict entry!!"); */
+        assert(firstpp->numKeys < 128);
+        assert(firstpp->numKeys >0);
+        /* actually, 255 is good enough, but sign mismatches... */
+        /* in real life, 4-5 is as much as we can hope for, as long */
+        /* as ISAMD_MAX_DICT_LEN is reasonably small (8) */
+        dictentry[0]=firstpp->numKeys;
+        memcpy(dictentry+1, firstpp->buf+ISAMD_BLOCK_OFFSET_1, dsize);
+        dictlen=dsize+1;
+   }
+   else 
+   {
+       if (0==firstpp->pos)  /* need to (re)alloc the block */
+           firstpp->pos = isamd_alloc_block(is, firstpp->cat);
+       retpos = save_first_pp( firstpp );
+       isamd_pp_close(firstpp);
+       dictentry[0]=0; /* mark as a real isam */
+       memcpy(dictentry+1, &retpos, sizeof(ISAMD_P));
+       dictlen=sizeof(ISAMD_P)+1;
+   }
      
-   return retval;
+   return dictlen;
  } /* append_diffs */
  
  
  
  
  /*************************************************************
- * isamd_append itself, Sweet, isn't it
+ * isamd_append itself
   *************************************************************/
  
-ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data)
+int isamd_append (ISAMD is, char *dictentry, int dictlen, ISAMD_I data)
+/*ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data) */
  {
-   return append_diffs(is,ipos,data);
+   FILTER F = filter_open(is,data);
+   int newlen=0;
+
+   if ( filter_isempty(F) ) /* can be, if del-ins of the same */
+   {
+      if (is->method->debug >3) 
+         logf(LOG_LOG,"isamd_appd: nothing to do ");
+      filter_close(F);
+      ++(is->no_non);
+      return dictlen; /* without doing anything at all */
+   }
+
+#ifdef SKIPTHIS 
+   /* The old way to handle singletons */
+   if ( ( 0==ipos) && filter_only_one(F) )
+   {
+      struct it_key k;
+      int mode;
+      filter_read(F,&k,&mode);     
+      assert(mode); 
+      rc = singleton_encode(&k);
+      if (!rc) 
+      {
+      if (is->method->debug >9) 
+         logf(LOG_LOG,"isamd_appd: singleton didn't fit, backfilling");
+         filter_backfill(F,&k, mode);
+      }
+      if (is->method->debug >9) 
+         logf(LOG_LOG,"isamd_appd: singleton %d (%x)",
+           rc,rc);
+      if (rc)
+        is->no_singles++;
+      assert ( (rc==0) || is_singleton(rc) );
+   }
+   newlen = append_diffs(is,ipos,F); 
+#endif
+   newlen = append_diffs(is,dictentry,dictlen,F); 
+   filter_close(F);
+
+   if (is->method->debug >2) 
+      logf(LOG_LOG,"isamd_appd: ret len=%d ", newlen);
+   return newlen;
  } /*  isamd_append */
  
  
@@ -972,7 +1103,33 @@ ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data)
  
  /*
   * $Log: merge-d.c,v $
- * Revision 1.20  1999-09-20 15:48:06  heikki
+ * Revision 1.28  2002-08-02 19:26:56  adam
+ * Towards GPL
+ *
+ * Revision 1.27  2002/07/12 18:12:21  heikki
+ * Isam-D now stores small entries directly in the dictionary.
+ * Needs more tuning and cleaning...
+ *
+ * Revision 1.26  2002/07/11 16:16:00  heikki
+ * Fixed a bug in isamd, failed to store a single key when its bits
+ * did not fit into a singleton.
+ *
+ * Revision 1.25  1999/11/30 13:48:04  adam
+ * Improved installation. Updated for inclusion of YAZ header files.
+ *
+ * Revision 1.24  1999/10/05 09:57:40  heikki
+ * Tuning the isam-d (and fixed a small "detail")
+ *
+ * Revision 1.23  1999/09/27 14:36:36  heikki
+ * singletons
+ *
+ * Revision 1.22  1999/09/23 18:01:18  heikki
+ * singleton optimising
+ *
+ * Revision 1.21  1999/09/21 17:36:43  heikki
+ * Added filter function. Not much of effect on the small test set...
+ *
+ * Revision 1.20  1999/09/20 15:48:06  heikki
   * Small changes
   *
   * Revision 1.19  1999/09/13 13:28:28  heikki
@@ -1031,5 +1188,3 @@ ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data)
   */
  
  
-
-