Comments about optimising

[idzebra-moved-to-github.git] / isamc / merge-d.c
diff --git a/isamc/merge-d.c b/isamc/merge-d.c

index 8c41ebd..3f12488 100644 (file)
--- a/isamc/merge-d.c
+++ b/isamc/merge-d.c
@@ -3,20 +3,29 @@
   * See the file LICENSE for details.
   * Heikki Levanto
   *
- * $Id: merge-d.c,v 1.13 1999-08-18 13:59:19 heikki Exp $
+ * $Id: merge-d.c,v 1.16 1999-08-24 10:12:02 heikki Exp $
   *
- * todo
- *  - Clean up log levels
+ * missing
+ *
+ * optimize
   *  - Input filter: Eliminate del-ins pairs, tell if only one entry (or none)
- *  - single-entry optimizing
+ *  - single-entry optimizing (keep the one entry in the dict, no block)
   *  - study and optimize block sizes (later)
   *  - Clean up the different ways diffs are handled in writing and reading
   *  - Keep a merge-count in the firstpp, and if the block has already been
   *    merged, reduce it to a larger size even if it could fit in a small one!
   *  - Keep minimum freespace in the category table, and use that in reduce!
+ *  - pass a space-needed for separateDiffBlock and reduce to be able to 
+ *    reserve more room for diffs, or to force a separate (larger?) block
+ *  - Idea: Simplify the structure, so that the first block is always diffs.
+ *    On small blocks, that is all we have. Once a block has been merged, we
+ *    allocate the first main block and a (new) firstblock for diffs. From
+ *    that point on the word has two blocks for it. 
+ *  - On allocating more blocks (in append), check the order of blocks, and
+ *    if needed, swap them. 
+ *  - In merge, merge also with the input data.
   *
   * bugs
- *  - Still has not been able to run a complete long test on bagel!
   *
   * caveat
   *  There is a confusion about the block addresses. cat or type is the category,
@@ -32,7 +41,7 @@
   *  Needs cleaning! The way diff blocks are handled in append and reading is
   *  quite different, and likely to give maintenance problems.
   *
- *  log levels (set isamd=x in zebra.cfg (or what ever cfg file you use) )
+ *  log levels (set isamddebug=x in zebra.cfg (or whatever cfg file you use) )
   *    0 = no logging. Default
   *    1 = no logging here. isamd logs overall statistics
   *    2 = Each call to isamd_append with start address and no more
@@ -43,7 +52,7 @@
   *    7 = Log each record as it passes the system (once)
   *    8 = Log raw and (de)coded data
   *    9 = Anything else that may be useful
- *   .. = Anything needed toi hunt a specific bug
+ *   .. = Anything needed to hunt a specific bug
   *  (note that all tests in the code are like debug>3, which means 4 or above!)
   */
  
@@ -572,6 +581,8 @@ static int isamd_build_first_block(ISAMD is, ISAMD_I data)
     
     char hexbuff[64];
     
+   ++(is->files[0].no_fbuilds);
+
     firstpp=pp=isamd_pp_open(is, isamd_addr(0,is->max_cat));
     firstpp->size = firstpp->offset = ISAMD_BLOCK_OFFSET_1;
     
@@ -633,6 +644,8 @@ static int merge ( ISAMD_PP *p_firstpp,   /* first pp of the chain */
    ISAMD_PP firstpp;  /* the new first, the one we write into */
    ISAMD_PP pp;
    void *encoder_data;
+
+  ++(readpp->is->files[0].no_merges);
       
    /* set up diffs as they should be for reading */
    readpp->offset= ISAMD_BLOCK_OFFSET_1; 
@@ -729,9 +742,6 @@ static int merge ( ISAMD_PP *p_firstpp,   /* first pp of the chain */
  
    } /* while read */
    
-  /* TODO: while pkey is an insert, and after last key inserted, append it */
-  /* will prevent multiple merges on large insert runs */  
-
    /* set things up so that append can continue */
    isamd_reduceblock(firstpp);
    firstpp->diffs=0; 
@@ -791,6 +801,9 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data)
     char *c_ptr = codebuff;
     int codelen;
     int merge_rc;
+   int mergecount=0;
+
+   ++(is->files[0].no_appds);
  
     firstpp=isamd_pp_open(is, ipos);
     if (is->method->debug >2) 
@@ -837,6 +850,8 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data)
              logf(LOG_LOG,"isamd_appd: block pp=%p buf=%p [%d]:%s",
                 pp, pp->buf, 
                 difflenidx, hexdump(&pp->buf[difflenidx],8,0));
+         if (mergecount++)
+             ++(is->files[0].no_remerges);
           merge_rc = merge (&firstpp, &pp, &i_key);
           if (0!=merge_rc)
             return merge_rc;  /* merge handled them all ! */
@@ -916,23 +931,22 @@ ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data)
     else
        retval = append_diffs(is,ipos,data);
  
-   if (0)  /*!*/ 
-   {
-     void *p1=xmalloc(100);
-     void *p2=xmalloc(100);
-     void *p3=xmalloc(100);
-     logf(LOG_LOG,"Traversing xmalloc stuff. p1=%p p2=%p p3=%p",p1,p2,p3);
-     xmalloc_trav("end of append"); /*!*/
-     assert(!"foo");
-   }
-   
     return retval;
  } /*  isamd_append */
  
  
  /*
   * $Log: merge-d.c,v $
- * Revision 1.13  1999-08-18 13:59:19  heikki
+ * Revision 1.16  1999-08-24 10:12:02  heikki
+ * Comments about optimising
+ *
+ * Revision 1.15  1999/08/22 08:26:34  heikki
+ * COmments
+ *
+ * Revision 1.14  1999/08/20 12:25:58  heikki
+ * Statistics in isamd
+ *
+ * Revision 1.13  1999/08/18 13:59:19  heikki
   * Fixed another unlikely difflen bug
   *
   * Revision 1.12  1999/08/18 13:28:17  heikki