X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=isamc%2Fmerge-d.c;h=3f12488133262cfbe497d24ed0f295831fd2062d;hb=6d1c06c091a197d911a8e15737ff01ec7ab92166;hp=8c41ebdb1301876711ffa0f86dd13536fa088907;hpb=544e8fcc65541951ba92d0212838a0bebbc3084c;p=idzebra-moved-to-github.git diff --git a/isamc/merge-d.c b/isamc/merge-d.c index 8c41ebd..3f12488 100644 --- a/isamc/merge-d.c +++ b/isamc/merge-d.c @@ -3,20 +3,29 @@ * See the file LICENSE for details. * Heikki Levanto * - * $Id: merge-d.c,v 1.13 1999-08-18 13:59:19 heikki Exp $ + * $Id: merge-d.c,v 1.16 1999-08-24 10:12:02 heikki Exp $ * - * todo - * - Clean up log levels + * missing + * + * optimize * - Input filter: Eliminate del-ins pairs, tell if only one entry (or none) - * - single-entry optimizing + * - single-entry optimizing (keep the one entry in the dict, no block) * - study and optimize block sizes (later) * - Clean up the different ways diffs are handled in writing and reading * - Keep a merge-count in the firstpp, and if the block has already been * merged, reduce it to a larger size even if it could fit in a small one! * - Keep minimum freespace in the category table, and use that in reduce! + * - pass a space-needed for separateDiffBlock and reduce to be able to + * reserve more room for diffs, or to force a separate (larger?) block + * - Idea: Simplify the structure, so that the first block is always diffs. + * On small blocks, that is all we have. Once a block has been merged, we + * allocate the first main block and a (new) firstblock for diffs. From + * that point on the word has two blocks for it. + * - On allocating more blocks (in append), check the order of blocks, and + * if needed, swap them. + * - In merge, merge also with the input data. * * bugs - * - Still has not been able to run a complete long test on bagel! * * caveat * There is a confusion about the block addresses. cat or type is the category, @@ -32,7 +41,7 @@ * Needs cleaning! 
The way diff blocks are handled in append and reading is * quite different, and likely to give maintenance problems. * - * log levels (set isamd=x in zebra.cfg (or what ever cfg file you use) ) + * log levels (set isamddebug=x in zebra.cfg (or what ever cfg file you use) ) * 0 = no logging. Default * 1 = no logging here. isamd logs overall statistics * 2 = Each call to isamd_append with start address and no more @@ -43,7 +52,7 @@ * 7 = Log each record as it passes the system (once) * 8 = Log raw and (de)coded data * 9 = Anything else that may be useful - * .. = Anything needed toi hunt a specific bug + * .. = Anything needed to hunt a specific bug * (note that all tests in the code are like debug>3, which means 4 or above!) */ @@ -572,6 +581,8 @@ static int isamd_build_first_block(ISAMD is, ISAMD_I data) char hexbuff[64]; + ++(is->files[0].no_fbuilds); + firstpp=pp=isamd_pp_open(is, isamd_addr(0,is->max_cat)); firstpp->size = firstpp->offset = ISAMD_BLOCK_OFFSET_1; @@ -633,6 +644,8 @@ static int merge ( ISAMD_PP *p_firstpp, /* first pp of the chain */ ISAMD_PP firstpp; /* the new first, the one we write into */ ISAMD_PP pp; void *encoder_data; + + ++(readpp->is->files[0].no_merges); /* set up diffs as they should be for reading */ readpp->offset= ISAMD_BLOCK_OFFSET_1; @@ -729,9 +742,6 @@ static int merge ( ISAMD_PP *p_firstpp, /* first pp of the chain */ } /* while read */ - /* TODO: while pkey is an insert, and after last key inserted, append it */ - /* will prevent multiple merges on large insert runs */ - /* set things up so that append can continue */ isamd_reduceblock(firstpp); firstpp->diffs=0; @@ -791,6 +801,9 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data) char *c_ptr = codebuff; int codelen; int merge_rc; + int mergecount=0; + + ++(is->files[0].no_appds); firstpp=isamd_pp_open(is, ipos); if (is->method->debug >2) @@ -837,6 +850,8 @@ static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data) logf(LOG_LOG,"isamd_appd: block pp=%p buf=%p 
[%d]:%s", pp, pp->buf, difflenidx, hexdump(&pp->buf[difflenidx],8,0)); + if (mergecount++) + ++(is->files[0].no_remerges); merge_rc = merge (&firstpp, &pp, &i_key); if (0!=merge_rc) return merge_rc; /* merge handled them all ! */ @@ -916,23 +931,22 @@ ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data) else retval = append_diffs(is,ipos,data); - if (0) /*!*/ - { - void *p1=xmalloc(100); - void *p2=xmalloc(100); - void *p3=xmalloc(100); - logf(LOG_LOG,"Traversing xmalloc stuff. p1=%p p2=%p p3=%p",p1,p2,p3); - xmalloc_trav("end of append"); /*!*/ - assert(!"foo"); - } - return retval; } /* isamd_append */ /* * $Log: merge-d.c,v $ - * Revision 1.13 1999-08-18 13:59:19 heikki + * Revision 1.16 1999-08-24 10:12:02 heikki + * Comments about optimising + * + * Revision 1.15 1999/08/22 08:26:34 heikki + * COmments + * + * Revision 1.14 1999/08/20 12:25:58 heikki + * Statistics in isamd + * + * Revision 1.13 1999/08/18 13:59:19 heikki * Fixed another unlikely difflen bug * * Revision 1.12 1999/08/18 13:28:17 heikki