From 72ca1631366b5196106ac777e492b6a70c273651 Mon Sep 17 00:00:00 2001 From: Heikki Levanto Date: Tue, 5 Oct 1999 09:57:40 +0000 Subject: [PATCH] Tuning the isam-d (and fixed a small "detail") --- include/isamd.h | 5 ++ isamc/isamd-p.h | 38 ++++++++---- isamc/isamd.c | 177 +++++++++++++++++++++++++++++++++++++++++-------------- isamc/merge-d.c | 51 +++++++++++----- 4 files changed, 200 insertions(+), 71 deletions(-) diff --git a/include/isamd.h b/include/isamd.h index da8496b..9dc95fe 100644 --- a/include/isamd.h +++ b/include/isamd.h @@ -77,6 +77,8 @@ int isamd_block_size (ISAMD is, int type); void isamd_buildfirstblock(ISAMD_PP pp); void isamd_buildlaterblock(ISAMD_PP pp); + + #ifdef __cplusplus } #endif @@ -86,6 +88,9 @@ void isamd_buildlaterblock(ISAMD_PP pp); /* * $Log: isamd.h,v $ + * Revision 1.3 1999/08/18 08:33:41 heikki + * Fixes + * * Revision 1.2 1999/07/14 13:21:34 heikki * Added isam-d files. Compiles (almost) clean. Doesn't work at all * diff --git a/isamc/isamd-p.h b/isamc/isamd-p.h index 746c14c..84f8b82 100644 --- a/isamc/isamd-p.h +++ b/isamc/isamd-p.h @@ -1,4 +1,4 @@ -/* $Id: isamd-p.h,v 1.8 1999-09-23 18:01:18 heikki Exp $ +/* $Id: isamd-p.h,v 1.9 1999-10-05 09:57:40 heikki Exp $ * Copyright (c) 1995-1996, Index Data. * See the file LICENSE for details. * Heikki Levanto @@ -32,20 +32,15 @@ typedef struct ISAMD_file_s { int no_released; int no_remap; - int no_forward; + int no_forward; /* stats from pp_read, for isam-c compatibility */ int no_backward; int sum_forward; int sum_backward; int no_next; int no_prev; - int no_op_nodiff; /* existing blocks opened for reading without diffs */ - int no_op_intdiff; /* - with internal diffs */ - int no_op_extdiff; /* with separate diff blocks */ - int no_fbuilds; /* number of first-time builds */ - int no_appds; /* number of appends */ - int no_merges; /* number of merges done */ - int no_remerges; /* number of times more than one merge needed */ + int no_op_diffonly;/* number of opens without mainblock */ + int no_op_main; /* number of opens with a main block */ char *alloc_buf; /* free-list handling (?) */ int alloc_entries_num; @@ -60,6 +55,26 @@ struct ISAMD_s { int max_cat; ISAMD_M method; ISAMD_file files; + int last_pos; /* last read/write position for seek stats */ + int last_cat; /* same for category */ + int no_read; /* blocks read (in all categories) */ + int no_write; /* blocks written (in all categories) */ + int no_op_single;/* singleton "blocks" opened */ + int no_read_keys;/* number of keys read (occurences of words) */ + int no_read_main;/* number of main keys read (not diffs) */ + int no_read_eof; /* number of key sequence ends read (no of words read) */ + int no_seek_nxt; /* seeks to the next record (fast) */ + int no_seek_sam; /* seeks to same record (fast) */ + int no_seek_fwd; /* seeks forward */ + int no_seek_prv; /* seeks to previous */ + int no_seek_bak; /* seeks backwards */ + int no_seek_cat; /* seeks to different category (expensive) */ + int no_op_new; /* "open"s for new blocks */ + int no_fbuilds; /* number of first-time builds */ + int no_appds; /* number of appends */ + int no_merges; /* number of merges done */ + int no_non; /* merges without any work */ + int no_singles; /* new items resulting in singletons */ }; @@ -110,7 +125,10 @@ int singleton_encode(struct it_key *k); /* * $Log: isamd-p.h,v $ - * Revision 1.8 1999-09-23 18:01:18 heikki + * Revision 1.9 1999-10-05 09:57:40 heikki + * Tuning the isam-d (and fixed a small "detail") + * + * Revision 1.8 1999/09/23 18:01:18 heikki * singleton optimising * * Revision 1.7 1999/09/20 15:48:06 heikki diff --git a/isamc/isamd.c b/isamc/isamd.c index b1a2122..191a51c 100644 --- a/isamc/isamd.c +++ b/isamc/isamd.c @@ -1,7 +1,7 @@ /* * Copyright (c) 1995-1998, Index Data. * See the file LICENSE for details. - * $Id: isamd.c,v 1.15 1999-09-27 14:36:36 heikki Exp $ + * $Id: isamd.c,v 1.16 1999-10-05 09:57:40 heikki Exp $ * * Isamd - isam with diffs * Programmed by: Heikki Levanto @@ -38,22 +38,35 @@ ISAMD_M isamd_getmethod (ISAMD_M me) { 64, 0 }, #else { 32, 1 }, + { 64, 1 }, + { 128, 1 }, + { 512, 1 }, + { 2048, 1 }, + { 8192, 1 }, + { 32768, 0 }, + +#endif +#ifdef SKIPTHIS + + + + { 32, 1 }, { 128, 1 }, { 512, 1 }, { 2048, 1 }, { 8192, 1 }, { 32768, 1 }, {131072, 0 }, -#endif -/* old values from isamc, long time ago... - { 24, 40 }, - { 128, 256 }, - { 512, 1024 }, - { 2048, 4096 }, - { 8192,16384 }, - { 32768, 0 }, -*/ + { 24, 1 }, /* Experimental sizes */ + { 32, 1 }, + { 64, 1 }, + { 128, 1 }, + { 256, 1 }, + { 512, 1 }, + { 1024, 1 }, + { 2048, 0 }, +#endif }; ISAMD_M m = (ISAMD_M) xmalloc (sizeof(*m)); /* never released! */ @@ -136,16 +149,31 @@ ISAMD isamd_open (BFiles bfs, const char *name, int writeflag, ISAMD_M method) is->files[i].sum_backward = 0; is->files[i].no_next = 0; is->files[i].no_prev = 0; - is->files[i].no_op_nodiff=0; - is->files[i].no_op_intdiff=0; - is->files[i].no_op_extdiff=0; - is->files[i].no_fbuilds=0; - is->files[i].no_appds=0; - is->files[i].no_merges=0; - is->files[i].no_remerges=0; - + is->files[i].no_op_diffonly=0; + is->files[i].no_op_main=0; init_fc (is, i); } + is->last_pos=0; + is->last_cat=0; + is->no_read=0; + is->no_read_main=0; + is->no_write=0; + is->no_op_single=0; + is->no_op_new=0; + is->no_read_keys=0; + is->no_read_eof=0; + is->no_seek_nxt=0; + is->no_seek_sam=0; + is->no_seek_fwd=0; + is->no_seek_prv=0; + is->no_seek_bak=0; + is->no_seek_cat=0; + is->no_fbuilds=0; + is->no_appds=0; + is->no_merges=0; + is->no_non=0; + is->no_singles=0; + return is; } @@ -167,23 +195,24 @@ int isamd_block_size (ISAMD is, int type) int isamd_close (ISAMD is) { int i; + int s; if (is->method->debug>0) { logf (LOG_LOG, "isamd statistics"); - logf (LOG_LOG, "f nxt forw mid-f prev backw mid-b"); + logf (LOG_LOG, "f nxt forw mid-f prev backw mid-b"); for (i = 0; ino_files; i++) - logf (LOG_LOG, "%d%8d%8d%8.1f%8d%8d%8.1f",i, + logf (LOG_LOG, "%d%7d%7d%7.1f%7d%7d%7.1f",i, is->files[i].no_next, is->files[i].no_forward, is->files[i].no_forward ? - (double) is->files[i].sum_forward/is->files[i].no_forward - : 0.0, + (double) is->files[i].sum_forward/is->files[i].no_forward + : 0.0, is->files[i].no_prev, is->files[i].no_backward, is->files[i].no_backward ? - (double) is->files[i].sum_backward/is->files[i].no_backward - : 0.0); + (double) is->files[i].sum_backward/is->files[i].no_backward + : 0.0); } if (is->method->debug>0) logf (LOG_LOG, "f writes reads skipped alloc released "); @@ -208,23 +237,50 @@ int isamd_close (ISAMD is) if (is->method->debug>0) { - logf (LOG_LOG, "f opens simple int ext"); + logf (LOG_LOG, "f opens main diffonly"); for (i = 0; ino_files; i++) { - logf (LOG_LOG, "%d%8d%8d%8d%8d",i, - is->files[i].no_op_nodiff+ - is->files[i].no_op_intdiff+ - is->files[i].no_op_extdiff, - is->files[i].no_op_nodiff, - is->files[i].no_op_intdiff, - is->files[i].no_op_extdiff); + logf (LOG_LOG, "%d%8d%8d%8d",i, + is->files[i].no_op_main+ + is->files[i].no_op_diffonly, + is->files[i].no_op_main, + is->files[i].no_op_diffonly); } - logf (LOG_LOG, " build append merge remrg"); - logf (LOG_LOG, "=%8d%8d%8d%8d", - is->files[0].no_fbuilds, - is->files[0].no_appds, - is->files[0].no_merges, - is->files[0].no_remerges); + logf(LOG_LOG,"single %8d", is->no_op_single); + logf(LOG_LOG,"new %8d", is->no_op_new); + + logf(LOG_LOG, "new build %8d", is->no_fbuilds); + logf(LOG_LOG, "append %8d", is->no_appds); + logf(LOG_LOG, " merges %8d", is->no_merges); + logf(LOG_LOG, " singles %8d", is->no_singles); + logf(LOG_LOG, " non %8d", is->no_non); + + logf(LOG_LOG, "read blocks %8d", is->no_read); + logf(LOG_LOG, "read keys: %8d %8.1f k/bl", + is->no_read_keys, + 1.0*(is->no_read_keys+1)/(is->no_read+1) ); + logf(LOG_LOG, "read main-k %8d %8.1f %% of keys", + is->no_read_main, + 100.0*(is->no_read_main+1)/(is->no_read_keys+1) ); + logf(LOG_LOG, "read ends: %8d %8.1f k/e", + is->no_read_eof, + 1.0*(is->no_read_keys+1)/(is->no_read_eof+1) ); + s= is->no_seek_nxt+ is->no_seek_sam+ is->no_seek_fwd + + is->no_seek_prv+ is->no_seek_bak+ is->no_seek_cat; + if (s==0) + s++; + logf(LOG_LOG, "seek same %8d %8.1f%%", + is->no_seek_sam, 100.0*is->no_seek_sam/s ); + logf(LOG_LOG, "seek next %8d %8.1f%%", + is->no_seek_nxt, 100.0*is->no_seek_nxt/s ); + logf(LOG_LOG, "seek prev %8d %8.1f%%", + is->no_seek_prv, 100.0*is->no_seek_prv/s ); + logf(LOG_LOG, "seek forw %8d %8.1f%%", + is->no_seek_fwd, 100.0*is->no_seek_fwd/s ); + logf(LOG_LOG, "seek back %8d %8.1f%%", + is->no_seek_bak, 100.0*is->no_seek_bak/s ); + logf(LOG_LOG, "seek cat %8d %8.1f%%", + is->no_seek_cat, 100.0*is->no_seek_cat/s ); } xfree (is->files); xfree (is->method); @@ -232,9 +288,29 @@ int isamd_close (ISAMD is) return 0; } +static void isamd_seek_stat(ISAMD is, int cat, int pos) +{ + if (cat != is->last_cat) + is->no_seek_cat++; + else if ( pos == is->last_pos) + is->no_seek_sam++; + else if ( pos == is->last_pos+1) + is->no_seek_nxt++; + else if ( pos == is->last_pos-1) + is->no_seek_prv++; + else if ( pos > is->last_pos) + is->no_seek_fwd++; + else if ( pos < is->last_pos) + is->no_seek_bak++; + is->last_cat = cat; + is->last_pos = pos; +} /* seek_stat */ + int isamd_read_block (ISAMD is, int cat, int pos, char *dst) { + isamd_seek_stat(is,cat,pos); ++(is->files[cat].no_reads); + ++(is->no_read); if (is->method->debug > 6) logf (LOG_LOG, "isamd: read_block %d:%d",cat, pos); return bf_read (is->files[cat].bf, pos, 0, 0, dst); @@ -242,7 +318,9 @@ int isamd_read_block (ISAMD is, int cat, int pos, char *dst) int isamd_write_block (ISAMD is, int cat, int pos, char *src) { + isamd_seek_stat(is,cat,pos); ++(is->files[cat].no_writes); + ++(is->no_write); if (is->method->debug > 6) logf (LOG_LOG, "isamd: write_block %d:%d", cat, pos); return bf_write (is->files[cat].bf, pos, 0, 0, src); @@ -525,12 +603,16 @@ ISAMD_PP isamd_pp_open (ISAMD is, ISAMD_P ipos) ipos,ipos, singlekey.sysno, singlekey.seqno/2, pp->size ); + is->no_op_single++; return pp; } /* singleton */ pp->cat = isamd_type(ipos); pp->pos = isamd_block(ipos); - + + if (0==pp->pos) + is->no_op_new++; + if (pp->pos) { src = pp->buf; @@ -541,10 +623,14 @@ ISAMD_PP isamd_pp_open (ISAMD is, ISAMD_P ipos) src += sizeof(pp->size); memcpy (&pp->numKeys, src, sizeof(pp->numKeys)); src += sizeof(pp->numKeys); - assert (pp->next != pp->pos); + assert (pp->next != isamd_addr(pp->pos,pp->cat)); pp->offset = src - pp->buf; assert (pp->offset == ISAMD_BLOCK_OFFSET_1); assert(pp->size>=ISAMD_BLOCK_OFFSET_1); /*??*/ + if (pp->next) + is->files[pp->cat].no_op_main++; + else + is->files[pp->cat].no_op_diffonly++; } if (is->method->debug > 5) logf (LOG_LOG, "isamd_pp_open %p %d=%d:%d sz=%d n=%d=%d:%d", @@ -559,15 +645,13 @@ ISAMD_PP isamd_pp_open (ISAMD is, ISAMD_P ipos) void isamd_buildfirstblock(ISAMD_PP pp){ char *dst=pp->buf; assert(pp->buf); - assert(pp->next != pp->pos); + assert(pp->next != isamd_addr(pp->pos,pp->cat)); memcpy(dst, &pp->next, sizeof(pp->next) ); dst += sizeof(pp->next); memcpy(dst, &pp->size,sizeof(pp->size)); dst += sizeof(pp->size); memcpy(dst, &pp->numKeys, sizeof(pp->numKeys)); dst += sizeof(pp->numKeys); -// memcpy(dst, &pp->diffs, sizeof(pp->diffs)); -// dst += sizeof(pp->diffs); assert (dst - pp->buf == ISAMD_BLOCK_OFFSET_1); if (pp->is->method->debug > 5) logf (LOG_LOG, "isamd: bldfirst: p=%d=%d:%d n=%d:%d:%d sz=%d nk=%d ", @@ -647,7 +731,7 @@ int isamd_read_main_item (ISAMD_PP pp, char **dst) newcat = isamd_type(pp->next); pp->pos = isamd_block(pp->next); pp->cat = isamd_type(pp->next); - + pp->is->no_read_main++; src = pp->buf; /* read block and save 'next' and 'size' entry */ isamd_read_block (is, pp->cat, pp->pos, src); @@ -761,7 +845,10 @@ void isamd_pp_dump (ISAMD is, ISAMD_P ipos) /* * $Log: isamd.c,v $ - * Revision 1.15 1999-09-27 14:36:36 heikki + * Revision 1.16 1999-10-05 09:57:40 heikki + * Tuning the isam-d (and fixed a small "detail") + * + * Revision 1.15 1999/09/27 14:36:36 heikki * singletons * * Revision 1.14 1999/09/23 18:01:18 heikki diff --git a/isamc/merge-d.c b/isamc/merge-d.c index b24c516..ed553df 100644 --- a/isamc/merge-d.c +++ b/isamc/merge-d.c @@ -3,7 +3,7 @@ * See the file LICENSE for details. * Heikki Levanto * - * $Id: merge-d.c,v 1.23 1999-09-27 14:36:36 heikki Exp $ + * $Id: merge-d.c,v 1.24 1999-10-05 09:57:40 heikki Exp $ * * bugs * sinleton-bit has to be in the high end, not low, so as not to confuse @@ -751,8 +751,11 @@ int isamd_read_item_merge ( assert(winner==0); /* if nothing found, nothing comes from a diff */ cmp= 0; /* eof */ } - if (pp->is->method->debug >9) - logf(LOG_LOG,"mergeDB4: sysno[1]=%d", pp->diffinfo[1].key.sysno); /*!*/ + if (cmp) + ++(pp->is->no_read_keys); + else + ++(pp->is->no_read_eof); + return cmp; } /* isamd_read_item */ @@ -787,7 +790,7 @@ static int merge ( ISAMD_PP firstpp, /* first pp (with diffs) */ /* resize later. Saves disk, but will lead */ /* into bad seeks. */ - ++(readpp->is->files[0].no_merges); + ++(readpp->is->no_merges); /* set up diffs as they should be for reading */ diffidx = ISAMD_BLOCK_OFFSET_1; @@ -822,7 +825,6 @@ static int merge ( ISAMD_PP firstpp, /* first pp (with diffs) */ if (readpp->is->method->debug >5) logf(LOG_LOG,"isamd_merge:all data has been deleted (nk=%d) ", readpp->numKeys); - //assert (readpp->numKeys == 0); /* no longer true! */ } @@ -930,12 +932,12 @@ static int append_diffs( firstpp=isamd_pp_open(is, isamd_addr(0,0) ); firstpp->size=firstpp->offset=ISAMD_BLOCK_OFFSET_1; /* create in smallest category, will expand later */ - ++(is->files[0].no_fbuilds); + ++(is->no_fbuilds); } else { firstpp=isamd_pp_open(is, ipos); - ++(is->files[0].no_appds); + ++(is->no_appds); } if (is->method->debug >2) @@ -980,21 +982,23 @@ static int append_diffs( if (diffidx + codelen > maxsize ) { /* block full */ - if (firstpp->cat < firstpp->is->max_cat) - { /* just increase the block size */ + while ( (firstpp->cat < firstpp->is->max_cat) && + (diffidx + codelen > maxsize) ) + { /* try to increase the block size */ if (firstpp->pos > 0) /* free the old block if allocated */ isamd_release_block(is, firstpp->cat, firstpp->pos); ++firstpp->cat; maxsize = is->method->filecat[firstpp->cat].bsize; firstpp->pos=0; /* need to allocate it when saving */ if (is->method->debug >3) - logf(LOG_LOG,"isamd_appd: increased diff block to %d (%d)", + logf(LOG_LOG,"isamd_appd: increased diff block sz to %d (%d)", firstpp->cat, maxsize); } - else - { /* max size already - can't help, need to merge it */ + if ((firstpp->cat >= firstpp->is->max_cat) && + (diffidx + codelen > maxsize) ) + { /* max size - can't help, need to merge it */ if (is->method->debug >7) - logf(LOG_LOG,"isamd_appd: block full"); + logf(LOG_LOG,"isamd_appd: need to merge"); if (is->method->debug >9) //!!!!! logf(LOG_LOG,"isamd_appd: going to merge with m=%d %d.%d", i_mode, i_key.sysno, i_key.seqno); @@ -1004,7 +1008,16 @@ static int append_diffs( assert(!"merge returned zero ??"); } /* need to merge */ } /* block full */ - + + if (!( diffidx+codelen <= maxsize )) + { /* bug hunting */ + logf(LOG_LOG,"OOPS, diffidx problem: d=%d c=%d s=%d > m=%d", + diffidx, codelen, diffidx+codelen, maxsize); + logf(LOG_LOG,"ipos=%d f=%d=%d:%d", + ipos, + isamd_addr(firstpp->pos, firstpp->cat), + firstpp->cat, firstpp->pos ); + } assert ( diffidx+codelen <= maxsize ); /* save the diff */ @@ -1059,7 +1072,7 @@ ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data) ISAMD_P rc=0; int olddebug= is->method->debug; - if (ipos == 1846) + if (ipos == 7320) is->method->debug = 99; /*!*/ if ( filter_isempty(F) ) /* can be, if del-ins of the same */ @@ -1067,6 +1080,7 @@ ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data) if (is->method->debug >3) logf(LOG_LOG,"isamd_appd: nothing to do for %d=",ipos); filter_close(F); + ++(is->no_non); return ipos; /* without doing anything at all */ } @@ -1080,6 +1094,8 @@ ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data) if (is->method->debug >9) logf(LOG_LOG,"isamd_appd: singleton %d (%x)", rc,rc); + if (rc) + is->no_singles++; assert ( (rc==0) || is_singleton(rc) ); } if ( 0==rc) /* either not single, or it did not fit */ @@ -1106,7 +1122,10 @@ ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data) /* * $Log: merge-d.c,v $ - * Revision 1.23 1999-09-27 14:36:36 heikki + * Revision 1.24 1999-10-05 09:57:40 heikki + * Tuning the isam-d (and fixed a small "detail") + * + * Revision 1.23 1999/09/27 14:36:36 heikki * singletons * * Revision 1.22 1999/09/23 18:01:18 heikki -- 1.7.10.4