From 7af74fb4a98eb4baeffb6a1a648e837563fe7b4f Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Fri, 8 Nov 1996 11:15:28 +0000 Subject: [PATCH] Number of keys in chain are stored in first block and the function to retrieve this information, isc_pp_num is implemented. --- isamc/isamc-p.h | 19 +++++--- isamc/isamc.c | 60 +++++++++++++++-------- isamc/merge.c | 142 +++++++++++++++++++++++++++++++++++++++++-------------- 3 files changed, 158 insertions(+), 63 deletions(-) diff --git a/isamc/isamc-p.h b/isamc/isamc-p.h index 7f4601c..0d4d021 100644 --- a/isamc/isamc-p.h +++ b/isamc/isamc-p.h @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: isamc-p.h,v $ - * Revision 1.3 1996-11-04 14:08:55 adam + * Revision 1.4 1996-11-08 11:15:28 adam + * Number of keys in chain are stored in first block and the function + * to retrieve this information, isc_pp_num is implemented. + * + * Revision 1.3 1996/11/04 14:08:55 adam * Optimized free block usage. * * Revision 1.2 1996/11/01 08:59:13 adam @@ -49,20 +53,21 @@ struct ISAMC_s { struct ISAMC_PP_s { char *buf; - int offset; - int size; + unsigned offset; + unsigned short size; int cat; int pos; int next; ISAMC is; void *decodeClientData; int deleteFlag; + int numKeys; }; -#define ISAMC_BLOCK_OFFSET (sizeof(int)+sizeof(int)) +#define ISAMC_BLOCK_OFFSET_N (sizeof(int)+sizeof(short)) +#define ISAMC_BLOCK_OFFSET_1 (sizeof(int)+sizeof(short)+sizeof(int)) int isc_alloc_block (ISAMC is, int cat); void isc_release_block (ISAMC is, int cat, int pos); -int isc_write_dblock (ISAMC is, int cat, int pos, char *src, - int nextpos, int offset); - +int isc_read_block (ISAMC is, int cat, int pos, char *dst); +int isc_write_block (ISAMC is, int cat, int pos, char *src); diff --git a/isamc/isamc.c b/isamc/isamc.c index 656cae0..a5e1364 100644 --- a/isamc/isamc.c +++ b/isamc/isamc.c @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: isamc.c,v $ - * Revision 1.5 1996-11-04 14:08:57 adam + * Revision 1.6 1996-11-08 11:15:29 adam + * Number of keys in chain are stored in first block and the function + * to retrieve this information, isc_pp_num is implemented. + * + * Revision 1.5 1996/11/04 14:08:57 adam * Optimized free block usage. * * Revision 1.4 1996/11/01 13:36:46 adam @@ -27,7 +31,6 @@ /* * TODO: * Reduction to lower categories in isc_merge - * Implementation of isc_numkeys */ #include #include @@ -47,7 +50,6 @@ ISAMC_M isc_getmethod (void) { 512, 490, 100, 20 }, { 4096, 3950, 1000, 20 }, {32768, 32000, 10000, 0 }, - { 0, 0, 0, 0 } }; ISAMC_M m = xmalloc (sizeof(*m)); m->filecat = def_cat; @@ -58,7 +60,7 @@ ISAMC_M isc_getmethod (void) m->compare_item = NULL; - m->debug = 0; + m->debug = 1; m->max_blocks_mem = 10; @@ -70,7 +72,7 @@ ISAMC isc_open (const char *name, int writeflag, ISAMC_M method) { ISAMC is; ISAMC_filecat filecat; - int i, j; + int i = 0; int max_buf_size = 0; is = xmalloc (sizeof(*is)); @@ -83,7 +85,7 @@ ISAMC isc_open (const char *name, int writeflag, ISAMC_M method) /* determine number of block categories */ if (is->method->debug) logf (LOG_LOG, "isc: bsize ifill mfill mblocks"); - for (i = 0; filecat[i].bsize; i++) + do { if (is->method->debug) logf (LOG_LOG, "isc:%6d %6d %6d %6d", @@ -91,7 +93,7 @@ ISAMC isc_open (const char *name, int writeflag, ISAMC_M method) filecat[i].mfill, filecat[i].mblocks); if (max_buf_size < filecat[i].mblocks * filecat[i].bsize) max_buf_size = filecat[i].mblocks * filecat[i].bsize; - } + } while (filecat[i++].mblocks); is->no_files = i; is->max_cat = --i; /* max_buf_size is the larget buffer to be used during merge */ @@ -105,8 +107,8 @@ ISAMC isc_open (const char *name, int writeflag, ISAMC_M method) is->files = xmalloc (sizeof(*is->files)*is->no_files); if (writeflag) { - is->merge_buf = xmalloc (max_buf_size+128); - memset (is->merge_buf, 0, max_buf_size+128); + is->merge_buf = xmalloc (max_buf_size+256); + memset (is->merge_buf, 0, max_buf_size+256); } else is->merge_buf = NULL; @@ -185,13 +187,14 @@ int isc_write_block (ISAMC is, int cat, int pos, char *src) int isc_write_dblock (ISAMC is, int cat, int pos, char *src, int nextpos, int offset) { - int xoffset = offset + 2*sizeof(int); + unsigned short size = offset + ISAMC_BLOCK_OFFSET_N; if (is->method->debug > 2) logf (LOG_LOG, "isc: write_dblock. size=%d nextpos=%d", - offset, nextpos); - memcpy (src - sizeof(int)*2, &nextpos, sizeof(int)); - memcpy (src - sizeof(int), &xoffset, sizeof(int)); - return isc_write_block (is, cat, pos, src - sizeof(int)*2); + (int) size, nextpos); + src -= ISAMC_BLOCK_OFFSET_N; + memcpy (src, &nextpos, sizeof(int)); + memcpy (src + sizeof(int), &size, sizeof(size)); + return isc_write_block (is, cat, pos, src); } static int alloc_block (ISAMC is, int cat) @@ -249,7 +252,7 @@ void isc_release_block (ISAMC is, int cat, int pos) logf (LOG_LOG, "isc: release_block in cat %d: %d", cat, pos); if (is->files[cat].fc_list) { - int b, j; + int j; for (j = 0; jfiles[cat].fc_max; j++) if (!is->files[cat].fc_list[j]) { @@ -297,21 +300,37 @@ ISAMC_PP isc_pp_open (ISAMC is, ISAMC_P ipos) char *src; pp->cat = isc_type(ipos); - pp->next = isc_block(ipos); + pp->pos = isc_block(ipos); src = pp->buf = xmalloc (is->method->filecat[pp->cat].bsize); - pp->pos = 0; + pp->next = 0; pp->size = 0; pp->offset = 0; pp->is = is; pp->decodeClientData = (*is->method->code_start)(ISAMC_DECODE); pp->deleteFlag = 0; + pp->numKeys = 0; + + if (pp->pos) + { + src = pp->buf; + isc_read_block (is, pp->cat, pp->pos, src); + memcpy (&pp->next, src, sizeof(pp->next)); + src += sizeof(pp->next); + memcpy (&pp->size, src, sizeof(pp->size)); + src += sizeof(pp->size); + memcpy (&pp->numKeys, src, sizeof(pp->numKeys)); + src += sizeof(pp->numKeys); + assert (pp->next != pp->pos); + pp->offset = src - pp->buf; + assert (pp->offset == ISAMC_BLOCK_OFFSET_1); + } return pp; } /* returns non-zero if item could be read; 0 otherwise */ -int isc_read_key (ISAMC_PP pp, void *buf) +int isc_pp_read (ISAMC_PP pp, void *buf) { return isc_read_item (pp, (char **) &buf); } @@ -334,6 +353,7 @@ int isc_read_item (ISAMC_PP pp, char **dst) memcpy (&pp->size, src, sizeof(pp->size)); src += sizeof(pp->size); /* assume block is non-empty */ + assert (src - pp->buf == ISAMC_BLOCK_OFFSET_N); assert (pp->next != pp->pos); if (pp->deleteFlag) isc_release_block (is, pp->cat, pp->pos); @@ -346,8 +366,8 @@ int isc_read_item (ISAMC_PP pp, char **dst) return 1; } -int isc_numkeys (ISAMC_PP pp) +int isc_pp_num (ISAMC_PP pp) { - return 1; + return pp->numKeys; } diff --git a/isamc/merge.c b/isamc/merge.c index c2b853a..bb89fa1 100644 --- a/isamc/merge.c +++ b/isamc/merge.c @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: merge.c,v $ - * Revision 1.3 1996-11-04 14:08:59 adam + * Revision 1.4 1996-11-08 11:15:31 adam + * Number of keys in chain are stored in first block and the function + * to retrieve this information, isc_pp_num is implemented. + * + * Revision 1.3 1996/11/04 14:08:59 adam * Optimized free block usage. * * Revision 1.2 1996/11/01 13:36:46 adam @@ -33,12 +37,16 @@ struct isc_merge_block { }; static void flush_blocks (ISAMC is, struct isc_merge_block *mb, int ptr, - char *r_buf, int *firstpos, int cat, int last) + char *r_buf, int *firstpos, int cat, int last, + int *numkeys) { int i; for (i = 0; i mb[i].offset); - isc_write_dblock (is, cat, mb[i].block, r_buf + mb[i].offset, - mb[i+1].block, mb[i+1].offset - mb[i].offset); + ssize = mb[i+1].offset - mb[i].offset; + assert (ssize); + + if (!*firstpos) + { + *firstpos = mb[i].block; + src = r_buf + mb[i].offset - ISAMC_BLOCK_OFFSET_1; + ssize += ISAMC_BLOCK_OFFSET_1; + + memcpy (src+sizeof(int)+sizeof(ssize), numkeys, + sizeof(*numkeys)); + if (is->method->debug > 2) + logf (LOG_LOG, "isc: flush numk=%d size=%d nextpos=%d", + *numkeys, (int) ssize, mb[i+1].block); + } + else + { + src = r_buf + mb[i].offset - ISAMC_BLOCK_OFFSET_N; + ssize += ISAMC_BLOCK_OFFSET_N; + if (is->method->debug > 2) + logf (LOG_LOG, "isc: flush size=%d nextpos=%d", + (int) ssize, mb[i+1].block); + } + memcpy (src, &mb[i+1].block, sizeof(int)); + memcpy (src+sizeof(int), &ssize, sizeof(ssize)); + isc_write_block (is, cat, mb[i].block, src); } } +static int get_border (ISAMC is, struct isc_merge_block *mb, int ptr, + int cat, int firstpos) +{ + /* Border set to initial fill or block size depending on + whether we are creating a new one or updating and old one. + */ + + int fill = mb[ptr].block ? is->method->filecat[cat].bsize : + is->method->filecat[cat].ifill; + int off = (ptr||firstpos) ? ISAMC_BLOCK_OFFSET_N : ISAMC_BLOCK_OFFSET_1; + + assert (ptr < 199); + + return mb[ptr].offset + fill - off; +} + ISAMC_P isc_merge (ISAMC is, ISAMC_P ipos, ISAMC_I data) { @@ -80,7 +125,8 @@ ISAMC_P isc_merge (ISAMC is, ISAMC_P ipos, ISAMC_I data) char f_item[128], *f_item_ptr; int f_more; - struct isc_merge_block mb[100]; + struct isc_merge_block mb[200]; + int firstpos = 0; int cat = 0; char r_item_buf[128]; /* temporary result output */ @@ -88,9 +134,11 @@ ISAMC_P isc_merge (ISAMC is, ISAMC_P ipos, ISAMC_I data) int r_offset = 0; /* current offset in r_buf */ int ptr = 0; /* pointer */ void *r_clientData; /* encode client data */ + int border; + int numKeys = 0; r_clientData = (*is->method->code_start)(ISAMC_ENCODE); - r_buf = is->merge_buf + ISAMC_BLOCK_OFFSET; + r_buf = is->merge_buf + 128; pp = isc_pp_open (is, ipos); /* read first item from file. make sure f_more indicates no boundary */ @@ -111,6 +159,7 @@ ISAMC_P isc_merge (ISAMC is, ISAMC_P ipos, ISAMC_I data) mb[ptr].dirty = 0; mb[ptr].offset = 0; + border = get_border (is, mb, ptr, cat, firstpos); while (i_more || f_more) { char *r_item = r_item_buf; @@ -143,24 +192,31 @@ ISAMC_P isc_merge (ISAMC is, ISAMC_P ipos, ISAMC_I data) mb[++ptr].block = pp->pos; mb[ptr].dirty = 0; mb[ptr].offset = r_offset; -#if 0 - if (r_item && cat == is->max_cat) + if (cat==is->max_cat && ptr >= is->method->max_blocks_mem) { /* We are dealing with block(s) of max size. Block(s) - will be flushed. Note: the block(s) are surely not - the last one(s). + except 1 will be flushed. */ if (is->method->debug > 2) logf (LOG_LOG, "isc: flush A %d sections", ptr); - flush_blocks (is, mb, ptr-1, r_buf, &firstpos, cat, 0); - ptr = 0; - mb[ptr].block = pp->pos; - mb[ptr].dirty = 0; - mb[ptr].offset = 0; + flush_blocks (is, mb, ptr-1, r_buf, &firstpos, cat, + 0, &numKeys); + + mb[0].block = mb[ptr-1].block; + mb[0].dirty = mb[ptr-1].dirty; + memcpy (r_buf, r_buf + mb[ptr-1].offset, + mb[ptr].offset - mb[ptr-1].offset); + mb[0].offset = 0; + + mb[1].block = mb[ptr].block; + mb[1].dirty = mb[ptr].dirty; + mb[1].offset = mb[ptr].offset - mb[ptr-1].offset; + ptr = 1; + r_offset = mb[ptr].offset; } -#endif } } + border = get_border (is, mb, ptr, cat, firstpos); } if (!f_more) cmp = -1; @@ -233,22 +289,13 @@ ISAMC_P isc_merge (ISAMC is, ISAMC_P ipos, ISAMC_I data) { char *r_out_ptr = r_buf + r_offset; int new_offset; - int border; - - /* border set to initial fill or block size depending on - whether we are creating a new one or updating and old one - */ - if (mb[ptr].block) - border = mb[ptr].offset + is->method->filecat[cat].bsize - -ISAMC_BLOCK_OFFSET; - else - border = mb[ptr].offset + is->method->filecat[cat].ifill - -ISAMC_BLOCK_OFFSET; (*is->method->code_item)(ISAMC_ENCODE, r_clientData, &r_out_ptr, &r_item); new_offset = r_out_ptr - r_buf; + numKeys++; + if (border < new_offset && border >= r_offset) { if (is->method->debug > 2) @@ -266,7 +313,8 @@ ISAMC_P isc_merge (ISAMC is, ISAMC_P ipos, ISAMC_I data) */ if (is->method->debug > 2) logf (LOG_LOG, "isc: flush B %d sections", ptr-1); - flush_blocks (is, mb, ptr-1, r_buf, &firstpos, cat, 0); + flush_blocks (is, mb, ptr-1, r_buf, &firstpos, cat, + 0, &numKeys); mb[0].block = mb[ptr-1].block; mb[0].dirty = mb[ptr-1].dirty; @@ -281,7 +329,8 @@ ISAMC_P isc_merge (ISAMC is, ISAMC_P ipos, ISAMC_I data) new_offset - r_offset); new_offset = (new_offset - r_offset) + mb[1].offset; ptr = 1; - } + } + border = get_border (is, mb, ptr, cat, firstpos); } r_offset = new_offset; } @@ -308,7 +357,7 @@ ISAMC_P isc_merge (ISAMC is, ISAMC_P ipos, ISAMC_I data) for (i = 1; i < ptr; i++) { int border = is->method->filecat[cat].ifill - - ISAMC_BLOCK_OFFSET + mb[j].offset; + ISAMC_BLOCK_OFFSET_1 + mb[j].offset; if (is->method->debug > 3) logf (LOG_LOG, "isc: remap %d border=%d", i, border); if (mb[i+1].offset > border && mb[i].offset <= border) @@ -324,12 +373,13 @@ ISAMC_P isc_merge (ISAMC is, ISAMC_P ipos, ISAMC_I data) logf (LOG_LOG, "isc: remap from %d to %d sections to cat %d", ptr, j, cat); ptr = j; + border = get_border (is, mb, ptr, cat, firstpos); } } if (mb[ptr].offset < r_offset) { /* make the final boundary offset */ - mb[++ptr].dirty = 1; /* ignored by flush_blocks */ - mb[ptr].block = 0; /* ignored by flush_blocks */ + mb[++ptr].dirty = 1; + mb[ptr].block = 0; mb[ptr].offset = r_offset; } else @@ -347,14 +397,34 @@ ISAMC_P isc_merge (ISAMC is, ISAMC_P ipos, ISAMC_I data) if (is->method->debug > 2) logf (LOG_LOG, "isc: flush C, %d sections", ptr); + if (firstpos) + { + /* we have to patch initial block with num keys if that + has changed */ + if (numKeys != isc_pp_num (pp)) + { + if (is->method->debug > 2) + logf (LOG_LOG, "isc: patch num keys firstpos=%d num=%d", + firstpos, numKeys); + bf_write (is->files[cat].bf, firstpos, ISAMC_BLOCK_OFFSET_N, + sizeof(numKeys), &numKeys); + } + } + else if (ptr > 0) + { /* we haven't flushed initial block yet and there surely are some + blocks to flush. Make first block dirty if numKeys differ */ + if (numKeys != isc_pp_num (pp)) + mb[0].dirty = 1; + } /* flush rest of block(s) in r_buf */ - flush_blocks (is, mb, ptr, r_buf, &firstpos, cat, 1); + flush_blocks (is, mb, ptr, r_buf, &firstpos, cat, 1, &numKeys); (*is->method->code_stop)(ISAMC_ENCODE, r_clientData); if (!firstpos) cat = 0; if (is->method->debug > 1) logf (LOG_LOG, "isc: isc_merge return %d %d", cat, firstpos); + isc_pp_close (pp); return cat + firstpos * 8; } -- 1.7.10.4