X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=isamc%2Fisamd.c;h=8fb7dcaec219e5f243522bc3556fda3ee5421010;hb=e5e4f1de02396b2cfab488f8e5918853e96d6b53;hp=5134f4bc22b98c5e0167465aee19c576aa3aeb63;hpb=8e5f17591325078343ec4dff4708a12c78863f78;p=idzebra-moved-to-github.git diff --git a/isamc/isamd.c b/isamc/isamd.c index 5134f4b..8fb7dca 100644 --- a/isamc/isamd.c +++ b/isamc/isamd.c @@ -1,14 +1,24 @@ -/* - * Copyright (c) 1995-1998, Index Data. - * See the file LICENSE for details. - * Heikki Levanto - * - * Isamd - isam with diffs - * - * todo - * most of it, this is just a copy of isamh - * - */ +/* $Id: isamd.c,v 1.23 2002-08-02 19:26:56 adam Exp $ + Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 + Index Data Aps + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Zebra; see the file LICENSE.zebra. If not, write to the +Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. +*/ @@ -18,48 +28,61 @@ #include #include -#include +#include +#include "../index/index.h" /* isamd uses the internal structure of it_key */ #include "isamd-p.h" -#include "../index/index.h" /* for dump */ - -static void flush_block (ISAMH is, int cat); -static void release_fc (ISAMH is, int cat); -static void init_fc (ISAMH is, int cat); +static void flush_block (ISAMD is, int cat); +static void release_fc (ISAMD is, int cat); +static void init_fc (ISAMD is, int cat); -#define ISAMH_FREELIST_CHUNK 1 +#define ISAMD_FREELIST_CHUNK 1 -#define SMALL_TEST 1 +#define SMALL_TEST 0 -ISAMH_M isamd_getmethod (void) +ISAMD_M isamd_getmethod (ISAMD_M me) { - static struct ISAMH_filecat_s def_cat[] = { + static struct ISAMD_filecat_s def_cat[] = { #if SMALL_TEST -/* blocksz, max keys before switching size */ - /* { 32, 40 }, */ - { 128, 0 }, +/* blocksz, max. Unused time being */ + { 32, 40 }, /* 24 is the smallest unreasonable size! */ + { 64, 0 }, #else - { 24, 40 }, - { 2048, 2048 }, - { 16384, 0 }, - -#endif -#ifdef OLDVALUES - { 24, 40 }, - { 128, 256 }, - { 512, 1024 }, - { 2048, 4096 }, - { 8192,16384 }, - { 32768, 0 }, + { 32, 1 }, + { 128, 1 }, + { 256, 1 }, + { 512, 1 }, + { 1024, 1 }, + { 2048, 1 }, + { 4096, 1 }, + { 8192, 0 }, #endif -/* assume about 2 bytes per pointer, when compressed. The head uses */ -/* 16 bytes, and other blocks use 8 for header info... If you want 3 */ -/* blocks of 32 bytes, say max 16+24+24 = 64 keys */ +#ifdef SKIPTHIS + + + + { 32, 1 }, + { 128, 1 }, + { 512, 1 }, + { 2048, 1 }, + { 8192, 1 }, + { 32768, 1 }, + {131072, 0 }, + + { 24, 1 }, /* Experimental sizes */ + { 32, 1 }, + { 64, 1 }, + { 128, 1 }, + { 256, 1 }, + { 512, 1 }, + { 1024, 1 }, + { 2048, 0 }, +#endif }; - ISAMH_M m = (ISAMH_M) xmalloc (sizeof(*m)); - m->filecat = def_cat; + ISAMD_M m = (ISAMD_M) xmalloc (sizeof(*m)); /* never released! */ + m->filecat = def_cat; /* ok, only alloc'd once */ m->code_start = NULL; m->code_item = NULL; @@ -68,7 +91,7 @@ ISAMH_M isamd_getmethod (void) m->compare_item = NULL; - m->debug = 1; + m->debug = 0; /* default to no debug */ m->max_blocks_mem = 10; @@ -77,60 +100,35 @@ ISAMH_M isamd_getmethod (void) -ISAMH isamd_open (BFiles bfs, const char *name, int writeflag, ISAMH_M method) +ISAMD isamd_open (BFiles bfs, const char *name, int writeflag, ISAMD_M method) { - ISAMH is; - ISAMH_filecat filecat; + ISAMD is; + ISAMD_filecat filecat; int i = 0; - int max_buf_size = 0; - is = (ISAMH) xmalloc (sizeof(*is)); + is = (ISAMD) xmalloc (sizeof(*is)); - is->method = (ISAMH_M) xmalloc (sizeof(*is->method)); + is->method = (ISAMD_M) xmalloc (sizeof(*is->method)); memcpy (is->method, method, sizeof(*method)); filecat = is->method->filecat; assert (filecat); /* determine number of block categories */ - if (is->method->debug) - logf (LOG_LOG, "isc: bsize maxkeys"); + if (is->method->debug>0) + logf (LOG_LOG, "isamd: bsize maxkeys"); do { - if (is->method->debug) - logf (LOG_LOG, "isc:%6d %6d", + if (is->method->debug>0) + logf (LOG_LOG, "isamd:%6d %6d", filecat[i].bsize, filecat[i].mblocks); - if (max_buf_size < filecat[i].bsize) - max_buf_size = filecat[i].bsize; } while (filecat[i++].mblocks); is->no_files = i; is->max_cat = --i; -#ifdef SKIPTHIS - /* max_buf_size is the larget buffer to be used during merge */ - max_buf_size = (1 + max_buf_size / filecat[i].bsize) * filecat[i].bsize; - if (max_buf_size < (1+is->method->max_blocks_mem) * filecat[i].bsize) - max_buf_size = (1+is->method->max_blocks_mem) * filecat[i].bsize; -#endif - - if (is->method->debug) - logf (LOG_LOG, "isc: max_buf_size %d", max_buf_size); - + assert (is->no_files > 0); - is->files = (ISAMH_file) xmalloc (sizeof(*is->files)*is->no_files); - if (writeflag) - { -#ifdef SKIPTHIS - is->merge_buf = (char *) xmalloc (max_buf_size+256); - memset (is->merge_buf, 0, max_buf_size+256); -#else - is->startblock = (char *) xmalloc (max_buf_size+256); - memset (is->startblock, 0, max_buf_size+256); - is->lastblock = (char *) xmalloc (max_buf_size+256); - memset (is->lastblock, 0, max_buf_size+256); - /* The spare 256 bytes should not be needed! */ -#endif - } - else - is->startblock = is->lastblock = NULL; + assert (is->max_cat <=8 ); /* we have only 3 bits for it */ + + is->files = (ISAMD_file) xmalloc (sizeof(*is->files)*is->no_files); for (i = 0; ino_files; i++) { @@ -140,7 +138,7 @@ ISAMH isamd_open (BFiles bfs, const char *name, int writeflag, ISAMH_M method) is->files[i].bf = bf_open (bfs, fname, is->method->filecat[i].bsize, writeflag); is->files[i].head_is_dirty = 0; - if (!bf_read (is->files[i].bf, 0, 0, sizeof(ISAMH_head), + if (!bf_read (is->files[i].bf, 0, 0, sizeof(ISAMD_head), &is->files[i].head)) { is->files[i].head.lastblock = 1; @@ -151,7 +149,7 @@ ISAMH isamd_open (BFiles bfs, const char *name, int writeflag, ISAMH_M method) is->method->filecat[i].bsize / sizeof(int) - 1; is->files[i].alloc_buf = (char *) xmalloc (is->method->filecat[i].bsize); - is->files[i].no_writes = 0; + is->files[i].no_writes = 0; /* clear statistics */ is->files[i].no_reads = 0; is->files[i].no_skip_writes = 0; is->files[i].no_allocated = 0; @@ -163,105 +161,203 @@ ISAMH isamd_open (BFiles bfs, const char *name, int writeflag, ISAMH_M method) is->files[i].sum_backward = 0; is->files[i].no_next = 0; is->files[i].no_prev = 0; - + is->files[i].no_op_diffonly=0; + is->files[i].no_op_main=0; init_fc (is, i); } + is->last_pos=0; + is->last_cat=0; + is->no_read=0; + is->no_read_main=0; + is->no_write=0; + is->no_op_single=0; + is->no_op_new=0; + is->no_read_keys=0; + is->no_read_eof=0; + is->no_seek_nxt=0; + is->no_seek_sam=0; + is->no_seek_fwd=0; + is->no_seek_prv=0; + is->no_seek_bak=0; + is->no_seek_cat=0; + is->no_fbuilds=0; + is->no_appds=0; + is->no_merges=0; + is->no_non=0; + is->no_singles=0; + return is; } -int isamd_block_used (ISAMH is, int type) +int isamd_block_used (ISAMD is, int type) { + if ( type==-1) /* singleton */ + return 0; if (type < 0 || type >= is->no_files) return -1; return is->files[type].head.lastblock-1; } -int isamd_block_size (ISAMH is, int type) +int isamd_block_size (ISAMD is, int type) { - ISAMH_filecat filecat = is->method->filecat; + ISAMD_filecat filecat = is->method->filecat; + if ( type==-1) /* singleton */ + return 0; /* no bytes used */ if (type < 0 || type >= is->no_files) return -1; return filecat[type].bsize; } -int isamd_close (ISAMH is) +int isamd_close (ISAMD is) { int i; + int s; - if (is->method->debug) + if (is->method->debug>0) { - logf (LOG_LOG, "isc: next forw mid-f prev backw mid-b"); + logf (LOG_LOG, "isamd statistics"); + logf (LOG_LOG, "f nxt forw mid-f prev backw mid-b"); for (i = 0; ino_files; i++) - logf (LOG_LOG, "isc:%8d%8d%8.1f%8d%8d%8.1f", + logf (LOG_LOG, "%d%7d%7d%7.1f%7d%7d%7.1f",i, is->files[i].no_next, is->files[i].no_forward, is->files[i].no_forward ? - (double) is->files[i].sum_forward/is->files[i].no_forward - : 0.0, + (double) is->files[i].sum_forward/is->files[i].no_forward + : 0.0, is->files[i].no_prev, is->files[i].no_backward, is->files[i].no_backward ? - (double) is->files[i].sum_backward/is->files[i].no_backward - : 0.0); + (double) is->files[i].sum_backward/is->files[i].no_backward + : 0.0); } - if (is->method->debug) - logf (LOG_LOG, "isc: writes reads skipped alloc released remap"); + if (is->method->debug>0) + logf (LOG_LOG, "f writes reads skipped alloc released "); for (i = 0; ino_files; i++) { release_fc (is, i); assert (is->files[i].bf); if (is->files[i].head_is_dirty) - bf_write (is->files[i].bf, 0, 0, sizeof(ISAMH_head), + bf_write (is->files[i].bf, 0, 0, sizeof(ISAMD_head), &is->files[i].head); - if (is->method->debug) - logf (LOG_LOG, "isc:%8d%8d%8d%8d%8d%8d", + if (is->method->debug>0) + logf (LOG_LOG, "%d%8d%8d%8d%8d%8d",i, is->files[i].no_writes, is->files[i].no_reads, is->files[i].no_skip_writes, is->files[i].no_allocated, - is->files[i].no_released, - is->files[i].no_remap); + is->files[i].no_released); xfree (is->files[i].fc_list); flush_block (is, i); bf_close (is->files[i].bf); } + + if (is->method->debug>0) + { + logf (LOG_LOG, "f opens main diffonly"); + for (i = 0; ino_files; i++) + { + logf (LOG_LOG, "%d%8d%8d%8d",i, + is->files[i].no_op_main+ + is->files[i].no_op_diffonly, + is->files[i].no_op_main, + is->files[i].no_op_diffonly); + } + logf(LOG_LOG,"open single %8d", is->no_op_single); + logf(LOG_LOG,"open new %8d", is->no_op_new); + + logf(LOG_LOG, "new build %8d", is->no_fbuilds); + logf(LOG_LOG, "append %8d", is->no_appds); + logf(LOG_LOG, " merges %8d", is->no_merges); + logf(LOG_LOG, " singles %8d", is->no_singles); + logf(LOG_LOG, " no-ops %8d", is->no_non); + + logf(LOG_LOG, "read blocks %8d", is->no_read); + logf(LOG_LOG, "read keys: %8d %8.1f k/bl", + is->no_read_keys, + 1.0*(is->no_read_keys+1)/(is->no_read+1) ); + logf(LOG_LOG, "read main-k %8d %8.1f %% of keys", + is->no_read_main, + 100.0*(is->no_read_main+1)/(is->no_read_keys+1) ); + logf(LOG_LOG, "read ends: %8d %8.1f k/e", + is->no_read_eof, + 1.0*(is->no_read_keys+1)/(is->no_read_eof+1) ); + s= is->no_seek_nxt+ is->no_seek_sam+ is->no_seek_fwd + + is->no_seek_prv+ is->no_seek_bak+ is->no_seek_cat; + if (s==0) + s++; + logf(LOG_LOG, "seek same %8d %8.1f%%", + is->no_seek_sam, 100.0*is->no_seek_sam/s ); + logf(LOG_LOG, "seek next %8d %8.1f%%", + is->no_seek_nxt, 100.0*is->no_seek_nxt/s ); + logf(LOG_LOG, "seek prev %8d %8.1f%%", + is->no_seek_prv, 100.0*is->no_seek_prv/s ); + logf(LOG_LOG, "seek forw %8d %8.1f%%", + is->no_seek_fwd, 100.0*is->no_seek_fwd/s ); + logf(LOG_LOG, "seek back %8d %8.1f%%", + is->no_seek_bak, 100.0*is->no_seek_bak/s ); + logf(LOG_LOG, "seek cat %8d %8.1f%%", + is->no_seek_cat, 100.0*is->no_seek_cat/s ); + } xfree (is->files); - xfree (is->startblock); - xfree (is->lastblock); xfree (is->method); xfree (is); return 0; } -int isamd_read_block (ISAMH is, int cat, int pos, char *dst) +static void isamd_seek_stat(ISAMD is, int cat, int pos) { + if (cat != is->last_cat) + is->no_seek_cat++; + else if ( pos == is->last_pos) + is->no_seek_sam++; + else if ( pos == is->last_pos+1) + is->no_seek_nxt++; + else if ( pos == is->last_pos-1) + is->no_seek_prv++; + else if ( pos > is->last_pos) + is->no_seek_fwd++; + else if ( pos < is->last_pos) + is->no_seek_bak++; + is->last_cat = cat; + is->last_pos = pos; +} /* seek_stat */ + +int isamd_read_block (ISAMD is, int cat, int pos, char *dst) +{ + isamd_seek_stat(is,cat,pos); ++(is->files[cat].no_reads); + ++(is->no_read); + if (is->method->debug > 6) + logf (LOG_LOG, "isamd: read_block %d:%d",cat, pos); return bf_read (is->files[cat].bf, pos, 0, 0, dst); } -int isamd_write_block (ISAMH is, int cat, int pos, char *src) +int isamd_write_block (ISAMD is, int cat, int pos, char *src) { + isamd_seek_stat(is,cat,pos); ++(is->files[cat].no_writes); - if (is->method->debug > 2) - logf (LOG_LOG, "isc: write_block %d %d", cat, pos); + ++(is->no_write); + if (is->method->debug > 6) + logf (LOG_LOG, "isamd: write_block %d:%d", cat, pos); return bf_write (is->files[cat].bf, pos, 0, 0, src); } -int isamd_write_dblock (ISAMH is, int cat, int pos, char *src, +int isamd_write_dblock (ISAMD is, int cat, int pos, char *src, int nextpos, int offset) { - ISAMH_BLOCK_SIZE size = offset + ISAMH_BLOCK_OFFSET_N; - if (is->method->debug > 2) - logf (LOG_LOG, "isc: write_dblock. size=%d nextpos=%d", + ISAMD_BLOCK_SIZE size = offset + ISAMD_BLOCK_OFFSET_N; + if (is->method->debug > 4) + logf (LOG_LOG, "isamd: write_dblock. size=%d nextpos=%d", (int) size, nextpos); - src -= ISAMH_BLOCK_OFFSET_N; + src -= ISAMD_BLOCK_OFFSET_N; + assert( ISAMD_BLOCK_OFFSET_N == sizeof(int)+sizeof(int) ); memcpy (src, &nextpos, sizeof(int)); memcpy (src + sizeof(int), &size, sizeof(size)); return isamd_write_block (is, cat, pos, src); } -#if ISAMH_FREELIST_CHUNK -static void flush_block (ISAMH is, int cat) +#if ISAMD_FREELIST_CHUNK +static void flush_block (ISAMD is, int cat) { char *abuf = is->files[cat].alloc_buf; int block = is->files[cat].head.freelist; @@ -274,7 +370,7 @@ static void flush_block (ISAMH is, int cat) xfree (abuf); } -static int alloc_block (ISAMH is, int cat) +static int alloc_block (ISAMD is, int cat) { int block = is->files[cat].head.freelist; char *abuf = is->files[cat].alloc_buf; @@ -320,7 +416,7 @@ static int alloc_block (ISAMH is, int cat) return block; } -static void release_block (ISAMH is, int cat, int pos) +static void release_block (ISAMD is, int cat, int pos) { char *abuf = is->files[cat].alloc_buf; int block = is->files[cat].head.freelist; @@ -357,13 +453,13 @@ static void release_block (ISAMH is, int cat, int pos) is->files[cat].alloc_entries_num++; } #else -static void flush_block (ISAMH is, int cat) +static void flush_block (ISAMD is, int cat) { char *abuf = is->files[cat].alloc_buf; xfree (abuf); } -static int alloc_block (ISAMH is, int cat) +static int alloc_block (ISAMD is, int cat) { int block; char buf[sizeof(int)]; @@ -380,7 +476,7 @@ static int alloc_block (ISAMH is, int cat) return block; } -static void release_block (ISAMH is, int cat, int pos) +static void release_block (ISAMD is, int cat, int pos) { char buf[sizeof(int)]; @@ -392,7 +488,7 @@ static void release_block (ISAMH is, int cat, int pos) } #endif -int isamd_alloc_block (ISAMH is, int cat) +int isamd_alloc_block (ISAMD is, int cat) { int block = 0; @@ -409,15 +505,17 @@ int isamd_alloc_block (ISAMH is, int cat) } if (!block) block = alloc_block (is, cat); - if (is->method->debug > 3) - logf (LOG_LOG, "isc: alloc_block in cat %d: %d", cat, block); + if (is->method->debug > 4) + logf (LOG_LOG, "isamd: alloc_block in cat %d: %d", cat, block); return block; } -void isamd_release_block (ISAMH is, int cat, int pos) +void isamd_release_block (ISAMD is, int cat, int pos) { - if (is->method->debug > 3) - logf (LOG_LOG, "isc: release_block in cat %d: %d", cat, pos); + if (is->method->debug > 4) + logf (LOG_LOG, "isamd: release_block in cat %d: %d", cat, pos); + assert(pos!=0); + if (is->files[cat].fc_list) { int j; @@ -431,7 +529,7 @@ void isamd_release_block (ISAMH is, int cat, int pos) release_block (is, cat, pos); } -static void init_fc (ISAMH is, int cat) +static void init_fc (ISAMD is, int cat) { int j = 100; @@ -442,7 +540,7 @@ static void init_fc (ISAMH is, int cat) is->files[cat].fc_list[j] = 0; } -static void release_fc (ISAMH is, int cat) +static void release_fc (ISAMD is, int cat) { int b, j = is->files[cat].fc_max; @@ -454,34 +552,99 @@ static void release_fc (ISAMH is, int cat) } } -void isamd_pp_close (ISAMH_PP pp) +void isamd_pp_close (ISAMD_PP pp) { - ISAMH is = pp->is; - - (*is->method->code_stop)(ISAMH_DECODE, pp->decodeClientData); + ISAMD is = pp->is; + + (*is->method->code_stop)(ISAMD_DECODE, pp->decodeClientData); + isamd_free_diffs(pp); /* see merge-d.h */ + if (is->method->debug > 5) + logf (LOG_LOG, "isamd_pp_close %p %d=%d:%d sz=%d n=%d=%d:%d nk=%d", + pp, isamd_addr(pp->pos, pp->cat), pp->cat, pp->pos, pp->size, + pp->next, isamd_type(pp->next), isamd_block(pp->next), + pp->numKeys ); xfree (pp->buf); xfree (pp); } -ISAMH_PP isamd_pp_open (ISAMH is, ISAMH_P ipos) -{ - ISAMH_PP pp = (ISAMH_PP) xmalloc (sizeof(*pp)); - char *src; - - pp->cat = isamd_type(ipos); - pp->pos = isamd_block(ipos); - src = pp->buf = (char *) xmalloc (is->method->filecat[pp->cat].bsize); +ISAMD_PP isamd_pp_create (ISAMD is, int cat) +/* creates a pp_buff without data in it. pos=0, cat as given */ +{ + ISAMD_PP pp = (ISAMD_PP) xmalloc (sizeof(*pp)); + int sz = is->method->filecat[is->max_cat].bsize; + pp->numKeys = 0; + pp->buf = (char *) xmalloc (sz); + memset(pp->buf,'\0',sz); /* clear the buffer, for new blocks */ + pp->next = 0; pp->size = 0; pp->offset = 0; pp->is = is; - pp->decodeClientData = (*is->method->code_start)(ISAMH_DECODE); - pp->deleteFlag = 0; + pp->diffs=0; + pp->diffbuf=0; + pp->diffinfo=0; + pp->decodeClientData = (*is->method->code_start)(ISAMD_DECODE); + pp->cat = cat; + pp->pos = 0; + is->no_op_new++; + return pp; + +} + + +ISAMD_PP isamd_pp_open (ISAMD is, const char *dictbuf, int dictlen) +{ + ISAMD_P ipos; + ISAMD_PP pp = (ISAMD_PP) xmalloc (sizeof(*pp)); + char *src; + int sz = is->method->filecat[is->max_cat].bsize; + /* always allocate for the largest blocks, saves trouble */ + struct it_key singlekey; + char *c_ptr; /* for fake encoding the singlekey */ + char *i_ptr; + int ofs; + int dictnum; + pp->numKeys = 0; - pp->lastblock=0; + src = pp->buf = (char *) xmalloc (sz); + memset(src,'\0',sz); /* clear the buffer, for new blocks */ + pp->next = 0; + pp->size = 0; + pp->offset = 0; + pp->is = is; + pp->diffs=0; + pp->diffbuf=0; + pp->diffinfo=0; + pp->decodeClientData = (*is->method->code_start)(ISAMD_DECODE); + + dictnum=*dictbuf; // numkeys for internals, 0 for externals + + if (0==dictnum) + { + memcpy(&ipos, dictbuf+1, sizeof(ISAMD_P) ); + } + else /* dictionary block, fake a real one */ + { + pp->cat=0; + pp->pos=0; + if (is->method->debug > 5) + logf (LOG_LOG, "isamd_pp_open dict"); + pp->numKeys=(unsigned char) dictbuf[0]; + memcpy(pp->buf+ISAMD_BLOCK_OFFSET_1, dictbuf+1,dictlen-1); + pp->size=pp->offset=dictlen+ISAMD_BLOCK_OFFSET_1-1; + is->no_op_single++; + return pp; + } /* dict block */ + + pp->cat = isamd_type(ipos); + pp->pos = isamd_block(ipos); + + if (0==pp->pos) + is->no_op_new++; + if (pp->pos) { src = pp->buf; @@ -492,43 +655,44 @@ ISAMH_PP isamd_pp_open (ISAMH is, ISAMH_P ipos) src += sizeof(pp->size); memcpy (&pp->numKeys, src, sizeof(pp->numKeys)); src += sizeof(pp->numKeys); - memcpy (&pp->lastblock, src, sizeof(pp->lastblock)); - src += sizeof(pp->lastblock); - assert (pp->next != pp->pos); + assert (pp->next != isamd_addr(pp->pos,pp->cat)); pp->offset = src - pp->buf; - assert (pp->offset == ISAMH_BLOCK_OFFSET_1); - if (is->method->debug > 2) - logf (LOG_LOG, "isamd_pp_open sz=%d c=%d p=%d n=%d", - pp->size, pp->cat, pp->pos, isamd_block(pp->next)); + assert (pp->offset == ISAMD_BLOCK_OFFSET_1); + assert(pp->size>=ISAMD_BLOCK_OFFSET_1); /*??*/ + if (pp->next) + is->files[pp->cat].no_op_main++; + else + is->files[pp->cat].no_op_diffonly++; } + if (is->method->debug > 5) + logf (LOG_LOG, "isamd_pp_open %p %d=%d:%d sz=%d n=%d=%d:%d", + pp, isamd_addr(pp->pos, pp->cat), pp->cat, pp->pos, pp->size, + pp->next, isamd_type(pp->next), isamd_block(pp->next) ); + return pp; } -void isamd_buildfirstblock(ISAMH_PP pp){ +void isamd_buildfirstblock(ISAMD_PP pp){ char *dst=pp->buf; assert(pp->buf); - assert(pp->next != pp->pos); + assert(pp->next != isamd_addr(pp->pos,pp->cat)); memcpy(dst, &pp->next, sizeof(pp->next) ); dst += sizeof(pp->next); memcpy(dst, &pp->size,sizeof(pp->size)); dst += sizeof(pp->size); memcpy(dst, &pp->numKeys, sizeof(pp->numKeys)); dst += sizeof(pp->numKeys); - memcpy(dst, &pp->lastblock, sizeof(pp->lastblock)); - dst += sizeof(pp->lastblock); - assert (dst - pp->buf == ISAMH_BLOCK_OFFSET_1); - if (pp->is->method->debug > 2) - logf (LOG_LOG, "isamd: first: sz=%d p=%d/%d>%d/%d>%d/%d nk=%d", - pp->size, - pp->pos, pp->cat, - isamd_block(pp->next), isamd_type(pp->next), - isamd_block(pp->lastblock), isamd_type(pp->lastblock), - pp->numKeys); + assert (dst - pp->buf == ISAMD_BLOCK_OFFSET_1); + if (pp->is->method->debug > 5) + logf (LOG_LOG, "isamd: bldfirst: p=%d=%d:%d n=%d:%d:%d sz=%d nk=%d ", + isamd_addr(pp->pos,pp->cat),pp->cat, pp->pos, + pp->next, isamd_type(pp->next), isamd_block(pp->next), + pp->size, pp->numKeys); } -void isamd_buildlaterblock(ISAMH_PP pp){ +void isamd_buildlaterblock(ISAMD_PP pp){ char *dst=pp->buf; assert(pp->buf); assert(pp->next != isamd_addr(pp->pos,pp->cat)); @@ -536,8 +700,8 @@ void isamd_buildlaterblock(ISAMH_PP pp){ dst += sizeof(pp->next); memcpy(dst, &pp->size,sizeof(pp->size)); dst += sizeof(pp->size); - assert (dst - pp->buf == ISAMH_BLOCK_OFFSET_N); - if (pp->is->method->debug > 2) + assert (dst - pp->buf == ISAMD_BLOCK_OFFSET_N); + if (pp->is->method->debug > 5) logf (LOG_LOG, "isamd: l8r: sz=%d p=%d/%d>%d/%d", pp->size, pp->pos, pp->cat, @@ -547,21 +711,26 @@ void isamd_buildlaterblock(ISAMH_PP pp){ /* returns non-zero if item could be read; 0 otherwise */ -int isamd_pp_read (ISAMH_PP pp, void *buf) +int isamd_pp_read (ISAMD_PP pp, void *buf) { + return isamd_read_item (pp, (char **) &buf); + /* note: isamd_read_item is in merge-d.c, because it is so */ + /* convoluted with the merge process */ } -/* read one item from file - decode and store it in *dst. +/* read one main item from file - decode and store it in *dst. + Does not worry about diffs Returns 0 if end-of-file - 1 if item could be read ok and NO boundary - 2 if item could be read ok and boundary */ -int isamd_read_item (ISAMH_PP pp, char **dst) + 1 if item could be read ok +*/ +int isamd_read_main_item (ISAMD_PP pp, char **dst) { - ISAMH is = pp->is; + ISAMD is = pp->is; char *src = pp->buf + pp->offset; int newcat; + int oldoffs; if (pp->offset >= pp->size) { @@ -592,12 +761,9 @@ int isamd_read_item (ISAMH_PP pp, char **dst) } /* out new block position */ newcat = isamd_type(pp->next); - if (pp->cat != newcat ) { - pp->buf = xrealloc(pp->buf, is->method->filecat[newcat].bsize); - } pp->pos = isamd_block(pp->next); pp->cat = isamd_type(pp->next); - + pp->is->no_read_main++; src = pp->buf; /* read block and save 'next' and 'size' entry */ isamd_read_block (is, pp->cat, pp->pos, src); @@ -606,24 +772,28 @@ int isamd_read_item (ISAMH_PP pp, char **dst) memcpy (&pp->size, src, sizeof(pp->size)); src += sizeof(pp->size); /* assume block is non-empty */ - assert (src - pp->buf == ISAMH_BLOCK_OFFSET_N); + pp->offset = oldoffs = src - pp->buf; + assert (pp->offset == ISAMD_BLOCK_OFFSET_N); assert (pp->next != isamd_addr(pp->pos,pp->cat)); - if (pp->deleteFlag) - isamd_release_block (is, pp->cat, pp->pos); (*is->method->code_reset)(pp->decodeClientData); - (*is->method->code_item)(ISAMH_DECODE, pp->decodeClientData, dst, &src); + /* finally, read the item */ + (*is->method->code_item)(ISAMD_DECODE, pp->decodeClientData, dst, &src); pp->offset = src - pp->buf; - if (is->method->debug > 2) - logf (LOG_LOG, "isc: read_block size=%d %d %d next=%d", - pp->size, pp->cat, pp->pos, pp->next); + if (is->method->debug > 8) + logf (LOG_LOG, "isamd: read_m: block %d:%d sz=%d ofs=%d-%d next=%d", + pp->cat, pp->pos, pp->size, oldoffs, pp->offset, pp->next); return 2; } - (*is->method->code_item)(ISAMH_DECODE, pp->decodeClientData, dst, &src); + oldoffs=pp->offset; + (*is->method->code_item)(ISAMD_DECODE, pp->decodeClientData, dst, &src); pp->offset = src - pp->buf; + if (is->method->debug > 8) + logf (LOG_LOG, "isamd: read_m: got %d:%d sz=%d ofs=%d-%d next=%d", + pp->cat, pp->pos, pp->size, oldoffs, pp->offset, pp->next); return 1; } -int isamd_pp_num (ISAMH_PP pp) +int isamd_pp_num (ISAMD_PP pp) { return pp->numKeys; } @@ -643,33 +813,45 @@ static char *hexdump(unsigned char *p, int len, char *buff) { } -void isamd_pp_dump (ISAMH is, ISAMH_P ipos) +#ifdef SKIPTHIS + /* needs different arguments, or something */ +void isamd_pp_dump (ISAMD is, ISAMD_P ipos) { - ISAMH_PP pp; - ISAMH_P oldaddr=0; + ISAMD_PP pp; + ISAMD_P oldaddr=0; struct it_key key; int i,n; int occur =0; int oldoffs; + int diffmax=1; + int diffidx; char hexbuff[64]; + int olddebug= is->method->debug; + is->method->debug=0; /* no debug logs while reading for dump */ logf(LOG_LOG,"dumping isamd block %d (%d:%d)", (int)ipos, isamd_type(ipos), isamd_block(ipos) ); pp=isamd_pp_open(is,ipos); - logf(LOG_LOG,"numKeys=%d, last=%d (%d:%d) ofs=%d ", - pp->numKeys, - pp->lastblock, - isamd_type(pp->lastblock), isamd_block(pp->lastblock), - pp->offset); - oldoffs= pp->offset; + logf(LOG_LOG,"numKeys=%d, ofs=%d sz=%d", + pp->numKeys, pp->offset, pp->size ); + diffidx=oldoffs= pp->offset; + while ((diffidx < is->method->filecat[pp->cat].bsize) && (diffmax>0)) + { + memcpy(&diffmax,&(pp->buf[diffidx]),sizeof(int)); + logf (LOG_LOG,"diff set at %d-%d: %s", diffidx, diffmax, + hexdump(pp->buf+diffidx,8,0)); + /*! todo: dump the actual diffs as well !!! */ + diffidx=diffmax; + + } /* dump diffs */ while(isamd_pp_read(pp, &key)) { if (oldaddr != isamd_addr(pp->pos,pp->cat) ) { oldaddr = isamd_addr(pp->pos,pp->cat); - logf(LOG_LOG,"block %d (%d:%d) sz=%d nx=%d (%d:%d) ofs=%d", - isamd_addr(pp->pos,pp->cat), - pp->cat, pp->pos, pp->size, + logf(LOG_LOG,"block %d=%d:%d sz=%d nx=%d=%d:%d ofs=%d", + isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos, + pp->size, pp->next, isamd_type(pp->next), isamd_block(pp->next), pp->offset); i=0; @@ -679,8 +861,8 @@ void isamd_pp_dump (ISAMH is, ISAMH_P ipos) logf(LOG_LOG," %05x: %s",i,hexdump(pp->buf+i,n,hexbuff)); i+=n; } - if (oldoffs > ISAMH_BLOCK_OFFSET_N) - oldoffs=ISAMH_BLOCK_OFFSET_N; + if (oldoffs > ISAMD_BLOCK_OFFSET_N) + oldoffs=ISAMD_BLOCK_OFFSET_N; } /* new block */ occur++; logf (LOG_LOG," got %d:%d=%x:%x from %s at %d=%x", @@ -690,13 +872,78 @@ void isamd_pp_dump (ISAMH is, ISAMH_P ipos) oldoffs, oldoffs); oldoffs = pp->offset; } + /*!*/ /*TODO: dump diffs too!!! */ isamd_pp_close(pp); + is->method->debug=olddebug; } /* dump */ +#endif + /* * $Log: isamd.c,v $ - * Revision 1.1 1999-07-14 12:34:43 heikki + * Revision 1.23 2002-08-02 19:26:56 adam + * Towards GPL + * + * Revision 1.22 2002/07/12 18:12:21 heikki + * Isam-D now stores small entries directly in the dictionary. + * Needs more tuning and cleaning... + * + * Revision 1.21 2002/07/11 16:16:00 heikki + * Fixed a bug in isamd, failed to store a single key when its bits + * did not fit into a singleton. + * + * Revision 1.20 2002/06/19 10:29:18 adam + * align block sizes for isam sys. Better plot for test + * + * Revision 1.19 1999/11/30 13:48:04 adam + * Improved installation. Updated for inclusion of YAZ header files. + * + * Revision 1.18 1999/10/06 15:18:13 heikki + * + * Improving block sizes again + * + * Revision 1.17 1999/10/06 11:46:36 heikki + * mproved statistics on isam-d + * + * Revision 1.16 1999/10/05 09:57:40 heikki + * Tuning the isam-d (and fixed a small "detail") + * + * Revision 1.15 1999/09/27 14:36:36 heikki + * singletons + * + * Revision 1.14 1999/09/23 18:01:18 heikki + * singleton optimising + * + * Revision 1.13 1999/09/20 15:48:06 heikki + * Small changes + * + * Revision 1.12 1999/09/13 13:28:28 heikki + * isam-d optimizing: merging input data in the same go + * + * Revision 1.11 1999/08/25 18:09:24 heikki + * Starting to optimize + * + * Revision 1.10 1999/08/24 13:17:42 heikki + * Block sizes, comments + * + * Revision 1.9 1999/08/20 12:25:58 heikki + * Statistics in isamd + * + * Revision 1.8 1999/08/18 13:28:16 heikki + * Set log levels to decent values + * + * Revision 1.6 1999/08/17 19:44:25 heikki + * Fixed memory leaks + * + * Revision 1.4 1999/08/04 14:21:18 heikki + * isam-d seems to be working. + * + * Revision 1.3 1999/07/21 14:24:50 heikki + * isamd write and read functions ok, except when diff block full. + * (merge not yet done) + * + * Revision 1.1 1999/07/14 12:34:43 heikki * Copied from isamh, starting to change things... * * - */ \ No newline at end of file + */