X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=isamc%2Fisamd.c;h=37d40a37fc9b5a36fbc812f35a004adf79af2099;hp=c15d7c7f851bb4773e6d8eb72f16df28a5a7680c;hb=ae2d0647bd3a555df77397b6c2f9c129fda0733a;hpb=f6fb80f1dfd9b1a486595496a0f43aaeb16f7b40 diff --git a/isamc/isamd.c b/isamc/isamd.c index c15d7c7..37d40a3 100644 --- a/isamc/isamd.c +++ b/isamc/isamd.c @@ -1,23 +1,31 @@ -/* - * Copyright (c) 1995-1998, Index Data. - * See the file LICENSE for details. - * $Id: isamd.c,v 1.13 1999-09-20 15:48:06 heikki Exp $ - * - * Isamd - isam with diffs - * Programmed by: Heikki Levanto - * - * Todo - * - Statistics are missing and/or completely wrong - * - Lots of code stolen from isamc, not all needed any more - */ - +/* $Id: isamd.c,v 1.26 2003-06-23 15:36:11 adam Exp $ + Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003 + Index Data Aps + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Zebra; see the file LICENSE.zebra. If not, write to the +Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. +*/ #include #include #include #include -#include +#include #include "../index/index.h" /* isamd uses the internal structure of it_key */ #include "isamd-p.h" @@ -29,7 +37,7 @@ static void init_fc (ISAMD is, int cat); #define SMALL_TEST 0 -ISAMD_M isamd_getmethod (ISAMD_M me) +ISAMD_M *isamd_getmethod (ISAMD_M *me) { static struct ISAMD_filecat_s def_cat[] = { #if SMALL_TEST @@ -38,25 +46,39 @@ ISAMD_M isamd_getmethod (ISAMD_M me) { 64, 0 }, #else { 32, 1 }, + { 128, 1 }, + { 256, 1 }, + { 512, 1 }, + { 1024, 1 }, + { 2048, 1 }, + { 4096, 1 }, + { 8192, 0 }, + +#endif +#ifdef SKIPTHIS + + + + { 32, 1 }, { 128, 1 }, { 512, 1 }, { 2048, 1 }, { 8192, 1 }, { 32768, 1 }, {131072, 0 }, -#endif -/* old values from isamc, long time ago... - { 24, 40 }, - { 128, 256 }, - { 512, 1024 }, - { 2048, 4096 }, - { 8192,16384 }, - { 32768, 0 }, -*/ + { 24, 1 }, /* Experimental sizes */ + { 32, 1 }, + { 64, 1 }, + { 128, 1 }, + { 256, 1 }, + { 512, 1 }, + { 1024, 1 }, + { 2048, 0 }, +#endif }; - ISAMD_M m = (ISAMD_M) xmalloc (sizeof(*m)); /* never released! */ + ISAMD_M *m = (ISAMD_M *) xmalloc (sizeof(*m)); /* never released! */ m->filecat = def_cat; /* ok, only alloc'd once */ m->code_start = NULL; @@ -75,7 +97,7 @@ ISAMD_M isamd_getmethod (ISAMD_M me) -ISAMD isamd_open (BFiles bfs, const char *name, int writeflag, ISAMD_M method) +ISAMD isamd_open (BFiles bfs, const char *name, int writeflag, ISAMD_M *method) { ISAMD is; ISAMD_filecat filecat; @@ -83,7 +105,7 @@ ISAMD isamd_open (BFiles bfs, const char *name, int writeflag, ISAMD_M method) is = (ISAMD) xmalloc (sizeof(*is)); - is->method = (ISAMD_M) xmalloc (sizeof(*is->method)); + is->method = (ISAMD_M *) xmalloc (sizeof(*is->method)); memcpy (is->method, method, sizeof(*method)); filecat = is->method->filecat; assert (filecat); @@ -136,21 +158,38 @@ ISAMD isamd_open (BFiles bfs, const char *name, int writeflag, ISAMD_M method) is->files[i].sum_backward = 0; is->files[i].no_next = 0; is->files[i].no_prev = 0; - is->files[i].no_op_nodiff=0; - is->files[i].no_op_intdiff=0; - is->files[i].no_op_extdiff=0; - is->files[i].no_fbuilds=0; - is->files[i].no_appds=0; - is->files[i].no_merges=0; - is->files[i].no_remerges=0; - + is->files[i].no_op_diffonly=0; + is->files[i].no_op_main=0; init_fc (is, i); } + is->last_pos=0; + is->last_cat=0; + is->no_read=0; + is->no_read_main=0; + is->no_write=0; + is->no_op_single=0; + is->no_op_new=0; + is->no_read_keys=0; + is->no_read_eof=0; + is->no_seek_nxt=0; + is->no_seek_sam=0; + is->no_seek_fwd=0; + is->no_seek_prv=0; + is->no_seek_bak=0; + is->no_seek_cat=0; + is->no_fbuilds=0; + is->no_appds=0; + is->no_merges=0; + is->no_non=0; + is->no_singles=0; + return is; } int isamd_block_used (ISAMD is, int type) { + if ( type==-1) /* singleton */ + return 0; if (type < 0 || type >= is->no_files) return -1; return is->files[type].head.lastblock-1; @@ -159,6 +198,8 @@ int isamd_block_used (ISAMD is, int type) int isamd_block_size (ISAMD is, int type) { ISAMD_filecat filecat = is->method->filecat; + if ( type==-1) /* singleton */ + return 0; /* no bytes used */ if (type < 0 || type >= is->no_files) return -1; return filecat[type].bsize; @@ -167,23 +208,24 @@ int isamd_block_size (ISAMD is, int type) int isamd_close (ISAMD is) { int i; + int s; if (is->method->debug>0) { logf (LOG_LOG, "isamd statistics"); - logf (LOG_LOG, "f nxt forw mid-f prev backw mid-b"); + logf (LOG_LOG, "f nxt forw mid-f prev backw mid-b"); for (i = 0; ino_files; i++) - logf (LOG_LOG, "%d%8d%8d%8.1f%8d%8d%8.1f",i, + logf (LOG_LOG, "%d%7d%7d%7.1f%7d%7d%7.1f",i, is->files[i].no_next, is->files[i].no_forward, is->files[i].no_forward ? - (double) is->files[i].sum_forward/is->files[i].no_forward - : 0.0, + (double) is->files[i].sum_forward/is->files[i].no_forward + : 0.0, is->files[i].no_prev, is->files[i].no_backward, is->files[i].no_backward ? - (double) is->files[i].sum_backward/is->files[i].no_backward - : 0.0); + (double) is->files[i].sum_backward/is->files[i].no_backward + : 0.0); } if (is->method->debug>0) logf (LOG_LOG, "f writes reads skipped alloc released "); @@ -208,23 +250,50 @@ int isamd_close (ISAMD is) if (is->method->debug>0) { - logf (LOG_LOG, "f opens simple int ext"); + logf (LOG_LOG, "f opens main diffonly"); for (i = 0; ino_files; i++) { - logf (LOG_LOG, "%d%8d%8d%8d%8d",i, - is->files[i].no_op_nodiff+ - is->files[i].no_op_intdiff+ - is->files[i].no_op_extdiff, - is->files[i].no_op_nodiff, - is->files[i].no_op_intdiff, - is->files[i].no_op_extdiff); + logf (LOG_LOG, "%d%8d%8d%8d",i, + is->files[i].no_op_main+ + is->files[i].no_op_diffonly, + is->files[i].no_op_main, + is->files[i].no_op_diffonly); } - logf (LOG_LOG, " build append merge remrg"); - logf (LOG_LOG, "=%8d%8d%8d%8d", - is->files[0].no_fbuilds, - is->files[0].no_appds, - is->files[0].no_merges, - is->files[0].no_remerges); + logf(LOG_LOG,"open single %8d", is->no_op_single); + logf(LOG_LOG,"open new %8d", is->no_op_new); + + logf(LOG_LOG, "new build %8d", is->no_fbuilds); + logf(LOG_LOG, "append %8d", is->no_appds); + logf(LOG_LOG, " merges %8d", is->no_merges); + logf(LOG_LOG, " singles %8d", is->no_singles); + logf(LOG_LOG, " no-ops %8d", is->no_non); + + logf(LOG_LOG, "read blocks %8d", is->no_read); + logf(LOG_LOG, "read keys: %8d %8.1f k/bl", + is->no_read_keys, + 1.0*(is->no_read_keys+1)/(is->no_read+1) ); + logf(LOG_LOG, "read main-k %8d %8.1f %% of keys", + is->no_read_main, + 100.0*(is->no_read_main+1)/(is->no_read_keys+1) ); + logf(LOG_LOG, "read ends: %8d %8.1f k/e", + is->no_read_eof, + 1.0*(is->no_read_keys+1)/(is->no_read_eof+1) ); + s= is->no_seek_nxt+ is->no_seek_sam+ is->no_seek_fwd + + is->no_seek_prv+ is->no_seek_bak+ is->no_seek_cat; + if (s==0) + s++; + logf(LOG_LOG, "seek same %8d %8.1f%%", + is->no_seek_sam, 100.0*is->no_seek_sam/s ); + logf(LOG_LOG, "seek next %8d %8.1f%%", + is->no_seek_nxt, 100.0*is->no_seek_nxt/s ); + logf(LOG_LOG, "seek prev %8d %8.1f%%", + is->no_seek_prv, 100.0*is->no_seek_prv/s ); + logf(LOG_LOG, "seek forw %8d %8.1f%%", + is->no_seek_fwd, 100.0*is->no_seek_fwd/s ); + logf(LOG_LOG, "seek back %8d %8.1f%%", + is->no_seek_bak, 100.0*is->no_seek_bak/s ); + logf(LOG_LOG, "seek cat %8d %8.1f%%", + is->no_seek_cat, 100.0*is->no_seek_cat/s ); } xfree (is->files); xfree (is->method); @@ -232,9 +301,29 @@ int isamd_close (ISAMD is) return 0; } +static void isamd_seek_stat(ISAMD is, int cat, int pos) +{ + if (cat != is->last_cat) + is->no_seek_cat++; + else if ( pos == is->last_pos) + is->no_seek_sam++; + else if ( pos == is->last_pos+1) + is->no_seek_nxt++; + else if ( pos == is->last_pos-1) + is->no_seek_prv++; + else if ( pos > is->last_pos) + is->no_seek_fwd++; + else if ( pos < is->last_pos) + is->no_seek_bak++; + is->last_cat = cat; + is->last_pos = pos; +} /* seek_stat */ + int isamd_read_block (ISAMD is, int cat, int pos, char *dst) { + isamd_seek_stat(is,cat,pos); ++(is->files[cat].no_reads); + ++(is->no_read); if (is->method->debug > 6) logf (LOG_LOG, "isamd: read_block %d:%d",cat, pos); return bf_read (is->files[cat].bf, pos, 0, 0, dst); @@ -242,7 +331,9 @@ int isamd_read_block (ISAMD is, int cat, int pos, char *dst) int isamd_write_block (ISAMD is, int cat, int pos, char *src) { + isamd_seek_stat(is,cat,pos); ++(is->files[cat].no_writes); + ++(is->no_write); if (is->method->debug > 6) logf (LOG_LOG, "isamd: write_block %d:%d", cat, pos); return bf_write (is->files[cat].bf, pos, 0, 0, src); @@ -464,26 +555,52 @@ void isamd_pp_close (ISAMD_PP pp) (*is->method->code_stop)(ISAMD_DECODE, pp->decodeClientData); isamd_free_diffs(pp); /* see merge-d.h */ - xfree (pp->buf); - xfree (pp); if (is->method->debug > 5) - logf (LOG_LOG, "isamd_pp_close %p %d=%d:%d sz=%d n=%d=%d:%d", + logf (LOG_LOG, "isamd_pp_close %p %d=%d:%d sz=%d n=%d=%d:%d nk=%d", pp, isamd_addr(pp->pos, pp->cat), pp->cat, pp->pos, pp->size, - pp->next, isamd_type(pp->next), isamd_block(pp->next) ); + pp->next, isamd_type(pp->next), isamd_block(pp->next), + pp->numKeys ); + xfree (pp->buf); + xfree (pp); } +ISAMD_PP isamd_pp_create (ISAMD is, int cat) +/* creates a pp_buff without data in it. pos=0, cat as given */ +{ + ISAMD_PP pp = (ISAMD_PP) xmalloc (sizeof(*pp)); + int sz = is->method->filecat[is->max_cat].bsize; + + pp->numKeys = 0; + pp->buf = (char *) xmalloc (sz); + memset(pp->buf,'\0',sz); /* clear the buffer, for new blocks */ + + pp->next = 0; + pp->size = 0; + pp->offset = 0; + pp->is = is; + pp->diffs=0; + pp->diffbuf=0; + pp->diffinfo=0; + pp->decodeClientData = (*is->method->code_start)(ISAMD_DECODE); + pp->cat = cat; + pp->pos = 0; + is->no_op_new++; + return pp; + +} + -ISAMD_PP isamd_pp_open (ISAMD is, ISAMD_P ipos) +ISAMD_PP isamd_pp_open (ISAMD is, const char *dictbuf, int dictlen) { + ISAMD_P ipos; ISAMD_PP pp = (ISAMD_PP) xmalloc (sizeof(*pp)); char *src; int sz = is->method->filecat[is->max_cat].bsize; /* always allocate for the largest blocks, saves trouble */ - - pp->cat = isamd_type(ipos); - pp->pos = isamd_block(ipos); - + int dictnum; + + pp->numKeys = 0; src = pp->buf = (char *) xmalloc (sz); memset(src,'\0',sz); /* clear the buffer, for new blocks */ @@ -491,12 +608,36 @@ ISAMD_PP isamd_pp_open (ISAMD is, ISAMD_P ipos) pp->size = 0; pp->offset = 0; pp->is = is; - pp->decodeClientData = (*is->method->code_start)(ISAMD_DECODE); - pp->numKeys = 0; -// pp->diffs=0; + pp->diffs=0; pp->diffbuf=0; pp->diffinfo=0; + pp->decodeClientData = (*is->method->code_start)(ISAMD_DECODE); + dictnum=*dictbuf; /* numkeys for internals, 0 for externals */ + + if (0==dictnum) + { + memcpy(&ipos, dictbuf+1, sizeof(ISAMD_P) ); + } + else /* dictionary block, fake a real one */ + { + pp->cat=0; + pp->pos=0; + if (is->method->debug > 5) + logf (LOG_LOG, "isamd_pp_open dict"); + pp->numKeys=(unsigned char) dictbuf[0]; + memcpy(pp->buf+ISAMD_BLOCK_OFFSET_1, dictbuf+1,dictlen-1); + pp->size=pp->offset=dictlen+ISAMD_BLOCK_OFFSET_1-1; + is->no_op_single++; + return pp; + } /* dict block */ + + pp->cat = isamd_type(ipos); + pp->pos = isamd_block(ipos); + + if (0==pp->pos) + is->no_op_new++; + if (pp->pos) { src = pp->buf; @@ -507,27 +648,20 @@ ISAMD_PP isamd_pp_open (ISAMD is, ISAMD_P ipos) src += sizeof(pp->size); memcpy (&pp->numKeys, src, sizeof(pp->numKeys)); src += sizeof(pp->numKeys); -// memcpy (&pp->diffs, src, sizeof(pp->diffs)); -// src += sizeof(pp->diffs); - assert (pp->next != pp->pos); + assert (pp->next != isamd_addr(pp->pos,pp->cat)); pp->offset = src - pp->buf; assert (pp->offset == ISAMD_BLOCK_OFFSET_1); -// if (0==pp->diffs) -// ++(is->files[pp->cat].no_op_nodiff); -// else -// if(pp->diffs&1) -// ++(is->files[pp->cat].no_op_extdiff); -// else -// ++(is->files[pp->cat].no_op_intdiff); - // if (!pp->diffbuf) - // pp->diffbuf=pp->buf; + assert(pp->size>=ISAMD_BLOCK_OFFSET_1); /*??*/ + if (pp->next) + is->files[pp->cat].no_op_main++; + else + is->files[pp->cat].no_op_diffonly++; } if (is->method->debug > 5) logf (LOG_LOG, "isamd_pp_open %p %d=%d:%d sz=%d n=%d=%d:%d", pp, isamd_addr(pp->pos, pp->cat), pp->cat, pp->pos, pp->size, pp->next, isamd_type(pp->next), isamd_block(pp->next) ); - - + return pp; } @@ -536,15 +670,13 @@ ISAMD_PP isamd_pp_open (ISAMD is, ISAMD_P ipos) void isamd_buildfirstblock(ISAMD_PP pp){ char *dst=pp->buf; assert(pp->buf); - assert(pp->next != pp->pos); + assert(pp->next != isamd_addr(pp->pos,pp->cat)); memcpy(dst, &pp->next, sizeof(pp->next) ); dst += sizeof(pp->next); memcpy(dst, &pp->size,sizeof(pp->size)); dst += sizeof(pp->size); memcpy(dst, &pp->numKeys, sizeof(pp->numKeys)); dst += sizeof(pp->numKeys); -// memcpy(dst, &pp->diffs, sizeof(pp->diffs)); -// dst += sizeof(pp->diffs); assert (dst - pp->buf == ISAMD_BLOCK_OFFSET_1); if (pp->is->method->debug > 5) logf (LOG_LOG, "isamd: bldfirst: p=%d=%d:%d n=%d:%d:%d sz=%d nk=%d ", @@ -574,9 +706,10 @@ void isamd_buildlaterblock(ISAMD_PP pp){ /* returns non-zero if item could be read; 0 otherwise */ int isamd_pp_read (ISAMD_PP pp, void *buf) { + return isamd_read_item (pp, (char **) &buf); - /* note: isamd_read_item is in merge-d.c, because it is so */ - /* convoluted with the merge process */ + /* note: isamd_read_item is in merge-d.c, because it is so */ + /* convoluted with the merge process */ } /* read one main item from file - decode and store it in *dst. @@ -623,7 +756,7 @@ int isamd_read_main_item (ISAMD_PP pp, char **dst) newcat = isamd_type(pp->next); pp->pos = isamd_block(pp->next); pp->cat = isamd_type(pp->next); - + pp->is->no_read_main++; src = pp->buf; /* read block and save 'next' and 'size' entry */ isamd_read_block (is, pp->cat, pp->pos, src); @@ -658,6 +791,8 @@ int isamd_pp_num (ISAMD_PP pp) return pp->numKeys; } +#if 0 +/* for testing .. */ static char *hexdump(unsigned char *p, int len, char *buff) { static char localbuff[128]; char bytebuff[8]; @@ -671,8 +806,10 @@ static char *hexdump(unsigned char *p, int len, char *buff) { } return buff; } +#endif - +#ifdef SKIPTHIS + /* needs different arguments, or something */ void isamd_pp_dump (ISAMD is, ISAMD_P ipos) { ISAMD_PP pp; @@ -735,38 +872,5 @@ void isamd_pp_dump (ISAMD is, ISAMD_P ipos) is->method->debug=olddebug; } /* dump */ -/* - * $Log: isamd.c,v $ - * Revision 1.13 1999-09-20 15:48:06 heikki - * Small changes - * - * Revision 1.12 1999/09/13 13:28:28 heikki - * isam-d optimizing: merging input data in the same go - * - * Revision 1.11 1999/08/25 18:09:24 heikki - * Starting to optimize - * - * Revision 1.10 1999/08/24 13:17:42 heikki - * Block sizes, comments - * - * Revision 1.9 1999/08/20 12:25:58 heikki - * Statistics in isamd - * - * Revision 1.8 1999/08/18 13:28:16 heikki - * Set log levels to decent values - * - * Revision 1.6 1999/08/17 19:44:25 heikki - * Fixed memory leaks - * - * Revision 1.4 1999/08/04 14:21:18 heikki - * isam-d seems to be working. - * - * Revision 1.3 1999/07/21 14:24:50 heikki - * isamd write and read functions ok, except when diff block full. - * (merge not yet done) - * - * Revision 1.1 1999/07/14 12:34:43 heikki - * Copied from isamh, starting to change things... - * - * - */ \ No newline at end of file +#endif +