From: Heikki Levanto Date: Tue, 6 Jul 1999 09:37:04 +0000 (+0000) Subject: Working on isamh - not ready yet. X-Git-Tag: ZEBRA.1.0~95 X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=commitdiff_plain;h=5ed7bb5a4a7e46271cc161304b114eae54b53674 Working on isamh - not ready yet. --- diff --git a/include/isamh.h b/include/isamh.h index aca5503..fde8cfb 100644 --- a/include/isamh.h +++ b/include/isamh.h @@ -71,6 +71,8 @@ int isamh_block_size (ISAMH is, int type); #define isamh_type(x) ((x) & 7) #define isamh_block(x) ((x) >> 3) +void isamh_buildfirstblock(ISAMH_PP pp); +void isamh_buildlaterblock(ISAMH_PP pp); #ifdef __cplusplus } @@ -81,7 +83,10 @@ int isamh_block_size (ISAMH is, int type); /* * $Log: isamh.h,v $ - * Revision 1.1 1999-06-30 15:06:28 heikki + * Revision 1.2 1999-07-06 09:37:04 heikki + * Working on isamh - not ready yet. + * + * Revision 1.1 1999/06/30 15:06:28 heikki * copied from isamc.h, simplifying * */ diff --git a/index/kcompare.c b/index/kcompare.c index 05d17ba..8f17a7b 100644 --- a/index/kcompare.c +++ b/index/kcompare.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: kcompare.c,v $ - * Revision 1.30 1999-06-30 15:07:23 heikki + * Revision 1.31 1999-07-06 09:37:04 heikki + * Working on isamh - not ready yet. + * + * Revision 1.30 1999/06/30 15:07:23 heikki * Adding isamh stuff * * Revision 1.29 1999/06/30 09:08:23 adam @@ -352,8 +355,9 @@ ISAMH_M key_isamh_m (Res res) me->code_start = iscz1_code_start; me->code_item = iscz1_code_item; me->code_stop = iscz1_code_stop; + me->code_reset = iscz1_code_reset; - me->debug = atoi(res_get_def (res, "isamhDebug", "2")); + me->debug = atoi(res_get_def (res, "isamhDebug", "9")); return me; } diff --git a/isamc/isamh-p.h b/isamc/isamh-p.h index 3957303..6e8078b 100644 --- a/isamc/isamh-p.h +++ b/isamc/isamh-p.h @@ -50,7 +50,9 @@ typedef struct ISAMH_file_s { struct ISAMH_s { int no_files; int max_cat; - char *merge_buf; + // char *merge_buf; + char *startblock; /* start of the chain, update lastptr and numKeys here */ + char *lastblock; /* end of the chain, append here */ ISAMH_M method; ISAMH_file files; }; @@ -59,17 +61,22 @@ struct ISAMH_PP_s { char *buf; ISAMH_BLOCK_SIZE offset; ISAMH_BLOCK_SIZE size; - int cat; - int pos; - int next; + int cat; /* category of this block */ + int pos; /* block number of this block */ + int next; /* number of the next block */ ISAMH is; void *decodeClientData; int deleteFlag; int numKeys; + ISAMH_BLOCK_SIZE lastblock; /* last block in chain */ }; -#define ISAMH_BLOCK_OFFSET_N (sizeof(int)+sizeof(ISAMH_BLOCK_SIZE)) -#define ISAMH_BLOCK_OFFSET_1 (sizeof(int)+sizeof(ISAMH_BLOCK_SIZE)+sizeof(int)) +#define ISAMH_BLOCK_OFFSET_N (sizeof(int) + \ + sizeof(ISAMH_BLOCK_SIZE)) +#define ISAMH_BLOCK_OFFSET_1 (sizeof(int) + \ + sizeof(ISAMH_BLOCK_SIZE) + \ + sizeof(int) + \ + sizeof(ISAMH_BLOCK_SIZE)) int isamh_alloc_block (ISAMH is, int cat); void isamh_release_block (ISAMH is, int cat, int pos); int isamh_read_block (ISAMH is, int cat, int pos, char *dst); @@ -83,7 +90,10 @@ int isamh_write_block (ISAMH is, int cat, int pos, char *src); /* * $Log: isamh-p.h,v $ - * Revision 1.1 1999-06-30 15:05:45 heikki + * Revision 1.2 1999-07-06 09:37:05 heikki + * Working on isamh - not ready yet. + * + * Revision 1.1 1999/06/30 15:05:45 heikki * opied from isamc.p.h, starting to simplify * */ \ No newline at end of file diff --git a/isamc/isamh.c b/isamc/isamh.c index a78532f..011f784 100644 --- a/isamc/isamh.c +++ b/isamc/isamh.c @@ -34,8 +34,9 @@ ISAMH_M isamh_getmethod (void) { static struct ISAMH_filecat_s def_cat[] = { #if SMALL_TEST - { 32, 3 }, - { 64, 0 }, +/* blocksz, maxnum */ + { 32, 3 }, + { 64, 0 }, #else { 24, 10 }, { 128, 10 }, @@ -85,15 +86,18 @@ ISAMH isamh_open (BFiles bfs, const char *name, int writeflag, ISAMH_M method) if (is->method->debug) logf (LOG_LOG, "isc:%6d %6d", filecat[i].bsize, filecat[i].mblocks); - if (max_buf_size < filecat[i].mblocks * filecat[i].bsize) - max_buf_size = filecat[i].mblocks * filecat[i].bsize; + if (max_buf_size < filecat[i].bsize) + max_buf_size = filecat[i].bsize; } while (filecat[i++].mblocks); is->no_files = i; is->max_cat = --i; +#ifdef SKIPTHIS /* max_buf_size is the larget buffer to be used during merge */ max_buf_size = (1 + max_buf_size / filecat[i].bsize) * filecat[i].bsize; if (max_buf_size < (1+is->method->max_blocks_mem) * filecat[i].bsize) max_buf_size = (1+is->method->max_blocks_mem) * filecat[i].bsize; +#endif + if (is->method->debug) logf (LOG_LOG, "isc: max_buf_size %d", max_buf_size); @@ -101,11 +105,20 @@ ISAMH isamh_open (BFiles bfs, const char *name, int writeflag, ISAMH_M method) is->files = (ISAMH_file) xmalloc (sizeof(*is->files)*is->no_files); if (writeflag) { +#ifdef SKIPTHIS is->merge_buf = (char *) xmalloc (max_buf_size+256); memset (is->merge_buf, 0, max_buf_size+256); +#else + is->startblock = (char *) xmalloc (max_buf_size+256); + memset (is->startblock, 0, max_buf_size+256); + is->lastblock = (char *) xmalloc (max_buf_size+256); + memset (is->lastblock, 0, max_buf_size+256); + /* The spare 256 bytes should not be needed! */ +#endif } else - is->merge_buf = NULL; + is->startblock = is->lastblock = NULL; + for (i = 0; ino_files; i++) { char fname[512]; @@ -200,7 +213,8 @@ int isamh_close (ISAMH is) bf_close (is->files[i].bf); } xfree (is->files); - xfree (is->merge_buf); + xfree (is->startblock); + xfree (is->lastblock); xfree (is->method); xfree (is); return 0; @@ -453,7 +467,8 @@ ISAMH_PP isamh_pp_open (ISAMH is, ISAMH_P ipos) pp->decodeClientData = (*is->method->code_start)(ISAMH_DECODE); pp->deleteFlag = 0; pp->numKeys = 0; - + pp->lastblock=0; + if (pp->pos) { src = pp->buf; @@ -464,6 +479,8 @@ ISAMH_PP isamh_pp_open (ISAMH is, ISAMH_P ipos) src += sizeof(pp->size); memcpy (&pp->numKeys, src, sizeof(pp->numKeys)); src += sizeof(pp->numKeys); + memcpy (&pp->lastblock, src, sizeof(pp->lastblock)); + src += sizeof(pp->lastblock); assert (pp->next != pp->pos); pp->offset = src - pp->buf; assert (pp->offset == ISAMH_BLOCK_OFFSET_1); @@ -474,6 +491,40 @@ ISAMH_PP isamh_pp_open (ISAMH is, ISAMH_P ipos) return pp; } +void isamh_buildfirstblock(ISAMH_PP pp){ + char *dst=pp->buf; + assert(pp->buf); + assert(pp->next != pp->pos); + memcpy(dst, &pp->next, sizeof(pp->next) ); + dst += sizeof(pp->next); + memcpy(dst, &pp->size,sizeof(pp->size)); + dst += sizeof(pp->size); + memcpy(dst, &pp->numKeys, sizeof(pp->numKeys)); + dst += sizeof(pp->numKeys); + memcpy(dst, &pp->lastblock, sizeof(pp->lastblock)); + dst += sizeof(pp->lastblock); + assert (dst - pp->buf == ISAMH_BLOCK_OFFSET_1); + if (pp->is->method->debug > 2) + logf (LOG_LOG, "isamh: firstblock: sz=%d c=%d p=%d>%d>%d nk=%d", + pp->size, pp->cat, pp->pos, pp->next, pp->lastblock,pp->numKeys); +} + +void isamh_buildlaterblock(ISAMH_PP pp){ + char *dst=pp->buf; + assert(pp->buf); + assert(pp->next != pp->pos); + memcpy(dst, &pp->next, sizeof(pp->next) ); + dst += sizeof(pp->next); + memcpy(dst, &pp->size,sizeof(pp->size)); + dst += sizeof(pp->size); + assert (dst - pp->buf == ISAMH_BLOCK_OFFSET_N); + if (pp->is->method->debug > 2) + logf (LOG_LOG, "isamh: laterblock: sz=%d c=%d p=%d>%d", + pp->size, pp->cat, pp->pos, pp->next); +} + + + /* returns non-zero if item could be read; 0 otherwise */ int isamh_pp_read (ISAMH_PP pp, void *buf) { @@ -550,7 +601,10 @@ int isamh_pp_num (ISAMH_PP pp) /* * $Log: isamh.c,v $ - * Revision 1.1 1999-06-30 15:04:54 heikki + * Revision 1.2 1999-07-06 09:37:05 heikki + * Working on isamh - not ready yet. + * + * Revision 1.1 1999/06/30 15:04:54 heikki * Copied from isamc.c, slowly starting to simplify... * */ \ No newline at end of file diff --git a/isamc/merge.c b/isamc/merge.c index 39c293e..5970471 100644 --- a/isamc/merge.c +++ b/isamc/merge.c @@ -9,7 +9,6 @@ #include #include #include - #include #include "isamc-p.h" #include "isamh-p.h" @@ -463,9 +462,216 @@ ISAMC_P isc_merge (ISAMC is, ISAMC_P ipos, ISAMC_I data) return cat + firstpos * 8; } +char *hexdump(unsigned char *p, int len, char *buff) { + static char localbuff[128]; + char bytebuff[8]; + if (!buff) buff=localbuff; + *buff='\0'; + while (len--) { + sprintf(bytebuff,"%02x",*p); + p++; + strcat(buff,bytebuff); + if (len) strcat(buff,","); + } + return buff; +} + ISAMC_P isamh_append (ISAMH is, ISAMH_P ipos, ISAMH_I data) { + ISAMH_PP pp; + char f_item[128]; + char *f_item_ptr=f_item; + int fmore=1; + + char i_item[128]; + char *i_item_ptr; + int i_more=1, i_mode, i; + + char *r_out_ptr; + + char codebuffer[128]; + char *codeptr; + int codelen; + + ISAMH_PP firstpp; + void *r_clientData; /* encode client data */ + int newblock; + int newcat; + int numKeys = 0; + int maxsize; + int retval; + + pp = firstpp = isamh_pp_open (is, ipos); + assert (*is->method->code_reset); + + if ( 0==ipos) + { /* new block */ + pp->cat=0; + pp->pos = isamh_alloc_block(is,pp->cat); + pp->size= pp->offset = ISAMH_BLOCK_OFFSET_1 ; + logf(LOG_LOG,"isamh_append: starting with new block"); + } + else + { /* existing block */ + if (firstpp->lastblock == firstpp->pos) + { /* only one block, we have it already */ + pp->offset=ISAMH_BLOCK_OFFSET_1; + logf(LOG_LOG,"isamh_append: starting with one block"); + } + else + { /* TODO: Read the last block (into what buffer?) */ + pp->offset=ISAMH_BLOCK_OFFSET_N; + logf(LOG_LOG,"isamh_append: starting with multiple blocks"); + } /* get last */ + /* read pointers in it to synchronize the encoder ??!! */ + codeptr=codebuffer; + //while () { + //} + } /* existing block */ + + r_clientData = (*is->method->code_start)(ISAMH_ENCODE); + + i_item_ptr = i_item; + i_more = (*data->read_item)(data->clientData,&i_item_ptr,&i_mode); + logf(LOG_LOG,"isamh_append 1: m=%d l=%d %s", + i_mode, i_item_ptr-i_item, hexdump(i_item,i_item_ptr-i_item,0)); + + maxsize = is->method->filecat[pp->cat].bsize; + + while(i_more) { + codeptr = codebuffer; + i_item_ptr=i_item; + (*is->method->code_item)(ISAMH_ENCODE, r_clientData, &codeptr, &i_item_ptr); + codelen = codeptr-codebuffer; + + assert( (codelen < 128) && (codelen>0)); + + logf(LOG_LOG,"isamh_append: coded into %d:%s", + codelen,hexdump(codebuffer,codelen,0)); + + + if ( pp->offset + codelen > maxsize ) + { + logf(LOG_LOG,"isamh_append: need new block: %d > %d ", + pp->offset + codelen, maxsize ); + newcat = pp->cat; /* TODO - grow that block some day... */ + newblock = isamh_alloc_block(is,newcat); + pp->next = newblock; + if (firstpp!=pp) + { /* not first block, write to disk already now */ + isamh_buildlaterblock(pp); + isamh_write_block(is,pp->cat,pp->pos,pp->buf); + //if (cat != newcat) + // realloc buf !!!! + } + else + { /* we had only one block, allocate a second buffer */ + pp = (ISAMH_PP) xmalloc (sizeof(*pp)); + assert(pp); + *pp = *firstpp; /* copy most fields directly over */ + pp->buf = (char *) xmalloc (is->method->filecat[newcat].bsize); + } + pp->cat = newcat; + pp->pos = newblock; + pp->size=pp->offset=ISAMH_BLOCK_OFFSET_N ; + pp->next=0; + logf(LOG_LOG,"isamh_append: got a new block %d",pp->pos); + + /* reset the encoding, and code again */ + (*is->method->code_reset)(r_clientData); + codeptr = codebuffer; + i_item_ptr=i_item; + (*is->method->code_item)(ISAMH_ENCODE, r_clientData, &codeptr, &i_item_ptr); + codelen = codeptr-codebuffer; + logf(LOG_LOG,"isamh_append: coded again %d:%s", + codelen,hexdump(codebuffer,codelen,0)); + + } /* new block needed */ + + /* ok, now we can write it */ + memcpy(&(pp->buf[pp->offset]), codebuffer, codelen); + pp->offset += codelen; + pp->size += codelen; + firstpp->numKeys++; + + /* and try to read the next element */ + i_item_ptr = i_item; + i_more = (*data->read_item)(data->clientData,&i_item_ptr,&i_mode); + logf(LOG_LOG,"isamh_append 2: m=%d l=%d %s", + i_mode, i_item_ptr-i_item, hexdump(i_item,i_item_ptr-i_item,0)); + + } + + /* Write the last (partial) block, if needed. */ + if (pp!=firstpp) + { + isamh_buildlaterblock(pp); + isamh_write_block(is,pp->cat,pp->pos,pp->buf); + } + + /* update first block and write it */ + firstpp->lastblock = pp->pos; + isamh_buildfirstblock(firstpp); + isamh_write_block(is,firstpp->cat,firstpp->pos,firstpp->buf); + + /* release the second block, if we allocated one */ + if ( firstpp != pp ) + { + xfree(pp->buf); + xfree(pp); + } + + retval = firstpp->pos*8 + firstpp->cat; + + isamh_pp_close(firstpp); + + return retval; + +} /* isamh_append */ + +ISAMC_P test_isamh_append (ISAMH is, ISAMH_P ipos, ISAMH_I data) +/* test routines while fighting it */ +{ + /* ipos is always ==0, in my test, as I have no earlier base to insert */ + /* into. The key extractor calls this only once for each key to be inserted */ + + + ISAMH_PP pp; + char f_item[128]; + char *f_item_ptr=f_item; + int fmore=1; + + char i_item[128]; + char *i_item_ptr; + int i_more=1, i_mode, i; + + pp = isamh_pp_open (is, ipos); + logf (LOG_LOG, "isamh_append:scannig fmore loop (ipos=%d)",ipos); + while (fmore) + { + f_item_ptr=f_item; + fmore = isamh_read_item (pp,&f_item_ptr); + logf (LOG_LOG, "isamh_append: fmore=%d len=%d", + fmore, f_item_ptr-f_item); + } /* while fmore */ + + logf (LOG_LOG, "isamh_append:scannig imore loop"); + + while(i_more) { + i_item_ptr = i_item; + i_more = (*data->read_item)(data->clientData,&i_item_ptr,&i_mode); + logf(LOG_LOG,"isamh_append: mode=%d len=%d",i_mode, i_item_ptr-i_item); + } + + isamh_pp_close(pp); +} /* foo isamh_append */ + +#ifdef SKIPOLDISAM + +ISAMC_P old_isamh_append (ISAMH is, ISAMH_P ipos, ISAMH_I data) +{ + char i_item[128], *i_item_ptr; int i_more, i_mode, i; @@ -499,7 +705,7 @@ ISAMC_P isamh_append (ISAMH is, ISAMH_P ipos, ISAMH_I data) cat = pp->cat; if (debug > 1) - logf (LOG_LOG, "isc: isamh_append begin %d %d", cat, pp->pos); + logf (LOG_LOG, "isc: isamh_append begin %d %d %d", cat, pp->pos, ipos); /* read first item from i */ i_item_ptr = i_item; @@ -525,7 +731,12 @@ ISAMC_P isamh_append (ISAMH is, ISAMH_P ipos, ISAMH_I data) /* the resulting output is of the same category as the the original */ + +#ifdef SKIPTHIS /* should not happen when just appending new records */ if (r_offset <= mb[ptr].offset +is->method->filecat[cat].mfill) +#else + if (0) +#endif { /* the resulting output block is too small/empty. Delete the original (if any) @@ -542,6 +753,7 @@ ISAMC_P isamh_append (ISAMH is, ISAMH_P ipos, ISAMH_I data) } else { + /* indicate new boundary based on the original file */ mb[++ptr].block = pp->pos; mb[ptr].dirty = last_dirty; @@ -694,6 +906,9 @@ ISAMC_P isamh_append (ISAMH is, ISAMH_P ipos, ISAMH_I data) } r_offset = new_offset; } +#ifdef SKIPTHIS /* categories are handled differently in isamH */ + /* to be implemented later... */ + if (cat < is->max_cat && ptr >= is->method->filecat[cat].mblocks) { /* Max number blocks in current category reached -> @@ -739,6 +954,8 @@ ISAMC_P isamh_append (ISAMH is, ISAMH_P ipos, ISAMH_I data) if (debug > 3) logf (LOG_LOG, "isc: border=%d r_offset=%d", border, r_offset); } +#endif /* skipthis */ + } if (mb[ptr].offset < r_offset) { /* make the final boundary offset */ @@ -792,11 +1009,14 @@ ISAMC_P isamh_append (ISAMH is, ISAMH_P ipos, ISAMH_I data) isamh_pp_close (pp); return cat + firstpos * 8; } - +#endif /* SKIPOLDISAM */ /* * $Log: merge.c,v $ - * Revision 1.12 1999-06-30 15:03:55 heikki + * Revision 1.13 1999-07-06 09:37:05 heikki + * Working on isamh - not ready yet. + * + * Revision 1.12 1999/06/30 15:03:55 heikki * first take on isamh, the append-only isam structure * * Revision 1.11 1999/05/26 07:49:14 adam