X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=isamb%2Fisamb.c;h=ab9a72afcdcfb4997188ba0cec255179eb69acb9;hb=21b018670707eb7a06f0d722c8a67b66d2778690;hp=ebf0bd23f5319f6786bf2d09b51ebf6d2ae3439a;hpb=2e4e9c6def27f1e1463dcb6f205fab6a98054f38;p=idzebra-moved-to-github.git diff --git a/isamb/isamb.c b/isamb/isamb.c index ebf0bd2..ab9a72a 100644 --- a/isamb/isamb.c +++ b/isamb/isamb.c @@ -1,4 +1,4 @@ -/* $Id: isamb.c,v 1.48 2004-08-04 08:35:24 adam Exp $ +/* $Id: isamb.c,v 1.55 2004-08-18 20:00:35 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -30,6 +30,9 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #define ISAMB_DEBUG 0 #endif +#define ISAMB_MAJOR_VERSION 1 +#define ISAMB_MINOR_VERSION 0 + struct ISAMB_head { zint first_block; zint last_block; @@ -56,7 +59,7 @@ struct ISAMB_head { #define CAT_NO 4 /* ISAMB_PTR_CODEC=1 var, =0 fixed */ -#define ISAMB_PTR_CODEC 0 +#define ISAMB_PTR_CODEC 1 struct ISAMB_cache_entry { ISAMB_P pos; @@ -82,10 +85,10 @@ struct ISAMB_s { int cache; /* 0=no cache, 1=use cache, -1=dummy isam (for testing only) */ int log_io; /* log level for bf_read/bf_write calls */ int log_freelist; /* log level for freelist handling */ - int skipped_numbers; /* on a leaf node */ - int returned_numbers; - int skipped_nodes[ISAMB_MAX_LEVEL]; /* [0]=skipped leaves, 1=higher etc */ - int accessed_nodes[ISAMB_MAX_LEVEL]; /* nodes we did not skip */ + zint skipped_numbers; /* on a leaf node */ + zint returned_numbers; + zint skipped_nodes[ISAMB_MAX_LEVEL]; /* [0]=skipped leaves, 1=higher etc */ + zint accessed_nodes[ISAMB_MAX_LEVEL]; /* nodes we did not skip */ }; struct ISAMB_block { @@ -108,12 +111,12 @@ struct ISAMB_PP_s { ISAMB_P pos; int level; int maxlevel; /* total depth */ - int total_size; - int no_blocks; - int skipped_numbers; /* on a leaf node */ - int returned_numbers; - int skipped_nodes[ISAMB_MAX_LEVEL]; /* [0]=skipped leaves, 1=higher etc */ - int accessed_nodes[ISAMB_MAX_LEVEL]; /* nodes we did not skip */ + zint total_size; + zint no_blocks; + zint skipped_numbers; /* on a leaf node */ + zint returned_numbers; + zint skipped_nodes[ISAMB_MAX_LEVEL]; /* [0]=skipped leaves, 1=higher etc */ + zint accessed_nodes[ISAMB_MAX_LEVEL]; /* nodes we did not skip */ struct ISAMB_block **block; }; @@ -186,6 +189,7 @@ ISAMB isamb_open (BFiles bfs, const char *name, int writeflag, ISAMC_M *method, for (i = 0; ino_cat; i++) { char fname[DST_BUF_SIZE]; + char hbuf[DST_BUF_SIZE]; isamb->file[i].cache_entries = 0; isamb->file[i].head_dirty = 0; sprintf (fname, "%s%c", name, i+'A'); @@ -195,15 +199,54 @@ ISAMB isamb_open (BFiles bfs, const char *name, int writeflag, ISAMC_M *method, else isamb->file[i].bf = bf_open (bfs, fname, b_size, writeflag); - - if (!bf_read (isamb->file[i].bf, 0, 0, sizeof(struct ISAMB_head), - &isamb->file[i].head)) + /* fill-in default values (for empty isamb) */ + isamb->file[i].head.first_block = ISAMB_CACHE_ENTRY_SIZE/b_size+1; + isamb->file[i].head.last_block = isamb->file[i].head.first_block; + isamb->file[i].head.block_size = b_size; + isamb->file[i].head.block_max = b_size - ISAMB_DATA_OFFSET; + isamb->file[i].head.free_list = 0; + if (bf_read (isamb->file[i].bf, 0, 0, 0, hbuf)) { - isamb->file[i].head.first_block = ISAMB_CACHE_ENTRY_SIZE/b_size+1; - isamb->file[i].head.last_block = isamb->file[i].head.first_block; - isamb->file[i].head.block_size = b_size; - isamb->file[i].head.block_max = b_size - ISAMB_DATA_OFFSET; - isamb->file[i].head.free_list = 0; + /* got header assume "isamb"major minor len can fit in 16 bytes */ + zint zint_tmp; + int major, minor, len, pos = 0; + int left; + const char *src = 0; + if (memcmp(hbuf, "isamb", 5)) + { + logf(LOG_WARN, "bad isamb header for file %s", fname); + return 0; + } + if (sscanf(hbuf+5, "%d %d %d", &major, &minor, &len) != 3) + { + logf(LOG_WARN, "bad isamb header for file %s", fname); + return 0; + } + if (major != ISAMB_MAJOR_VERSION) + { + logf(LOG_WARN, "bad major version for file %s %d, must be %d", + fname, major, ISAMB_MAJOR_VERSION); + return 0; + } + for (left = len - b_size; left > 0; left = left - b_size) + { + pos++; + if (!bf_read (isamb->file[i].bf, pos, 0, 0, hbuf + pos*b_size)) + { + logf(LOG_WARN, "truncated isamb header for " + "file=%s len=%d pos=%d", + fname, len, pos); + return 0; + } + } + src = hbuf + 16; + decode_ptr(&src, &isamb->file[i].head.first_block); + decode_ptr(&src, &isamb->file[i].head.last_block); + decode_ptr(&src, &zint_tmp); + isamb->file[i].head.block_size = zint_tmp; + decode_ptr(&src, &zint_tmp); + isamb->file[i].head.block_max = zint_tmp; + decode_ptr(&src, &isamb->file[i].head.free_list); } assert (isamb->file[i].head.block_size >= ISAMB_DATA_OFFSET); isamb->file[i].head_dirty = 0; @@ -235,11 +278,11 @@ static void flush_blocks (ISAMB b, int cat) static int get_block (ISAMB b, ISAMC_P pos, char *userbuf, int wr) { - int cat = pos&CAT_MASK; - int off = ((pos/CAT_MAX) & + int cat = (int) (pos&CAT_MASK); + int off = (int) (((pos/CAT_MAX) & (ISAMB_CACHE_ENTRY_SIZE / b->file[cat].head.block_size - 1)) - * b->file[cat].head.block_size; - int norm = pos / (CAT_MASK*ISAMB_CACHE_ENTRY_SIZE / b->file[cat].head.block_size); + * b->file[cat].head.block_size); + zint norm = pos / (CAT_MASK*ISAMB_CACHE_ENTRY_SIZE / b->file[cat].head.block_size); int no = 0; struct ISAMB_cache_entry **ce, *ce_this = 0, **ce_last = 0; @@ -310,17 +353,45 @@ void isamb_close (ISAMB isamb) { int i; for (i=0;isamb->accessed_nodes[i];i++) - logf(LOG_DEBUG,"isamb_close level leaf-%d: %d read, %d skipped", + logf(LOG_DEBUG,"isamb_close level leaf-%d: "ZINT_FORMAT" read, " + ZINT_FORMAT" skipped", i, isamb->accessed_nodes[i], isamb->skipped_nodes[i]); - logf(LOG_DEBUG,"isamb_close returned %d values, skipped %d", + logf(LOG_DEBUG,"isamb_close returned "ZINT_FORMAT" values, " + "skipped "ZINT_FORMAT, isamb->skipped_numbers, isamb->returned_numbers); for (i = 0; ino_cat; i++) { flush_blocks (isamb, i); if (isamb->file[i].head_dirty) - bf_write (isamb->file[i].bf, 0, 0, - sizeof(struct ISAMB_head), &isamb->file[i].head); - + { + char hbuf[DST_BUF_SIZE]; + int major = ISAMB_MAJOR_VERSION; + int minor = ISAMB_MINOR_VERSION; + int len = 16; + char *dst = hbuf + 16; + int pos = 0, left; + int b_size = isamb->file[i].head.block_size; + + encode_ptr(&dst, isamb->file[i].head.first_block); + encode_ptr(&dst, isamb->file[i].head.last_block); + encode_ptr(&dst, isamb->file[i].head.block_size); + encode_ptr(&dst, isamb->file[i].head.block_max); + encode_ptr(&dst, isamb->file[i].head.free_list); + memset(dst, '\0', 16); /* ensure no random bytes are written */ + + len = dst - hbuf; + + /* print exactly 16 bytes (including trailing 0) */ + sprintf(hbuf, "isamb%02d %02d %02d\r\n", major, minor, len); + + bf_write (isamb->file[i].bf, pos, 0, 0, hbuf); + + for (left = len - b_size; left > 0; left = left - b_size) + { + pos++; + bf_write (isamb->file[i].bf, pos, 0, 0, hbuf + pos*b_size); + } + } bf_close (isamb->file[i].bf); } xfree (isamb->file); @@ -330,13 +401,13 @@ void isamb_close (ISAMB isamb) static struct ISAMB_block *open_block (ISAMB b, ISAMC_P pos) { - int cat = pos&CAT_MASK; + int cat = (int) (pos&CAT_MASK); struct ISAMB_block *p; if (!pos) return 0; p = xmalloc (sizeof(*p)); p->pos = pos; - p->cat = pos & CAT_MASK; + p->cat = (int) (pos & CAT_MASK); p->buf = xmalloc (b->file[cat].head.block_size); p->cbuf = 0; @@ -376,7 +447,7 @@ struct ISAMB_block *new_block (ISAMB b, int leaf, int cat) if (!b->file[cat].head.free_list) { - int block_no; + zint block_no; block_no = b->file[cat].head.last_block++; p->pos = block_no * CAT_MAX + cat; } @@ -424,6 +495,7 @@ struct ISAMB_block *new_int (ISAMB b, int cat) static void check_block (ISAMB b, struct ISAMB_block *p) { + assert(b); /* mostly to make the compiler shut up about unused b */ if (p->leaf) { ; @@ -579,7 +651,7 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, while (src <= half) { decode_ptr (&src, &split_size_tmp); - *split_size = split_size_tmp; + *split_size = (int) split_size_tmp; src += *split_size; decode_ptr (&src, &pos); @@ -588,7 +660,7 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, memcpy (p->bytes, dst_buf, p_new_size); decode_ptr (&src, &split_size_tmp); - *split_size = split_size_tmp; + *split_size = (int) split_size_tmp; memcpy (split_item, src, *split_size); src += *split_size; @@ -638,7 +710,6 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, while (1) { const char *dst_item = 0; - char *dst_0 = dst; char *lookahead_next; int d = -1; @@ -678,10 +749,7 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, if (d > 0) { if (dst > maxp) - { - dst = dst_0; lookahead_item = 0; - } else { lookahead_next = lookahead_item; @@ -985,11 +1053,13 @@ void isamb_pp_close_x (ISAMB_PP pp, int *size, int *blocks) int i; if (!pp) return; - logf(LOG_DEBUG,"isamb_pp_close lev=%d returned %d values, skipped %d", + logf(LOG_DEBUG,"isamb_pp_close lev=%d returned "ZINT_FORMAT" values," + "skipped "ZINT_FORMAT, pp->maxlevel, pp->skipped_numbers, pp->returned_numbers); for (i=pp->maxlevel;i>=0;i--) if ( pp->skipped_nodes[i] || pp->accessed_nodes[i]) - logf(LOG_DEBUG,"isamb_pp_close level leaf-%d: %d read, %d skipped", i, + logf(LOG_DEBUG,"isamb_pp_close level leaf-%d: " + ZINT_FORMAT" read, "ZINT_FORMAT" skipped", i, pp->accessed_nodes[i], pp->skipped_nodes[i]); pp->isamb->skipped_numbers += pp->skipped_numbers; pp->isamb->returned_numbers += pp->returned_numbers; @@ -1074,7 +1144,7 @@ static void isamb_dump_r (ISAMB b, ISAMB_P pos, void (*pr)(const char *str), void isamb_dump (ISAMB b, ISAMB_P pos, void (*pr)(const char *str)) { - return isamb_dump_r(b, pos, pr, 0); + isamb_dump_r(b, pos, pr, 0); } #if 0 @@ -1152,10 +1222,6 @@ int isamb_pp_read (ISAMB_PP pp, void *buf) #if NEW_FORWARD == 1 -/* -#undef ISAMB_DEBUB -#define ISAMB_DEBUG 1 -*/ static int isamb_pp_on_right_node(ISAMB_PP pp, int level, const void *untilbuf) { /* looks one node higher to see if we should be on this node at all */ /* useful in backing off quickly, and in avoiding tail descends */ @@ -1230,6 +1296,7 @@ static int isamb_pp_read_on_leaf(ISAMB_PP pp, void *buf) (*pp->isamb->method->codec.log_item)(LOG_DEBUG, buf, "read_on_leaf returning 1"); #endif */ + pp->returned_numbers++; return 1; } /* read_on_leaf */ @@ -1314,7 +1381,7 @@ static int isamb_pp_climb_level(ISAMB_PP pp, ISAMB_P *pos) } /* climb_level */ -static int isamb_pp_forward_unode(ISAMB_PP pp, int pos, const void *untilbuf) +static zint isamb_pp_forward_unode(ISAMB_PP pp, zint pos, const void *untilbuf) { /* scans a upper node until it finds a child <= untilbuf */ /* pp points to the key value, as always. pos is the child read from */ /* the buffer */ @@ -1419,7 +1486,7 @@ static int isamb_pp_find_next_leaf(ISAMB_PP pp) return 1; } -static int isamb_pp_climb_desc(ISAMB_PP pp, void *buf, const void *untilbuf) +static int isamb_pp_climb_desc(ISAMB_PP pp, const void *untilbuf) { /* climbs up and descends to a leaf where values >= *untilbuf are found */ ISAMB_P pos; #if ISAMB_DEBUG @@ -1462,7 +1529,7 @@ int isamb_pp_forward (ISAMB_PP pp, void *buf, const void *untilbuf) #endif return 1; } - if (! isamb_pp_climb_desc( pp, buf, untilbuf)) { + if (! isamb_pp_climb_desc( pp, untilbuf)) { #if ISAMB_DEBUG logf(LOG_DEBUG,"isamb_pp_forward (f) returning notfound (B) " "at level %d node %d ofs=%d sz=%d", @@ -1800,38 +1867,47 @@ again: int isamb_pp_num (ISAMB_PP pp) { + assert(pp); /* shut up about unused arguments */ return 1; } static void isamb_pp_leaf_pos( ISAMB_PP pp, - int *current, int *total, void *dummybuf ) + double *current, double *total, + void *dummybuf ) { struct ISAMB_block *p = pp->block[pp->level]; const char *src=p->bytes; char *end=p->bytes+p->size; char *cur=p->bytes+p->offset; char *dst; + void *decodeClientData; assert(p->offset <= p->size); assert(cur <= end); assert(p->leaf); *current=0; *total=0; + decodeClientData = (pp->isamb->method->codec.start)(); + while(src < end) { dst=dummybuf; - (*pp->isamb->method->codec.decode)(p->decodeClientData,&dst, &src); + (*pp->isamb->method->codec.decode)(decodeClientData,&dst, &src); assert(dst<(char*) dummybuf+100); /*FIXME */ (*total)++; if (src<=cur) (*current)++; } - logf(LOG_DEBUG, "isamb_pp_leaf_pos: cur=%d tot=%d ofs=%d sz=%d lev=%d", +#if ISAMB_DEBUG + logf(LOG_DEBUG, "isamb_pp_leaf_pos: cur= %0.1f tot=%0.1f " + " ofs=%d sz=%d lev=%d", *current, *total, p->offset, p->size, pp->level); +#endif assert(src==end); + (pp->isamb->method->codec.stop)(decodeClientData); } -static void isamb_pp_upper_pos( ISAMB_PP pp, int *current, int *total, - int size, int level ) +static void isamb_pp_upper_pos( ISAMB_PP pp, double *current, double *total, + double size, int level ) { /* estimates total/current occurrences from here up, excl leaf */ struct ISAMB_block *p = pp->block[level]; const char *src=p->bytes; @@ -1839,31 +1915,49 @@ static void isamb_pp_upper_pos( ISAMB_PP pp, int *current, int *total, char *cur=p->bytes+p->offset; zint item_size; ISAMB_P child; + assert(level>=0); assert(!p->leaf); + +#if 1 // ISAMB_DEBUG logf(LOG_DEBUG,"isamb_pp_upper_pos at beginning l=%d " - "cur=%d tot=%d ofs=%d sz=%d pos=" ZINT_FORMAT, + "cur=%0.1f tot=%0.1f " + " ofs=%d sz=%d pos=" ZINT_FORMAT, level, *current, *total, p->offset, p->size, p->pos); +#endif assert (p->offset <= p->size); decode_ptr (&src, &child ); /* first child */ + if (src!=cur) { + *total += size; + if (src < cur) + *current +=size; + } while(src < end) { + decode_ptr (&src, &item_size ); + assert(src+item_size<=end); + src += item_size; + decode_ptr (&src, &child ); if (src!=cur) { *total += size; if (src < cur) *current +=size; } - decode_ptr (&src, &item_size ); - assert(src+item_size<=end); - src += item_size; - decode_ptr (&src, &child ); } +#if ISAMB_DEBUG + logf(LOG_DEBUG,"isamb_pp_upper_pos before recursion l=%d " + "cur=%0.1f tot=%0.1f " + " ofs=%d sz=%d pos=" ZINT_FORMAT, + level, *current, *total, p->offset, p->size, p->pos); +#endif if (level>0) isamb_pp_upper_pos(pp, current, total, *total, level-1); } /* upper_pos */ -void isamb_pp_pos( ISAMB_PP pp, int *current, int *total ) +void isamb_pp_pos( ISAMB_PP pp, double *current, double *total ) { /* return an estimate of the current position and of the total number of */ /* occureences in the isam tree, based on the current leaf */ + /* FIXME - Isam-B ought to know how many we have, so we could return */ + /* that directly */ struct ISAMB_block *p = pp->block[pp->level]; char dummy[100]; /* 100 bytes/entry must be enough */ assert(total); @@ -1872,8 +1966,10 @@ void isamb_pp_pos( ISAMB_PP pp, int *current, int *total ) isamb_pp_leaf_pos(pp,current, total, dummy); if (pp->level>0) isamb_pp_upper_pos(pp, current, total, *total, pp->level-1); - /* - logf(LOG_DEBUG,"isamb_pp_pos: C=%d T=%d =%6.2f%%", - *current, *total, 100.0*(*current)/(*total)); - */ + *current = (double) pp->returned_numbers; + /* use the precise number, since we have it! */ +#if ISAMB_DEBUG + logf(LOG_LOG, "isamb_pp_pos returning: cur= %0.1f tot=%0.1f rn="ZINT_FORMAT, + *current, *total, pp->returned_numbers); +#endif }