X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=isamb%2Fisamb.c;h=ab9a72afcdcfb4997188ba0cec255179eb69acb9;hb=21b018670707eb7a06f0d722c8a67b66d2778690;hp=2ee7cb1b572b2186361f97608433bc0f9d6f5c8a;hpb=70ba74c9bbf739fbe82d1bc2b0fb64a547c0c9e0;p=idzebra-moved-to-github.git diff --git a/isamb/isamb.c b/isamb/isamb.c index 2ee7cb1..ab9a72a 100644 --- a/isamb/isamb.c +++ b/isamb/isamb.c @@ -1,4 +1,4 @@ -/* $Id: isamb.c,v 1.47 2004-08-03 14:54:41 heikki Exp $ +/* $Id: isamb.c,v 1.55 2004-08-18 20:00:35 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -30,12 +30,15 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #define ISAMB_DEBUG 0 #endif +#define ISAMB_MAJOR_VERSION 1 +#define ISAMB_MINOR_VERSION 0 + struct ISAMB_head { - int first_block; - int last_block; + zint first_block; + zint last_block; int block_size; int block_max; - int free_list; + zint free_list; }; #define ISAMB_DATA_OFFSET 3 @@ -56,7 +59,7 @@ struct ISAMB_head { #define CAT_NO 4 /* ISAMB_PTR_CODEC=1 var, =0 fixed */ -#define ISAMB_PTR_CODEC 0 +#define ISAMB_PTR_CODEC 1 struct ISAMB_cache_entry { ISAMB_P pos; @@ -82,10 +85,10 @@ struct ISAMB_s { int cache; /* 0=no cache, 1=use cache, -1=dummy isam (for testing only) */ int log_io; /* log level for bf_read/bf_write calls */ int log_freelist; /* log level for freelist handling */ - int skipped_numbers; /* on a leaf node */ - int returned_numbers; - int skipped_nodes[ISAMB_MAX_LEVEL]; /* [0]=skipped leaves, 1=higher etc */ - int accessed_nodes[ISAMB_MAX_LEVEL]; /* nodes we did not skip */ + zint skipped_numbers; /* on a leaf node */ + zint returned_numbers; + zint skipped_nodes[ISAMB_MAX_LEVEL]; /* [0]=skipped leaves, 1=higher etc */ + zint accessed_nodes[ISAMB_MAX_LEVEL]; /* nodes we did not skip */ }; struct ISAMB_block { @@ -108,18 +111,18 @@ struct ISAMB_PP_s { ISAMB_P pos; int level; int maxlevel; /* total depth */ - int total_size; - int no_blocks; - int skipped_numbers; /* on a leaf node */ - int returned_numbers; - int skipped_nodes[ISAMB_MAX_LEVEL]; /* [0]=skipped leaves, 1=higher etc */ - int accessed_nodes[ISAMB_MAX_LEVEL]; /* nodes we did not skip */ + zint total_size; + zint no_blocks; + zint skipped_numbers; /* on a leaf node */ + zint returned_numbers; + zint skipped_nodes[ISAMB_MAX_LEVEL]; /* [0]=skipped leaves, 1=higher etc */ + zint accessed_nodes[ISAMB_MAX_LEVEL]; /* nodes we did not skip */ struct ISAMB_block **block; }; #if ISAMB_PTR_CODEC -static void encode_ptr (char **dst, unsigned pos) +static void encode_ptr (char **dst, zint pos) { unsigned char *bp = (unsigned char*) *dst; @@ -132,7 +135,7 @@ static void encode_ptr (char **dst, unsigned pos) *dst = (char *) bp; } #else -static void encode_ptr (char **dst, unsigned pos) +static void encode_ptr (char **dst, zint pos) { memcpy(*dst, &pos, sizeof(pos)); (*dst) += sizeof(pos); @@ -140,23 +143,23 @@ static void encode_ptr (char **dst, unsigned pos) #endif #if ISAMB_PTR_CODEC -static void decode_ptr (char **src1, int *pos) +static void decode_ptr (const char **src1, zint *pos) { - unsigned char **src = (unsigned char **) src1; - unsigned d = 0; + const unsigned char **src = (const unsigned char **) src1; + zint d = 0; unsigned char c; unsigned r = 0; while (((c = *(*src)++) & 128)) { - d += ((c & 127) << r); + d += ((zint) (c & 127) << r); r += 7; } - d += (c << r); + d += ((zint) c << r); *pos = d; } #else -static void decode_ptr (char **src, int *pos) +static void decode_ptr (const char **src, zint *pos) { memcpy (pos, *src, sizeof(*pos)); (*src) += sizeof(*pos); @@ -186,6 +189,7 @@ ISAMB isamb_open (BFiles bfs, const char *name, int writeflag, ISAMC_M *method, for (i = 0; ino_cat; i++) { char fname[DST_BUF_SIZE]; + char hbuf[DST_BUF_SIZE]; isamb->file[i].cache_entries = 0; isamb->file[i].head_dirty = 0; sprintf (fname, "%s%c", name, i+'A'); @@ -195,15 +199,54 @@ ISAMB isamb_open (BFiles bfs, const char *name, int writeflag, ISAMC_M *method, else isamb->file[i].bf = bf_open (bfs, fname, b_size, writeflag); - - if (!bf_read (isamb->file[i].bf, 0, 0, sizeof(struct ISAMB_head), - &isamb->file[i].head)) + /* fill-in default values (for empty isamb) */ + isamb->file[i].head.first_block = ISAMB_CACHE_ENTRY_SIZE/b_size+1; + isamb->file[i].head.last_block = isamb->file[i].head.first_block; + isamb->file[i].head.block_size = b_size; + isamb->file[i].head.block_max = b_size - ISAMB_DATA_OFFSET; + isamb->file[i].head.free_list = 0; + if (bf_read (isamb->file[i].bf, 0, 0, 0, hbuf)) { - isamb->file[i].head.first_block = ISAMB_CACHE_ENTRY_SIZE/b_size+1; - isamb->file[i].head.last_block = isamb->file[i].head.first_block; - isamb->file[i].head.block_size = b_size; - isamb->file[i].head.block_max = b_size - ISAMB_DATA_OFFSET; - isamb->file[i].head.free_list = 0; + /* got header assume "isamb"major minor len can fit in 16 bytes */ + zint zint_tmp; + int major, minor, len, pos = 0; + int left; + const char *src = 0; + if (memcmp(hbuf, "isamb", 5)) + { + logf(LOG_WARN, "bad isamb header for file %s", fname); + return 0; + } + if (sscanf(hbuf+5, "%d %d %d", &major, &minor, &len) != 3) + { + logf(LOG_WARN, "bad isamb header for file %s", fname); + return 0; + } + if (major != ISAMB_MAJOR_VERSION) + { + logf(LOG_WARN, "bad major version for file %s %d, must be %d", + fname, major, ISAMB_MAJOR_VERSION); + return 0; + } + for (left = len - b_size; left > 0; left = left - b_size) + { + pos++; + if (!bf_read (isamb->file[i].bf, pos, 0, 0, hbuf + pos*b_size)) + { + logf(LOG_WARN, "truncated isamb header for " + "file=%s len=%d pos=%d", + fname, len, pos); + return 0; + } + } + src = hbuf + 16; + decode_ptr(&src, &isamb->file[i].head.first_block); + decode_ptr(&src, &isamb->file[i].head.last_block); + decode_ptr(&src, &zint_tmp); + isamb->file[i].head.block_size = zint_tmp; + decode_ptr(&src, &zint_tmp); + isamb->file[i].head.block_max = zint_tmp; + decode_ptr(&src, &isamb->file[i].head.free_list); } assert (isamb->file[i].head.block_size >= ISAMB_DATA_OFFSET); isamb->file[i].head_dirty = 0; @@ -235,11 +278,11 @@ static void flush_blocks (ISAMB b, int cat) static int get_block (ISAMB b, ISAMC_P pos, char *userbuf, int wr) { - int cat = pos&CAT_MASK; - int off = ((pos/CAT_MAX) & + int cat = (int) (pos&CAT_MASK); + int off = (int) (((pos/CAT_MAX) & (ISAMB_CACHE_ENTRY_SIZE / b->file[cat].head.block_size - 1)) - * b->file[cat].head.block_size; - int norm = pos / (CAT_MASK*ISAMB_CACHE_ENTRY_SIZE / b->file[cat].head.block_size); + * b->file[cat].head.block_size); + zint norm = pos / (CAT_MASK*ISAMB_CACHE_ENTRY_SIZE / b->file[cat].head.block_size); int no = 0; struct ISAMB_cache_entry **ce, *ce_this = 0, **ce_last = 0; @@ -310,17 +353,45 @@ void isamb_close (ISAMB isamb) { int i; for (i=0;isamb->accessed_nodes[i];i++) - logf(LOG_DEBUG,"isamb_close level leaf-%d: %d read, %d skipped", + logf(LOG_DEBUG,"isamb_close level leaf-%d: "ZINT_FORMAT" read, " + ZINT_FORMAT" skipped", i, isamb->accessed_nodes[i], isamb->skipped_nodes[i]); - logf(LOG_DEBUG,"isamb_close returned %d values, skipped %d", + logf(LOG_DEBUG,"isamb_close returned "ZINT_FORMAT" values, " + "skipped "ZINT_FORMAT, isamb->skipped_numbers, isamb->returned_numbers); for (i = 0; ino_cat; i++) { flush_blocks (isamb, i); if (isamb->file[i].head_dirty) - bf_write (isamb->file[i].bf, 0, 0, - sizeof(struct ISAMB_head), &isamb->file[i].head); - + { + char hbuf[DST_BUF_SIZE]; + int major = ISAMB_MAJOR_VERSION; + int minor = ISAMB_MINOR_VERSION; + int len = 16; + char *dst = hbuf + 16; + int pos = 0, left; + int b_size = isamb->file[i].head.block_size; + + encode_ptr(&dst, isamb->file[i].head.first_block); + encode_ptr(&dst, isamb->file[i].head.last_block); + encode_ptr(&dst, isamb->file[i].head.block_size); + encode_ptr(&dst, isamb->file[i].head.block_max); + encode_ptr(&dst, isamb->file[i].head.free_list); + memset(dst, '\0', 16); /* ensure no random bytes are written */ + + len = dst - hbuf; + + /* print exactly 16 bytes (including trailing 0) */ + sprintf(hbuf, "isamb%02d %02d %02d\r\n", major, minor, len); + + bf_write (isamb->file[i].bf, pos, 0, 0, hbuf); + + for (left = len - b_size; left > 0; left = left - b_size) + { + pos++; + bf_write (isamb->file[i].bf, pos, 0, 0, hbuf + pos*b_size); + } + } bf_close (isamb->file[i].bf); } xfree (isamb->file); @@ -330,13 +401,13 @@ void isamb_close (ISAMB isamb) static struct ISAMB_block *open_block (ISAMB b, ISAMC_P pos) { - int cat = pos&CAT_MASK; + int cat = (int) (pos&CAT_MASK); struct ISAMB_block *p; if (!pos) return 0; p = xmalloc (sizeof(*p)); p->pos = pos; - p->cat = pos & CAT_MASK; + p->cat = (int) (pos & CAT_MASK); p->buf = xmalloc (b->file[cat].head.block_size); p->cbuf = 0; @@ -355,14 +426,15 @@ static struct ISAMB_block *open_block (ISAMB b, ISAMC_P pos) p->size = (p->buf[1] + 256 * p->buf[2]) - ISAMB_DATA_OFFSET; if (p->size < 0) { - yaz_log (LOG_FATAL, "Bad block size %d in pos=%d\n", p->size, pos); + yaz_log (LOG_FATAL, "Bad block size %d in pos=" ZINT_FORMAT "\n", + p->size, pos); } assert (p->size >= 0); p->offset = 0; p->dirty = 0; p->deleted = 0; - p->decodeClientData = (*b->method->code_start)(ISAMC_DECODE); - yaz_log (LOG_DEBUG, "isamb_open_block: Opened block %d ofs=%d",pos, p->offset); + p->decodeClientData = (*b->method->codec.start)(); + yaz_log (LOG_DEBUG, "isamb_open_block: Opened block " ZINT_FORMAT " ofs=%d",pos, p->offset); return p; } @@ -375,7 +447,7 @@ struct ISAMB_block *new_block (ISAMB b, int leaf, int cat) if (!b->file[cat].head.free_list) { - int block_no; + zint block_no; block_no = b->file[cat].head.last_block++; p->pos = block_no * CAT_MAX + cat; } @@ -393,7 +465,7 @@ struct ISAMB_block *new_block (ISAMB b, int leaf, int cat) abort (); } } - yaz_log (b->log_freelist, "got block %d from freelist %d:%d", p->pos, + yaz_log (b->log_freelist, "got block " ZINT_FORMAT " from freelist %d:" ZINT_FORMAT, p->pos, cat, p->pos/CAT_MAX); memcpy (&b->file[cat].head.free_list, p->buf, sizeof(int)); } @@ -406,7 +478,7 @@ struct ISAMB_block *new_block (ISAMB b, int leaf, int cat) p->dirty = 1; p->deleted = 0; p->offset = 0; - p->decodeClientData = (*b->method->code_start)(ISAMC_DECODE); + p->decodeClientData = (*b->method->codec.start)(); return p; } @@ -423,6 +495,7 @@ struct ISAMB_block *new_int (ISAMB b, int cat) static void check_block (ISAMB b, struct ISAMB_block *p) { + assert(b); /* mostly to make the compiler shut up about unused b */ if (p->leaf) { ; @@ -431,17 +504,17 @@ static void check_block (ISAMB b, struct ISAMB_block *p) { /* sanity check */ char *startp = p->bytes; - char *src = startp; + const char *src = startp; char *endp = p->bytes + p->size; - int pos; + ISAMB_P pos; decode_ptr (&src, &pos); assert ((pos&CAT_MASK) == p->cat); while (src != endp) { - int item_len; + zint item_len; decode_ptr (&src, &item_len); - assert (item_len > 0 && item_len < 30); + assert (item_len > 0 && item_len < 80); src += item_len; decode_ptr (&src, &pos); assert ((pos&CAT_MASK) == p->cat); @@ -455,7 +528,7 @@ void close_block (ISAMB b, struct ISAMB_block *p) return; if (p->deleted) { - yaz_log (b->log_freelist, "release block %d from freelist %d:%d", + yaz_log (b->log_freelist, "release block " ZINT_FORMAT " from freelist %d:" ZINT_FORMAT, p->pos, p->cat, p->pos/CAT_MAX); memcpy (p->buf, &b->file[p->cat].head.free_list, sizeof(int)); b->file[p->cat].head.free_list = p->pos; @@ -479,7 +552,7 @@ void close_block (ISAMB b, struct ISAMB_block *p) bf_write (b->file[p->cat].bf, p->pos/CAT_MAX, 0, 0, p->buf); } } - (*b->method->code_stop)(ISAMC_DECODE, p->decodeClientData); + (*b->method->codec.stop)(p->decodeClientData); xfree (p->buf); xfree (p); } @@ -489,17 +562,17 @@ int insert_sub (ISAMB b, struct ISAMB_block **p, ISAMC_I *stream, struct ISAMB_block **sp, void *sub_item, int *sub_size, - void *max_item); + const void *max_item); int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, int *mode, ISAMC_I *stream, struct ISAMB_block **sp, - void *split_item, int *split_size, void *last_max_item) + void *split_item, int *split_size, const void *last_max_item) { char *startp = p->bytes; - char *src = startp; + const char *src = startp; char *endp = p->bytes + p->size; - int pos; + ISAMB_P pos; struct ISAMB_block *sub_p1 = 0, *sub_p2 = 0; char sub_item[DST_ITEM_MAX]; int sub_size; @@ -511,9 +584,9 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, decode_ptr (&src, &pos); while (src != endp) { - int item_len; + zint item_len; int d; - char *src0 = src; + const char *src0 = src; decode_ptr (&src, &item_len); d = (*b->method->compare_item)(src, lookahead_item); if (d > 0) @@ -542,7 +615,7 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, char dst_buf[DST_BUF_SIZE]; char *dst = dst_buf; - assert (sub_size < 30 && sub_size > 1); + assert (sub_size < 80 && sub_size > 1); memcpy (dst, startp, src - startp); @@ -567,8 +640,9 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, } else { + zint split_size_tmp; int p_new_size; - char *half; + const char *half; src = dst_buf; endp = dst; @@ -576,14 +650,17 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, decode_ptr (&src, &pos); while (src <= half) { - decode_ptr (&src, split_size); + decode_ptr (&src, &split_size_tmp); + *split_size = (int) split_size_tmp; + src += *split_size; decode_ptr (&src, &pos); } p_new_size = src - dst_buf; memcpy (p->bytes, dst_buf, p_new_size); - decode_ptr (&src, split_size); + decode_ptr (&src, &split_size_tmp); + *split_size = (int) split_size_tmp; memcpy (split_item, src, *split_size); src += *split_size; @@ -600,19 +677,19 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, return more; } - int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, int *lookahead_mode, ISAMC_I *stream, struct ISAMB_block **sp2, void *sub_item, int *sub_size, - void *max_item) + const void *max_item) { struct ISAMB_block *p = *sp1; - char *src = 0, *endp = 0; + char *endp = 0; + const char *src = 0; char dst_buf[DST_BUF_SIZE], *dst = dst_buf; int new_size; - void *c1 = (*b->method->code_start)(ISAMC_DECODE); - void *c2 = (*b->method->code_start)(ISAMC_ENCODE); + void *c1 = (*b->method->codec.start)(); + void *c2 = (*b->method->codec.start)(); int more = 1; int quater = b->file[b->no_cat-1].head.block_max / CAT_MAX; char *cut = dst_buf + quater * 2; @@ -629,11 +706,10 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, src = p->bytes; endp = p->bytes + p->size; - (*b->method->code_item)(ISAMC_DECODE, c1, &file_item, &src); + (*b->method->codec.decode)(c1, &file_item, &src); while (1) { - char *dst_item = 0; - char *dst_0 = dst; + const char *dst_item = 0; char *lookahead_next; int d = -1; @@ -657,25 +733,23 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, } else if (!half1 && dst > cut) { - char *dst_item_0 = dst_item; + const char *dst_item_0 = dst_item; half1 = dst; /* candidate for splitting */ - (*b->method->code_item)(ISAMC_ENCODE, c2, &dst, &dst_item); + (*b->method->codec.encode)(c2, &dst, &dst_item); cut_item_size = dst_item - dst_item_0; + assert(cut_item_size > 0); memcpy (cut_item_buf, dst_item_0, cut_item_size); half2 = dst; } else - (*b->method->code_item)(ISAMC_ENCODE, c2, &dst, &dst_item); + (*b->method->codec.encode)(c2, &dst, &dst_item); if (d > 0) { if (dst > maxp) - { - dst = dst_0; lookahead_item = 0; - } else { lookahead_next = lookahead_item; @@ -708,21 +782,22 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, if (src == endp) break; file_item = file_item_buf; - (*b->method->code_item)(ISAMC_DECODE, c1, &file_item, &src); + (*b->method->codec.decode)(c1, &file_item, &src); } else { if (src == endp) break; file_item = file_item_buf; - (*b->method->code_item)(ISAMC_DECODE, c1, &file_item, &src); + (*b->method->codec.decode)(c1, &file_item, &src); } } } maxp = dst_buf + b->file[b->no_cat-1].head.block_max + quater; while (lookahead_item) { - char *dst_item = lookahead_item; + char *dst_item; + const char *src = lookahead_item; char *dst_0 = dst; if (max_item && @@ -738,18 +813,19 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, } else if (!half1 && dst > cut) { - char *dst_item_0 = dst_item; + const char *src_0 = src; half1 = dst; /* candidate for splitting */ - (*b->method->code_item)(ISAMC_ENCODE, c2, &dst, &dst_item); + (*b->method->codec.encode)(c2, &dst, &src); - cut_item_size = dst_item - dst_item_0; - memcpy (cut_item_buf, dst_item_0, cut_item_size); + cut_item_size = src - src_0; + assert(cut_item_size > 0); + memcpy (cut_item_buf, src_0, cut_item_size); half2 = dst; } else - (*b->method->code_item)(ISAMC_ENCODE, c2, &dst, &dst_item); + (*b->method->codec.encode)(c2, &dst, &src); if (dst > maxp) { @@ -789,23 +865,25 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, if (new_size > b->file[p->cat].head.block_max) { char *first_dst; - char *cut_item = cut_item_buf; + const char *cut_item = cut_item_buf; assert (half1); assert (half2); - /* first half */ + assert(cut_item_size > 0); + + /* first half */ p->size = half1 - dst_buf; memcpy (p->bytes, dst_buf, half1 - dst_buf); /* second half */ *sp2 = new_leaf (b, p->cat); - (*b->method->code_reset)(c2); + (*b->method->codec.reset)(c2); first_dst = (*sp2)->bytes; - (*b->method->code_item)(ISAMC_ENCODE, c2, &first_dst, &cut_item); + (*b->method->codec.encode)(c2, &first_dst, &cut_item); memcpy (first_dst, half2, dst - half2); @@ -820,8 +898,8 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, memcpy (p->bytes, dst_buf, dst - dst_buf); p->size = new_size; } - (*b->method->code_stop)(ISAMC_DECODE, c1); - (*b->method->code_stop)(ISAMC_ENCODE, c2); + (*b->method->codec.stop)(c1); + (*b->method->codec.stop)(c2); *sp1 = p; return more; } @@ -831,7 +909,7 @@ int insert_sub (ISAMB b, struct ISAMB_block **p, void *new_item, ISAMC_I *stream, struct ISAMB_block **sp, void *sub_item, int *sub_size, - void *max_item) + const void *max_item) { if (!*p || (*p)->leaf) return insert_leaf (b, p, new_item, mode, stream, sp, sub_item, @@ -851,9 +929,9 @@ int isamb_unlink (ISAMB b, ISAMC_P pos) p1->deleted = 1; if (!p1->leaf) { - int sub_p; - int item_len; - char *src = p1->bytes + p1->offset; + zint sub_p; + zint item_len; + const char *src = p1->bytes + p1->offset; decode_ptr(&src, &sub_p); isamb_unlink(b, sub_p); @@ -870,7 +948,7 @@ int isamb_unlink (ISAMB b, ISAMC_P pos) return 0; } -int isamb_merge (ISAMB b, ISAMC_P pos, ISAMC_I *stream) +ISAMB_P isamb_merge (ISAMB b, ISAMC_P pos, ISAMC_I *stream) { char item_buf[DST_ITEM_MAX]; char *item_ptr; @@ -906,7 +984,7 @@ int isamb_merge (ISAMB b, ISAMC_P pos, ISAMC_I *stream) char *dst = p2->bytes + p2->size; encode_ptr (&dst, p->pos); - assert (sub_size < 20); + assert (sub_size < 40); encode_ptr (&dst, sub_size); memcpy (dst, sub_item, sub_size); dst += sub_size; @@ -944,7 +1022,7 @@ ISAMB_PP isamb_pp_open_x (ISAMB isamb, ISAMB_P pos, int *level) while (1) { struct ISAMB_block *p = open_block (isamb, pos); - char *src = p->bytes + p->offset; + const char *src = p->bytes + p->offset; pp->block[pp->level] = p; pp->total_size += p->size; @@ -975,11 +1053,13 @@ void isamb_pp_close_x (ISAMB_PP pp, int *size, int *blocks) int i; if (!pp) return; - logf(LOG_DEBUG,"isamb_pp_close lev=%d returned %d values, skipped %d", + logf(LOG_DEBUG,"isamb_pp_close lev=%d returned "ZINT_FORMAT" values," + "skipped "ZINT_FORMAT, pp->maxlevel, pp->skipped_numbers, pp->returned_numbers); for (i=pp->maxlevel;i>=0;i--) if ( pp->skipped_nodes[i] || pp->accessed_nodes[i]) - logf(LOG_DEBUG,"isamb_pp_close level leaf-%d: %d read, %d skipped", i, + logf(LOG_DEBUG,"isamb_pp_close level leaf-%d: " + ZINT_FORMAT" read, "ZINT_FORMAT" skipped", i, pp->accessed_nodes[i], pp->skipped_nodes[i]); pp->isamb->skipped_numbers += pp->skipped_numbers; pp->isamb->returned_numbers += pp->returned_numbers; @@ -1019,18 +1099,17 @@ static void isamb_dump_r (ISAMB b, ISAMB_P pos, void (*pr)(const char *str), if (pos) { struct ISAMB_block *p = open_block (b, pos); - sprintf(prefix_str, "%*s %d cat=%d size=%d max=%d", level*2, "", + sprintf(prefix_str, "%*s " ZINT_FORMAT " cat=%d size=%d max=%d", level*2, "", pos, p->cat, p->size, b->file[p->cat].head.block_max); (*pr)(prefix_str); - sprintf(prefix_str, "%*s %d", level*2, "", pos); + sprintf(prefix_str, "%*s " ZINT_FORMAT, level*2, "", pos); if (p->leaf) { while (p->offset < p->size) { - char *src = p->bytes + p->offset; + const char *src = p->bytes + p->offset; char *dst = buf; - (*b->method->code_item)(ISAMC_DECODE, p->decodeClientData, - &dst, &src); + (*b->method->codec.decode)(p->decodeClientData, &dst, &src); (*b->method->log_item)(LOG_DEBUG, buf, prefix_str); p->offset = src - (char*) p->bytes; } @@ -1038,9 +1117,9 @@ static void isamb_dump_r (ISAMB b, ISAMB_P pos, void (*pr)(const char *str), } else { - char *src = p->bytes + p->offset; - int sub; - int item_len; + const char *src = p->bytes + p->offset; + ISAMB_P sub; + zint item_len; decode_ptr (&src, &sub); p->offset = src - (char*) p->bytes; @@ -1065,7 +1144,7 @@ static void isamb_dump_r (ISAMB b, ISAMB_P pos, void (*pr)(const char *str), void isamb_dump (ISAMB b, ISAMB_P pos, void (*pr)(const char *str)) { - return isamb_dump_r(b, pos, pr, 0); + isamb_dump_r(b, pos, pr, 0); } #if 0 @@ -1125,7 +1204,7 @@ int isamb_pp_read (ISAMB_PP pp, void *buf) assert (p->offset < p->size); assert (p->leaf); src = p->bytes + p->offset; - (*pp->isamb->method->code_item)(ISAMC_DECODE, p->decodeClientData, + (*pp->isamb->method->codec.code_item)(ISAMC_DECODE, p->decodeClientData, &dst, &src); p->offset = src - (char*) p->bytes; /* key_logdump_txt(LOG_DEBUG,buf, "isamb_pp_read returning 1"); */ @@ -1135,7 +1214,7 @@ int isamb_pp_read (ISAMB_PP pp, void *buf) #else int isamb_pp_read (ISAMB_PP pp, void *buf) { - return isamb_pp_forward(pp,buf,0); + return isamb_pp_forward(pp, buf, 0); } #endif @@ -1143,18 +1222,14 @@ int isamb_pp_read (ISAMB_PP pp, void *buf) #if NEW_FORWARD == 1 -/* -#undef ISAMB_DEBUB -#define ISAMB_DEBUG 1 -*/ static int isamb_pp_on_right_node(ISAMB_PP pp, int level, const void *untilbuf) { /* looks one node higher to see if we should be on this node at all */ /* useful in backing off quickly, and in avoiding tail descends */ /* call with pp->level to begin with */ struct ISAMB_block *p; int cmp; - char *src; - int item_len; + const char *src; + zint item_len; assert(level>=0); if ( level == 0) { #if ISAMB_DEBUG @@ -1169,10 +1244,10 @@ static int isamb_pp_on_right_node(ISAMB_PP pp, int level, const void *untilbuf) { assert(p->offset>0); src=p->bytes + p->offset; - decode_ptr(&src,&item_len); + decode_ptr(&src, &item_len); #if ISAMB_DEBUG - (*pp->isamb->method->log_item)(LOG_DEBUG,untilbuf,"on_leaf: until"); - (*pp->isamb->method->log_item)(LOG_DEBUG,src,"on_leaf: value"); + (*pp->isamb->method->codec.log_item)(LOG_DEBUG,untilbuf,"on_leaf: until"); + (*pp->isamb->method->codec.log_item)(LOG_DEBUG,src,"on_leaf: value"); #endif cmp=(*pp->isamb->method->compare_item)(untilbuf,src); if (cmp<2) { @@ -1203,7 +1278,7 @@ static int isamb_pp_read_on_leaf(ISAMB_PP pp, void *buf) { /* reads the next item on the current leaf, returns 0 if end of leaf*/ struct ISAMB_block *p = pp->block[pp->level]; char *dst; - char *src; + const char *src; assert(pp); assert(buf); if (p->offset == p->size) { @@ -1214,14 +1289,14 @@ static int isamb_pp_read_on_leaf(ISAMB_PP pp, void *buf) } src=p->bytes + p->offset; dst=buf; - (*pp->isamb->method->code_item) - (ISAMC_DECODE, p->decodeClientData,&dst, &src); + (*pp->isamb->method->codec.decode)(p->decodeClientData,&dst, &src); p->offset = src - (char*) p->bytes; /* #if ISAMB_DEBUG - (*pp->isamb->method->log_item)(LOG_DEBUG, buf, "read_on_leaf returning 1"); + (*pp->isamb->method->codec.log_item)(LOG_DEBUG, buf, "read_on_leaf returning 1"); #endif */ + pp->returned_numbers++; return 1; } /* read_on_leaf */ @@ -1250,12 +1325,12 @@ static int isamb_pp_forward_on_leaf(ISAMB_PP pp, void *buf, const void *untilbuf } } /* forward_on_leaf */ -static int isamb_pp_climb_level(ISAMB_PP pp, int *pos) +static int isamb_pp_climb_level(ISAMB_PP pp, ISAMB_P *pos) { /* climbs higher in the tree, until finds a level with data left */ /* returns the node to (consider to) descend to in *pos) */ struct ISAMB_block *p = pp->block[pp->level]; - char *src; - int item_len; + const char *src; + zint item_len; #if ISAMB_DEBUG logf(LOG_DEBUG,"isamb_pp_climb_level starting " "at level %d node %d ofs=%d sz=%d", @@ -1306,7 +1381,7 @@ static int isamb_pp_climb_level(ISAMB_PP pp, int *pos) } /* climb_level */ -static int isamb_pp_forward_unode(ISAMB_PP pp, int pos, const void *untilbuf) +static zint isamb_pp_forward_unode(ISAMB_PP pp, zint pos, const void *untilbuf) { /* scans a upper node until it finds a child <= untilbuf */ /* pp points to the key value, as always. pos is the child read from */ /* the buffer */ @@ -1315,10 +1390,10 @@ static int isamb_pp_forward_unode(ISAMB_PP pp, int pos, const void *untilbuf) /* parent node, but that gets messy. Presumably the cost is */ /* pretty low anyway */ struct ISAMB_block *p = pp->block[pp->level]; - char *src=p->bytes + p->offset; - int item_len; + const char *src=p->bytes + p->offset; + zint item_len; int cmp; - int nxtpos; + zint nxtpos; #if ISAMB_DEBUG int skips=0; logf(LOG_DEBUG,"isamb_pp_forward_unode starting " @@ -1365,10 +1440,10 @@ static int isamb_pp_forward_unode(ISAMB_PP pp, int pos, const void *untilbuf) } /* forward_unode */ -static void isamb_pp_descend_to_leaf(ISAMB_PP pp, int pos, const void *untilbuf) +static void isamb_pp_descend_to_leaf(ISAMB_PP pp, ISAMB_P pos, const void *untilbuf) { /* climbs down the tree, from pos, to the leftmost leaf */ struct ISAMB_block *p = pp->block[pp->level]; - char *src; + const char *src; assert(!p->leaf); #if ISAMB_DEBUG logf(LOG_DEBUG,"isamb_pp_descend_to_leaf " @@ -1404,16 +1479,16 @@ static void isamb_pp_descend_to_leaf(ISAMB_PP pp, int pos, const void *untilbuf) static int isamb_pp_find_next_leaf(ISAMB_PP pp) { /* finds the next leaf by climbing up and down */ - int pos; + ISAMB_P pos; if (!isamb_pp_climb_level(pp,&pos)) return 0; isamb_pp_descend_to_leaf(pp, pos,0); return 1; } -static int isamb_pp_climb_desc(ISAMB_PP pp, void *buf, const void *untilbuf) +static int isamb_pp_climb_desc(ISAMB_PP pp, const void *untilbuf) { /* climbs up and descends to a leaf where values >= *untilbuf are found */ - int pos; + ISAMB_P pos; #if ISAMB_DEBUG struct ISAMB_block *p = pp->block[pp->level]; logf(LOG_DEBUG,"isamb_pp_climb_desc starting " @@ -1454,7 +1529,7 @@ int isamb_pp_forward (ISAMB_PP pp, void *buf, const void *untilbuf) #endif return 1; } - if (! isamb_pp_climb_desc( pp, buf, untilbuf)) { + if (! isamb_pp_climb_desc( pp, untilbuf)) { #if ISAMB_DEBUG logf(LOG_DEBUG,"isamb_pp_forward (f) returning notfound (B) " "at level %d node %d ofs=%d sz=%d", @@ -1523,7 +1598,7 @@ int isamb_pp_forward (ISAMB_PP pp, void *buf, const void *untilbuf) * in the node just read. */ char *dst = buf; - char *src; + const char *src; struct ISAMB_block *p = pp->block[pp->level]; int cmp; int item_len; @@ -1535,8 +1610,8 @@ int isamb_pp_forward (ISAMB_PP pp, void *buf, const void *untilbuf) #if ISAMB_DEBUG logf(LOG_DEBUG,"isamb_pp_forward starting [%p] p=%d",pp,p->pos); - (*pp->isamb->method->log_item)(LOG_DEBUG, untilbuf, "until"); - (*pp->isamb->method->log_item)(LOG_DEBUG, buf, "buf"); + (*pp->isamb->method->codec.log_item)(LOG_DEBUG, untilbuf, "until"); + (*pp->isamb->method->codec.log_item)(LOG_DEBUG, buf, "buf"); #endif while (1) @@ -1571,7 +1646,7 @@ int isamb_pp_forward (ISAMB_PP pp, void *buf, const void *untilbuf) src = p->bytes + p->offset; decode_ptr(&src, &item_len); #if ISAMB_DEBUG - (*pp->isamb->method->log_item)(LOG_DEBUG, src, + (*pp->isamb->method->codec.log_item)(LOG_DEBUG, src, " isamb_pp_forward " "climb skipping old key"); #endif @@ -1596,7 +1671,7 @@ int isamb_pp_forward (ISAMB_PP pp, void *buf, const void *untilbuf) logf(LOG_DEBUG,"isamb_pp_forward (B) on a high node. " "ofs=%d sz=%d nxtpos=%d ", p->offset,p->size,pos); - (*pp->isamb->method->log_item)(LOG_DEBUG, src, ""); + (*pp->isamb->method->codec.log_item)(LOG_DEBUG, src, ""); #endif if (untilbuf) cmp=(*pp->isamb->method->compare_item)(untilbuf,src); @@ -1655,7 +1730,7 @@ int isamb_pp_forward (ISAMB_PP pp, void *buf, const void *untilbuf) assert (p->offset < p->size); src = p->bytes + p->offset; dst=buf; - (*pp->isamb->method->code_item)(ISAMC_DECODE, p->decodeClientData, + (*pp->isamb->method->codec.decode)(p->decodeClientData, &dst, &src); p->offset = src - (char*) p->bytes; if (untilbuf) @@ -1665,19 +1740,19 @@ int isamb_pp_forward (ISAMB_PP pp, void *buf, const void *untilbuf) #if ISAMB_DEBUG logf(LOG_DEBUG,"isamb_pp_forward on a leaf. cmp=%d", cmp); - (*pp->isamb->method->log_item)(LOG_DEBUG, buf, ""); + (*pp->isamb->method->codec.log_item)(LOG_DEBUG, buf, ""); #endif if (cmp <2) { #if ISAMB_DEBUG if (untilbuf) { - (*pp->isamb->method->log_item)( + (*pp->isamb->method->codec.log_item)( LOG_DEBUG, buf, "isamb_pp_forward returning 1"); } else { - (*pp->isamb->method->log_item)( + (*pp->isamb->method->codec.log_item)( LOG_DEBUG, buf, "isamb_pp_read returning 1 (fwd)"); } #endif @@ -1696,7 +1771,7 @@ int isamb_pp_forward (ISAMB_PP pp, void *buf, const void *untilbuf) int isamb_pp_forward (ISAMB_PP pp, void *buf, const void *untilb) { char *dst = buf; - char *src; + const char *src; struct ISAMB_block *p = pp->block[pp->level]; if (!p) return 0; @@ -1777,8 +1852,7 @@ again: { char *dst0 = dst; src = p->bytes + p->offset; - (*pp->isamb->method->code_item)(ISAMC_DECODE, p->decodeClientData, - &dst, &src); + (*pp->isamb->method->codec.decode)(p->decodeClientData, &dst, &src); p->offset = src - (char*) p->bytes; if (!untilb || (*pp->isamb->method->compare_item)(untilb, dst0) <= 1) break; @@ -1793,71 +1867,97 @@ again: int isamb_pp_num (ISAMB_PP pp) { + assert(pp); /* shut up about unused arguments */ return 1; } static void isamb_pp_leaf_pos( ISAMB_PP pp, - int *current, int *total, void *dummybuf ) + double *current, double *total, + void *dummybuf ) { struct ISAMB_block *p = pp->block[pp->level]; - char *src=p->bytes; + const char *src=p->bytes; char *end=p->bytes+p->size; char *cur=p->bytes+p->offset; char *dst; + void *decodeClientData; assert(p->offset <= p->size); assert(cur <= end); assert(p->leaf); *current=0; *total=0; + decodeClientData = (pp->isamb->method->codec.start)(); + while(src < end) { dst=dummybuf; - (*pp->isamb->method->code_item) - (ISAMC_DECODE, p->decodeClientData,&dst, &src); + (*pp->isamb->method->codec.decode)(decodeClientData,&dst, &src); assert(dst<(char*) dummybuf+100); /*FIXME */ (*total)++; if (src<=cur) (*current)++; } - logf(LOG_DEBUG, "isamb_pp_leaf_pos: cur=%d tot=%d ofs=%d sz=%d lev=%d", +#if ISAMB_DEBUG + logf(LOG_DEBUG, "isamb_pp_leaf_pos: cur= %0.1f tot=%0.1f " + " ofs=%d sz=%d lev=%d", *current, *total, p->offset, p->size, pp->level); +#endif assert(src==end); + (pp->isamb->method->codec.stop)(decodeClientData); } -static void isamb_pp_upper_pos( ISAMB_PP pp, int *current, int *total, - int size, int level ) +static void isamb_pp_upper_pos( ISAMB_PP pp, double *current, double *total, + double size, int level ) { /* estimates total/current occurrences from here up, excl leaf */ struct ISAMB_block *p = pp->block[level]; - char *src=p->bytes; + const char *src=p->bytes; char *end=p->bytes+p->size; char *cur=p->bytes+p->offset; - int item_size; - int child; + zint item_size; + ISAMB_P child; + assert(level>=0); assert(!p->leaf); + +#if 1 // ISAMB_DEBUG logf(LOG_DEBUG,"isamb_pp_upper_pos at beginning l=%d " - "cur=%d tot=%d ofs=%d sz=%d pos=%d", + "cur=%0.1f tot=%0.1f " + " ofs=%d sz=%d pos=" ZINT_FORMAT, level, *current, *total, p->offset, p->size, p->pos); +#endif assert (p->offset <= p->size); - decode_ptr (&src, &child ); /* first child */ + decode_ptr (&src, &child ); /* first child */ + if (src!=cur) { + *total += size; + if (src < cur) + *current +=size; + } while(src < end) { + decode_ptr (&src, &item_size ); + assert(src+item_size<=end); + src += item_size; + decode_ptr (&src, &child ); if (src!=cur) { *total += size; if (src < cur) *current +=size; } - decode_ptr (&src, &item_size ); - assert(src+item_size<=end); - src += item_size; - decode_ptr (&src, &child ); } +#if ISAMB_DEBUG + logf(LOG_DEBUG,"isamb_pp_upper_pos before recursion l=%d " + "cur=%0.1f tot=%0.1f " + " ofs=%d sz=%d pos=" ZINT_FORMAT, + level, *current, *total, p->offset, p->size, p->pos); +#endif if (level>0) isamb_pp_upper_pos(pp, current, total, *total, level-1); } /* upper_pos */ -void isamb_pp_pos( ISAMB_PP pp, int *current, int *total ) +void isamb_pp_pos( ISAMB_PP pp, double *current, double *total ) { /* return an estimate of the current position and of the total number of */ /* occureences in the isam tree, based on the current leaf */ + /* FIXME - Isam-B ought to know how many we have, so we could return */ + /* that directly */ struct ISAMB_block *p = pp->block[pp->level]; char dummy[100]; /* 100 bytes/entry must be enough */ assert(total); @@ -1866,8 +1966,10 @@ void isamb_pp_pos( ISAMB_PP pp, int *current, int *total ) isamb_pp_leaf_pos(pp,current, total, dummy); if (pp->level>0) isamb_pp_upper_pos(pp, current, total, *total, pp->level-1); - /* - logf(LOG_DEBUG,"isamb_pp_pos: C=%d T=%d =%6.2f%%", - *current, *total, 100.0*(*current)/(*total)); - */ + *current = (double) pp->returned_numbers; + /* use the precise number, since we have it! */ +#if ISAMB_DEBUG + logf(LOG_LOG, "isamb_pp_pos returning: cur= %0.1f tot=%0.1f rn="ZINT_FORMAT, + *current, *total, pp->returned_numbers); +#endif }