-/* $Id: isamb.c,v 1.48 2004-08-04 08:35:24 adam Exp $
+/* $Id: isamb.c,v 1.54 2004-08-16 10:08:37 adam Exp $
Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
Index Data Aps
#define ISAMB_DEBUG 0
#endif
+#define ISAMB_MAJOR_VERSION 1
+#define ISAMB_MINOR_VERSION 0
+
struct ISAMB_head {
zint first_block;
zint last_block;
#define CAT_NO 4
/* ISAMB_PTR_CODEC=1 var, =0 fixed */
-#define ISAMB_PTR_CODEC 0
+#define ISAMB_PTR_CODEC 1
struct ISAMB_cache_entry {
ISAMB_P pos;
int cache; /* 0=no cache, 1=use cache, -1=dummy isam (for testing only) */
int log_io; /* log level for bf_read/bf_write calls */
int log_freelist; /* log level for freelist handling */
- int skipped_numbers; /* on a leaf node */
- int returned_numbers;
- int skipped_nodes[ISAMB_MAX_LEVEL]; /* [0]=skipped leaves, 1=higher etc */
- int accessed_nodes[ISAMB_MAX_LEVEL]; /* nodes we did not skip */
+ zint skipped_numbers; /* on a leaf node */
+ zint returned_numbers;
+ zint skipped_nodes[ISAMB_MAX_LEVEL]; /* [0]=skipped leaves, 1=higher etc */
+ zint accessed_nodes[ISAMB_MAX_LEVEL]; /* nodes we did not skip */
};
struct ISAMB_block {
ISAMB_P pos;
int level;
int maxlevel; /* total depth */
- int total_size;
- int no_blocks;
- int skipped_numbers; /* on a leaf node */
- int returned_numbers;
- int skipped_nodes[ISAMB_MAX_LEVEL]; /* [0]=skipped leaves, 1=higher etc */
- int accessed_nodes[ISAMB_MAX_LEVEL]; /* nodes we did not skip */
+ zint total_size;
+ zint no_blocks;
+ zint skipped_numbers; /* on a leaf node */
+ zint returned_numbers;
+ zint skipped_nodes[ISAMB_MAX_LEVEL]; /* [0]=skipped leaves, 1=higher etc */
+ zint accessed_nodes[ISAMB_MAX_LEVEL]; /* nodes we did not skip */
struct ISAMB_block **block;
};
for (i = 0; i<isamb->no_cat; i++)
{
char fname[DST_BUF_SIZE];
+ char hbuf[DST_BUF_SIZE];
isamb->file[i].cache_entries = 0;
isamb->file[i].head_dirty = 0;
sprintf (fname, "%s%c", name, i+'A');
else
isamb->file[i].bf = bf_open (bfs, fname, b_size, writeflag);
-
- if (!bf_read (isamb->file[i].bf, 0, 0, sizeof(struct ISAMB_head),
- &isamb->file[i].head))
+ /* fill-in default values (for empty isamb) */
+ isamb->file[i].head.first_block = ISAMB_CACHE_ENTRY_SIZE/b_size+1;
+ isamb->file[i].head.last_block = isamb->file[i].head.first_block;
+ isamb->file[i].head.block_size = b_size;
+ isamb->file[i].head.block_max = b_size - ISAMB_DATA_OFFSET;
+ isamb->file[i].head.free_list = 0;
+ if (bf_read (isamb->file[i].bf, 0, 0, 0, hbuf))
{
- isamb->file[i].head.first_block = ISAMB_CACHE_ENTRY_SIZE/b_size+1;
- isamb->file[i].head.last_block = isamb->file[i].head.first_block;
- isamb->file[i].head.block_size = b_size;
- isamb->file[i].head.block_max = b_size - ISAMB_DATA_OFFSET;
- isamb->file[i].head.free_list = 0;
+ /* got header assume "isamb"major minor len can fit in 16 bytes */
+ zint zint_tmp;
+ int major, minor, len, pos = 0;
+ int left;
+ const char *src = 0;
+ if (memcmp(hbuf, "isamb", 5))
+ {
+ logf(LOG_WARN, "bad isamb header for file %s", fname);
+ return 0;
+ }
+ if (sscanf(hbuf+5, "%d %d %d", &major, &minor, &len) != 3)
+ {
+ logf(LOG_WARN, "bad isamb header for file %s", fname);
+ return 0;
+ }
+ if (major != ISAMB_MAJOR_VERSION)
+ {
+ logf(LOG_WARN, "bad major version for file %s %d, must be %d",
+ fname, major, ISAMB_MAJOR_VERSION);
+ return 0;
+ }
+ for (left = len - b_size; left > 0; left = left - b_size)
+ {
+ pos++;
+ if (!bf_read (isamb->file[i].bf, pos, 0, 0, hbuf + pos*b_size))
+ {
+ logf(LOG_WARN, "truncated isamb header for "
+ "file=%s len=%d pos=%d",
+ fname, len, pos);
+ return 0;
+ }
+ }
+ src = hbuf + 16;
+ decode_ptr(&src, &isamb->file[i].head.first_block);
+ decode_ptr(&src, &isamb->file[i].head.last_block);
+ decode_ptr(&src, &zint_tmp);
+ isamb->file[i].head.block_size = zint_tmp;
+ decode_ptr(&src, &zint_tmp);
+ isamb->file[i].head.block_max = zint_tmp;
+ decode_ptr(&src, &isamb->file[i].head.free_list);
}
assert (isamb->file[i].head.block_size >= ISAMB_DATA_OFFSET);
isamb->file[i].head_dirty = 0;
static int get_block (ISAMB b, ISAMC_P pos, char *userbuf, int wr)
{
- int cat = pos&CAT_MASK;
- int off = ((pos/CAT_MAX) &
+ int cat = (int) (pos&CAT_MASK);
+ int off = (int) (((pos/CAT_MAX) &
(ISAMB_CACHE_ENTRY_SIZE / b->file[cat].head.block_size - 1))
- * b->file[cat].head.block_size;
- int norm = pos / (CAT_MASK*ISAMB_CACHE_ENTRY_SIZE / b->file[cat].head.block_size);
+ * b->file[cat].head.block_size);
+ zint norm = pos / (CAT_MASK*ISAMB_CACHE_ENTRY_SIZE / b->file[cat].head.block_size);
int no = 0;
struct ISAMB_cache_entry **ce, *ce_this = 0, **ce_last = 0;
{
int i;
for (i=0;isamb->accessed_nodes[i];i++)
- logf(LOG_DEBUG,"isamb_close level leaf-%d: %d read, %d skipped",
+ logf(LOG_DEBUG,"isamb_close level leaf-%d: "ZINT_FORMAT" read, "
+ ZINT_FORMAT" skipped",
i, isamb->accessed_nodes[i], isamb->skipped_nodes[i]);
- logf(LOG_DEBUG,"isamb_close returned %d values, skipped %d",
+ logf(LOG_DEBUG,"isamb_close returned "ZINT_FORMAT" values, "
+ "skipped "ZINT_FORMAT,
isamb->skipped_numbers, isamb->returned_numbers);
for (i = 0; i<isamb->no_cat; i++)
{
flush_blocks (isamb, i);
if (isamb->file[i].head_dirty)
- bf_write (isamb->file[i].bf, 0, 0,
- sizeof(struct ISAMB_head), &isamb->file[i].head);
-
+ {
+ char hbuf[DST_BUF_SIZE];
+ int major = ISAMB_MAJOR_VERSION;
+ int minor = ISAMB_MINOR_VERSION;
+ int len = 16;
+ char *dst = hbuf + 16;
+ int pos = 0, left;
+ int b_size = isamb->file[i].head.block_size;
+
+ encode_ptr(&dst, isamb->file[i].head.first_block);
+ encode_ptr(&dst, isamb->file[i].head.last_block);
+ encode_ptr(&dst, isamb->file[i].head.block_size);
+ encode_ptr(&dst, isamb->file[i].head.block_max);
+ encode_ptr(&dst, isamb->file[i].head.free_list);
+ memset(dst, '\0', 16); /* ensure no random bytes are written */
+
+ len = dst - hbuf;
+
+ /* print exactly 16 bytes (including trailing 0) */
+ sprintf(hbuf, "isamb%02d %02d %02d\r\n", major, minor, len);
+
+ bf_write (isamb->file[i].bf, pos, 0, 0, hbuf);
+
+ for (left = len - b_size; left > 0; left = left - b_size)
+ {
+ pos++;
+ bf_write (isamb->file[i].bf, pos, 0, 0, hbuf + pos*b_size);
+ }
+ }
bf_close (isamb->file[i].bf);
}
xfree (isamb->file);
static struct ISAMB_block *open_block (ISAMB b, ISAMC_P pos)
{
- int cat = pos&CAT_MASK;
+ int cat = (int) (pos&CAT_MASK);
struct ISAMB_block *p;
if (!pos)
return 0;
p = xmalloc (sizeof(*p));
p->pos = pos;
- p->cat = pos & CAT_MASK;
+ p->cat = (int) (pos & CAT_MASK);
p->buf = xmalloc (b->file[cat].head.block_size);
p->cbuf = 0;
if (!b->file[cat].head.free_list)
{
- int block_no;
+ zint block_no;
block_no = b->file[cat].head.last_block++;
p->pos = block_no * CAT_MAX + cat;
}
static void check_block (ISAMB b, struct ISAMB_block *p)
{
+ assert(b); /* mostly to make the compiler shut up about unused b */
if (p->leaf)
{
;
while (src <= half)
{
decode_ptr (&src, &split_size_tmp);
- *split_size = split_size_tmp;
+ *split_size = (int) split_size_tmp;
src += *split_size;
decode_ptr (&src, &pos);
memcpy (p->bytes, dst_buf, p_new_size);
decode_ptr (&src, &split_size_tmp);
- *split_size = split_size_tmp;
+ *split_size = (int) split_size_tmp;
memcpy (split_item, src, *split_size);
src += *split_size;
int i;
if (!pp)
return;
- logf(LOG_DEBUG,"isamb_pp_close lev=%d returned %d values, skipped %d",
+ logf(LOG_DEBUG,"isamb_pp_close lev=%d returned "ZINT_FORMAT" values,"
+ "skipped "ZINT_FORMAT,
pp->maxlevel, pp->skipped_numbers, pp->returned_numbers);
for (i=pp->maxlevel;i>=0;i--)
if ( pp->skipped_nodes[i] || pp->accessed_nodes[i])
- logf(LOG_DEBUG,"isamb_pp_close level leaf-%d: %d read, %d skipped", i,
+ logf(LOG_DEBUG,"isamb_pp_close level leaf-%d: "
+ ZINT_FORMAT" read, "ZINT_FORMAT" skipped", i,
pp->accessed_nodes[i], pp->skipped_nodes[i]);
pp->isamb->skipped_numbers += pp->skipped_numbers;
pp->isamb->returned_numbers += pp->returned_numbers;
void isamb_dump (ISAMB b, ISAMB_P pos, void (*pr)(const char *str))
{
- return isamb_dump_r(b, pos, pr, 0);
+ isamb_dump_r(b, pos, pr, 0);
}
#if 0
#if NEW_FORWARD == 1
-/*
-#undef ISAMB_DEBUB
-#define ISAMB_DEBUG 1
-*/
static int isamb_pp_on_right_node(ISAMB_PP pp, int level, const void *untilbuf)
{ /* looks one node higher to see if we should be on this node at all */
/* useful in backing off quickly, and in avoiding tail descends */
(*pp->isamb->method->codec.log_item)(LOG_DEBUG, buf, "read_on_leaf returning 1");
#endif
*/
+ pp->returned_numbers++;
return 1;
} /* read_on_leaf */
} /* climb_level */
-static int isamb_pp_forward_unode(ISAMB_PP pp, int pos, const void *untilbuf)
+static zint isamb_pp_forward_unode(ISAMB_PP pp, zint pos, const void *untilbuf)
{ /* scans a upper node until it finds a child <= untilbuf */
/* pp points to the key value, as always. pos is the child read from */
/* the buffer */
return 1;
}
-static int isamb_pp_climb_desc(ISAMB_PP pp, void *buf, const void *untilbuf)
+static int isamb_pp_climb_desc(ISAMB_PP pp, const void *untilbuf)
{ /* climbs up and descends to a leaf where values >= *untilbuf are found */
ISAMB_P pos;
#if ISAMB_DEBUG
#endif
return 1;
}
- if (! isamb_pp_climb_desc( pp, buf, untilbuf)) {
+ if (! isamb_pp_climb_desc( pp, untilbuf)) {
#if ISAMB_DEBUG
logf(LOG_DEBUG,"isamb_pp_forward (f) returning notfound (B) "
"at level %d node %d ofs=%d sz=%d",
int isamb_pp_num (ISAMB_PP pp)
{
+ assert(pp); /* shut up about unused arguments */
return 1;
}
static void isamb_pp_leaf_pos( ISAMB_PP pp,
- int *current, int *total, void *dummybuf )
+ double *current, double *total,
+ void *dummybuf )
{
struct ISAMB_block *p = pp->block[pp->level];
const char *src=p->bytes;
char *end=p->bytes+p->size;
char *cur=p->bytes+p->offset;
char *dst;
+ void *decodeClientData;
assert(p->offset <= p->size);
assert(cur <= end);
assert(p->leaf);
*current=0;
*total=0;
+ decodeClientData = (pp->isamb->method->codec.start)();
+
while(src < end) {
dst=dummybuf;
- (*pp->isamb->method->codec.decode)(p->decodeClientData,&dst, &src);
+ (*pp->isamb->method->codec.decode)(decodeClientData,&dst, &src);
assert(dst<(char*) dummybuf+100); /*FIXME */
(*total)++;
if (src<=cur)
(*current)++;
}
- logf(LOG_DEBUG, "isamb_pp_leaf_pos: cur=%d tot=%d ofs=%d sz=%d lev=%d",
+#if ISAMB_DEBUG
+ logf(LOG_DEBUG, "isamb_pp_leaf_pos: cur= %0.1f tot=%0.1f "
+ " ofs=%d sz=%d lev=%d",
*current, *total, p->offset, p->size, pp->level);
+#endif
assert(src==end);
+ (pp->isamb->method->codec.stop)(decodeClientData);
}
-static void isamb_pp_upper_pos( ISAMB_PP pp, int *current, int *total,
- int size, int level )
+static void isamb_pp_upper_pos( ISAMB_PP pp, double *current, double *total,
+ double size, int level )
{ /* estimates total/current occurrences from here up, excl leaf */
struct ISAMB_block *p = pp->block[level];
const char *src=p->bytes;
char *cur=p->bytes+p->offset;
zint item_size;
ISAMB_P child;
+
assert(level>=0);
assert(!p->leaf);
+
+#if 1 // ISAMB_DEBUG
logf(LOG_DEBUG,"isamb_pp_upper_pos at beginning l=%d "
- "cur=%d tot=%d ofs=%d sz=%d pos=" ZINT_FORMAT,
+ "cur=%0.1f tot=%0.1f "
+ " ofs=%d sz=%d pos=" ZINT_FORMAT,
level, *current, *total, p->offset, p->size, p->pos);
+#endif
assert (p->offset <= p->size);
decode_ptr (&src, &child ); /* first child */
+ if (src!=cur) {
+ *total += size;
+ if (src < cur)
+ *current +=size;
+ }
while(src < end) {
+ decode_ptr (&src, &item_size );
+ assert(src+item_size<=end);
+ src += item_size;
+ decode_ptr (&src, &child );
if (src!=cur) {
*total += size;
if (src < cur)
*current +=size;
}
- decode_ptr (&src, &item_size );
- assert(src+item_size<=end);
- src += item_size;
- decode_ptr (&src, &child );
}
+#if ISAMB_DEBUG
+ logf(LOG_DEBUG,"isamb_pp_upper_pos before recursion l=%d "
+ "cur=%0.1f tot=%0.1f "
+ " ofs=%d sz=%d pos=" ZINT_FORMAT,
+ level, *current, *total, p->offset, p->size, p->pos);
+#endif
if (level>0)
isamb_pp_upper_pos(pp, current, total, *total, level-1);
} /* upper_pos */
-void isamb_pp_pos( ISAMB_PP pp, int *current, int *total )
+void isamb_pp_pos( ISAMB_PP pp, double *current, double *total )
{ /* return an estimate of the current position and of the total number of */
/* occureences in the isam tree, based on the current leaf */
+ /* FIXME - Isam-B ought to know how many we have, so we could return */
+ /* that directly */
struct ISAMB_block *p = pp->block[pp->level];
char dummy[100]; /* 100 bytes/entry must be enough */
assert(total);
isamb_pp_leaf_pos(pp,current, total, dummy);
if (pp->level>0)
isamb_pp_upper_pos(pp, current, total, *total, pp->level-1);
- /*
- logf(LOG_DEBUG,"isamb_pp_pos: C=%d T=%d =%6.2f%%",
- *current, *total, 100.0*(*current)/(*total));
- */
+ *current = (double) pp->returned_numbers;
+ /* use the precise number, since we have it! */
+#if ISAMB_DEBUG
+ logf(LOG_LOG, "isamb_pp_pos returning: cur= %0.1f tot=%0.1f rn="ZINT_FORMAT,
+ *current, *total, pp->returned_numbers);
+#endif
}