X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=isamb%2Fisamb.c;h=e4b87ca66dd42f5df1ee4a5e40b39cb000bccd06;hp=8ddb9c7cbe45cd4d3f2b8c5781a3b78be91c0c48;hb=0e56fa84bb4a5985c19a21926e86862c08d84689;hpb=5660e317ea2972ea6bb6a4f8a415f71579f71103 diff --git a/isamb/isamb.c b/isamb/isamb.c index 8ddb9c7..e4b87ca 100644 --- a/isamb/isamb.c +++ b/isamb/isamb.c @@ -1,4 +1,4 @@ -/* $Id: isamb.c,v 1.51 2004-08-06 10:09:27 heikki Exp $ +/* $Id: isamb.c,v 1.61 2004-12-08 14:02:36 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -22,24 +22,27 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include #include -#include -#include +#include +#include #include #ifndef ISAMB_DEBUG #define ISAMB_DEBUG 0 #endif +#define ISAMB_MAJOR_VERSION 2 +#define ISAMB_MINOR_VERSION 0 + struct ISAMB_head { zint first_block; zint last_block; + zint free_list; + zint no_items; int block_size; int block_max; - zint free_list; + int block_offset; }; -#define ISAMB_DATA_OFFSET 3 - /* maximum size of encoded buffer */ #define DST_ITEM_MAX 256 @@ -56,7 +59,7 @@ struct ISAMB_head { #define CAT_NO 4 /* ISAMB_PTR_CODEC=1 var, =0 fixed */ -#define ISAMB_PTR_CODEC 0 +#define ISAMB_PTR_CODEC 1 struct ISAMB_cache_entry { ISAMB_P pos; @@ -82,10 +85,10 @@ struct ISAMB_s { int cache; /* 0=no cache, 1=use cache, -1=dummy isam (for testing only) */ int log_io; /* log level for bf_read/bf_write calls */ int log_freelist; /* log level for freelist handling */ - int skipped_numbers; /* on a leaf node */ - int returned_numbers; - int skipped_nodes[ISAMB_MAX_LEVEL]; /* [0]=skipped leaves, 1=higher etc */ - int accessed_nodes[ISAMB_MAX_LEVEL]; /* nodes we did not skip */ + zint skipped_numbers; /* on a leaf node */ + zint returned_numbers; + zint skipped_nodes[ISAMB_MAX_LEVEL]; /* [0]=skipped leaves, 1=higher etc */ + zint accessed_nodes[ISAMB_MAX_LEVEL]; /* nodes we did not skip */ }; struct ISAMB_block { @@ -96,6 +99,7 @@ struct ISAMB_block { int dirty; int deleted; int offset; + zint no_items; /* number of nodes in this + children */ char *bytes; char *cbuf; unsigned char *buf; @@ -108,13 +112,14 @@ struct ISAMB_PP_s { ISAMB_P pos; int level; int maxlevel; /* total depth */ - int total_size; - int no_blocks; - int skipped_numbers; /* on a leaf node */ - int returned_numbers; - int skipped_nodes[ISAMB_MAX_LEVEL]; /* [0]=skipped leaves, 1=higher etc */ - int accessed_nodes[ISAMB_MAX_LEVEL]; /* nodes we did not skip */ + zint total_size; + zint no_blocks; + zint skipped_numbers; /* on a leaf node */ + zint returned_numbers; + zint skipped_nodes[ISAMB_MAX_LEVEL]; /* [0]=skipped leaves, 1=higher etc */ + zint accessed_nodes[ISAMB_MAX_LEVEL]; /* nodes we did not skip */ struct ISAMB_block **block; + int scope; /* on what level we forward */ }; @@ -183,9 +188,10 @@ ISAMB isamb_open (BFiles bfs, const char *name, int writeflag, ISAMC_M *method, assert (cache == 0); isamb->file = xmalloc (sizeof(*isamb->file) * isamb->no_cat); - for (i = 0; ino_cat; i++) + for (i = 0; i < isamb->no_cat; i++) { char fname[DST_BUF_SIZE]; + char hbuf[DST_BUF_SIZE]; isamb->file[i].cache_entries = 0; isamb->file[i].head_dirty = 0; sprintf (fname, "%s%c", name, i+'A'); @@ -195,23 +201,67 @@ ISAMB isamb_open (BFiles bfs, const char *name, int writeflag, ISAMC_M *method, else isamb->file[i].bf = bf_open (bfs, fname, b_size, writeflag); - - if (!bf_read (isamb->file[i].bf, 0, 0, sizeof(struct ISAMB_head), - &isamb->file[i].head)) + /* fill-in default values (for empty isamb) */ + isamb->file[i].head.first_block = ISAMB_CACHE_ENTRY_SIZE/b_size+1; + isamb->file[i].head.last_block = isamb->file[i].head.first_block; + isamb->file[i].head.block_size = b_size; + if (i == isamb->no_cat-1 || b_size > 128) + isamb->file[i].head.block_offset = 8; + else + isamb->file[i].head.block_offset = 4; + isamb->file[i].head.block_max = + b_size - isamb->file[i].head.block_offset; + isamb->file[i].head.free_list = 0; + if (bf_read (isamb->file[i].bf, 0, 0, 0, hbuf)) { - isamb->file[i].head.first_block = ISAMB_CACHE_ENTRY_SIZE/b_size+1; - isamb->file[i].head.last_block = isamb->file[i].head.first_block; - isamb->file[i].head.block_size = b_size; - isamb->file[i].head.block_max = b_size - ISAMB_DATA_OFFSET; - isamb->file[i].head.free_list = 0; + /* got header assume "isamb"major minor len can fit in 16 bytes */ + zint zint_tmp; + int major, minor, len, pos = 0; + int left; + const char *src = 0; + if (memcmp(hbuf, "isamb", 5)) + { + yaz_log(YLOG_WARN, "bad isamb header for file %s", fname); + return 0; + } + if (sscanf(hbuf+5, "%d %d %d", &major, &minor, &len) != 3) + { + yaz_log(YLOG_WARN, "bad isamb header for file %s", fname); + return 0; + } + if (major != ISAMB_MAJOR_VERSION) + { + yaz_log(YLOG_WARN, "bad major version for file %s %d, must be %d", + fname, major, ISAMB_MAJOR_VERSION); + return 0; + } + for (left = len - b_size; left > 0; left = left - b_size) + { + pos++; + if (!bf_read (isamb->file[i].bf, pos, 0, 0, hbuf + pos*b_size)) + { + yaz_log(YLOG_WARN, "truncated isamb header for " + "file=%s len=%d pos=%d", + fname, len, pos); + return 0; + } + } + src = hbuf + 16; + decode_ptr(&src, &isamb->file[i].head.first_block); + decode_ptr(&src, &isamb->file[i].head.last_block); + decode_ptr(&src, &zint_tmp); + isamb->file[i].head.block_size = zint_tmp; + decode_ptr(&src, &zint_tmp); + isamb->file[i].head.block_max = zint_tmp; + decode_ptr(&src, &isamb->file[i].head.free_list); } - assert (isamb->file[i].head.block_size >= ISAMB_DATA_OFFSET); + assert (isamb->file[i].head.block_size >= isamb->file[i].head.block_offset); isamb->file[i].head_dirty = 0; assert(isamb->file[i].head.block_size == b_size); b_size = b_size * 4; } #if ISAMB_DEBUG - logf(LOG_WARN, "isamb debug enabled. Things will be slower than usual"); + yaz_log(YLOG_WARN, "isamb debug enabled. Things will be slower than usual"); #endif return isamb; } @@ -235,11 +285,11 @@ static void flush_blocks (ISAMB b, int cat) static int get_block (ISAMB b, ISAMC_P pos, char *userbuf, int wr) { - int cat = pos&CAT_MASK; - int off = ((pos/CAT_MAX) & + int cat = (int) (pos&CAT_MASK); + int off = (int) (((pos/CAT_MAX) & (ISAMB_CACHE_ENTRY_SIZE / b->file[cat].head.block_size - 1)) - * b->file[cat].head.block_size; - int norm = pos / (CAT_MASK*ISAMB_CACHE_ENTRY_SIZE / b->file[cat].head.block_size); + * b->file[cat].head.block_size); + zint norm = pos / (CAT_MASK*ISAMB_CACHE_ENTRY_SIZE / b->file[cat].head.block_size); int no = 0; struct ISAMB_cache_entry **ce, *ce_this = 0, **ce_last = 0; @@ -310,17 +360,45 @@ void isamb_close (ISAMB isamb) { int i; for (i=0;isamb->accessed_nodes[i];i++) - logf(LOG_DEBUG,"isamb_close level leaf-%d: %d read, %d skipped", + yaz_log(YLOG_DEBUG,"isamb_close level leaf-%d: "ZINT_FORMAT" read, " + ZINT_FORMAT" skipped", i, isamb->accessed_nodes[i], isamb->skipped_nodes[i]); - logf(LOG_DEBUG,"isamb_close returned %d values, skipped %d", + yaz_log(YLOG_DEBUG,"isamb_close returned "ZINT_FORMAT" values, " + "skipped "ZINT_FORMAT, isamb->skipped_numbers, isamb->returned_numbers); for (i = 0; ino_cat; i++) { flush_blocks (isamb, i); if (isamb->file[i].head_dirty) - bf_write (isamb->file[i].bf, 0, 0, - sizeof(struct ISAMB_head), &isamb->file[i].head); - + { + char hbuf[DST_BUF_SIZE]; + int major = ISAMB_MAJOR_VERSION; + int minor = ISAMB_MINOR_VERSION; + int len = 16; + char *dst = hbuf + 16; + int pos = 0, left; + int b_size = isamb->file[i].head.block_size; + + encode_ptr(&dst, isamb->file[i].head.first_block); + encode_ptr(&dst, isamb->file[i].head.last_block); + encode_ptr(&dst, isamb->file[i].head.block_size); + encode_ptr(&dst, isamb->file[i].head.block_max); + encode_ptr(&dst, isamb->file[i].head.free_list); + memset(dst, '\0', b_size); /* ensure no random bytes are written */ + + len = dst - hbuf; + + /* print exactly 16 bytes (including trailing 0) */ + sprintf(hbuf, "isamb%02d %02d %02d\r\n", major, minor, len); + + bf_write (isamb->file[i].bf, pos, 0, 0, hbuf); + + for (left = len - b_size; left > 0; left = left - b_size) + { + pos++; + bf_write (isamb->file[i].bf, pos, 0, 0, hbuf + pos*b_size); + } + } bf_close (isamb->file[i].bf); } xfree (isamb->file); @@ -328,15 +406,27 @@ void isamb_close (ISAMB isamb) xfree (isamb); } +/* open_block: read one block at pos. + Decode leading sys bytes .. consisting of + Offset:Meaning + 0: leader byte, != 0 leaf, == 0, non-leaf + 1-2: used size of block + 3-7*: number of items and all children + + * Reserve 5 bytes for large block sizes. 1 for small ones .. Number + of items. We can thus have at most 2^40 nodes. +*/ static struct ISAMB_block *open_block (ISAMB b, ISAMC_P pos) { - int cat = pos&CAT_MASK; + int cat = (int) (pos&CAT_MASK); + const char *src; + int offset = b->file[cat].head.block_offset; struct ISAMB_block *p; if (!pos) return 0; p = xmalloc (sizeof(*p)); p->pos = pos; - p->cat = pos & CAT_MASK; + p->cat = (int) (pos & CAT_MASK); p->buf = xmalloc (b->file[cat].head.block_size); p->cbuf = 0; @@ -345,25 +435,27 @@ static struct ISAMB_block *open_block (ISAMB b, ISAMC_P pos) yaz_log (b->log_io, "bf_read: open_block"); if (!bf_read (b->file[cat].bf, pos/CAT_MAX, 0, 0, p->buf)) { - yaz_log (LOG_FATAL, "isamb: read fail for pos=%ld block=%ld", + yaz_log (YLOG_FATAL, "isamb: read fail for pos=%ld block=%ld", (long) pos, (long) pos/CAT_MAX); abort(); } } - p->bytes = p->buf + ISAMB_DATA_OFFSET; + p->bytes = p->buf + offset; p->leaf = p->buf[0]; - p->size = (p->buf[1] + 256 * p->buf[2]) - ISAMB_DATA_OFFSET; + p->size = (p->buf[1] + 256 * p->buf[2]) - offset; if (p->size < 0) { - yaz_log (LOG_FATAL, "Bad block size %d in pos=" ZINT_FORMAT "\n", + yaz_log (YLOG_FATAL, "Bad block size %d in pos=" ZINT_FORMAT "\n", p->size, pos); } assert (p->size >= 0); + src = p->buf + 3; + decode_ptr(&src, &p->no_items); + p->offset = 0; p->dirty = 0; p->deleted = 0; p->decodeClientData = (*b->method->codec.start)(); - yaz_log (LOG_DEBUG, "isamb_open_block: Opened block " ZINT_FORMAT " ofs=%d",pos, p->offset); return p; } @@ -376,7 +468,7 @@ struct ISAMB_block *new_block (ISAMB b, int leaf, int cat) if (!b->file[cat].head.free_list) { - int block_no; + zint block_no; block_no = b->file[cat].head.last_block++; p->pos = block_no * CAT_MAX + cat; } @@ -389,7 +481,7 @@ struct ISAMB_block *new_block (ISAMB b, int leaf, int cat) yaz_log (b->log_io, "bf_read: new_block"); if (!bf_read (b->file[cat].bf, p->pos/CAT_MAX, 0, 0, p->buf)) { - yaz_log (LOG_FATAL, "isamb: read fail for pos=%ld block=%ld", + yaz_log (YLOG_FATAL, "isamb: read fail for pos=%ld block=%ld", (long) p->pos/CAT_MAX, (long) p->pos/CAT_MAX); abort (); } @@ -401,12 +493,13 @@ struct ISAMB_block *new_block (ISAMB b, int leaf, int cat) p->cat = cat; b->file[cat].head_dirty = 1; memset (p->buf, 0, b->file[cat].head.block_size); - p->bytes = p->buf + ISAMB_DATA_OFFSET; + p->bytes = p->buf + b->file[cat].head.block_offset; p->leaf = leaf; p->size = 0; p->dirty = 1; p->deleted = 0; p->offset = 0; + p->no_items = 0; p->decodeClientData = (*b->method->codec.start)(); return p; } @@ -424,6 +517,7 @@ struct ISAMB_block *new_int (ISAMB b, int cat) static void check_block (ISAMB b, struct ISAMB_block *p) { + assert(b); /* mostly to make the compiler shut up about unused b */ if (p->leaf) { ; @@ -445,7 +539,10 @@ static void check_block (ISAMB b, struct ISAMB_block *p) assert (item_len > 0 && item_len < 80); src += item_len; decode_ptr (&src, &pos); - assert ((pos&CAT_MASK) == p->cat); + if ((pos&CAT_MASK) != p->cat) + { + assert ((pos&CAT_MASK) == p->cat); + } } } } @@ -468,11 +565,17 @@ void close_block (ISAMB b, struct ISAMB_block *p) } else if (p->dirty) { - int size = p->size + ISAMB_DATA_OFFSET; + int offset = b->file[p->cat].head.block_offset; + int size = p->size + offset; + char *dst = p->buf + 3; assert (p->size >= 0); + + /* memset becuase encode_ptr usually does not write all bytes */ + memset(p->buf, 0, b->file[p->cat].head.block_offset); p->buf[0] = p->leaf; p->buf[1] = size & 255; p->buf[2] = size >> 8; + encode_ptr(&dst, p->no_items); check_block(b, p); if (!get_block (b, p->pos, p->buf, 1)) { @@ -505,6 +608,7 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, char sub_item[DST_ITEM_MAX]; int sub_size; int more = 0; + zint diff_terms = 0; *sp = 0; @@ -521,9 +625,11 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, { sub_p1 = open_block (b, pos); assert (sub_p1); + diff_terms -= sub_p1->no_items; more = insert_sub (b, &sub_p1, lookahead_item, mode, stream, &sub_p2, sub_item, &sub_size, src); + diff_terms += sub_p1->no_items; src = src0; break; } @@ -534,8 +640,17 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, { sub_p1 = open_block (b, pos); assert (sub_p1); + diff_terms -= sub_p1->no_items; more = insert_sub (b, &sub_p1, lookahead_item, mode, stream, &sub_p2, sub_item, &sub_size, last_max_item); + diff_terms += sub_p1->no_items; + } + if (sub_p2) + diff_terms += sub_p2->no_items; + if (diff_terms) + { + p->dirty = 1; + p->no_items += diff_terms; } if (sub_p2) { @@ -562,13 +677,17 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, } p->size = dst - dst_buf; assert (p->size >= 0); + + if (p->size <= b->file[p->cat].head.block_max) { memcpy (startp, dst_buf, dst - dst_buf); } else { + struct ISAMB_block *sub_p3; zint split_size_tmp; + zint no_items_first_half = 0; int p_new_size; const char *half; src = dst_buf; @@ -576,27 +695,44 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, half = src + b->file[p->cat].head.block_size/2; decode_ptr (&src, &pos); + + /* read sub block so we can get no_items for it */ + sub_p3 = open_block(b, pos); + no_items_first_half += sub_p3->no_items; + close_block(b, sub_p3); + while (src <= half) { decode_ptr (&src, &split_size_tmp); - *split_size = split_size_tmp; + *split_size = (int) split_size_tmp; src += *split_size; decode_ptr (&src, &pos); + + /* read sub block so we can get no_items for it */ + sub_p3 = open_block(b, pos); + no_items_first_half += sub_p3->no_items; + close_block(b, sub_p3); } + /* p is first half */ p_new_size = src - dst_buf; memcpy (p->bytes, dst_buf, p_new_size); decode_ptr (&src, &split_size_tmp); - *split_size = split_size_tmp; + *split_size = (int) split_size_tmp; memcpy (split_item, src, *split_size); src += *split_size; + /* *sp is second half */ *sp = new_int (b, p->cat); (*sp)->size = endp - src; memcpy ((*sp)->bytes, src, (*sp)->size); p->size = p_new_size; + + /* adjust no_items in first&second half */ + (*sp)->no_items = p->no_items - no_items_first_half; + p->no_items = no_items_first_half; } p->dirty = 1; close_block (b, sub_p2); @@ -619,13 +755,16 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, void *c1 = (*b->method->codec.start)(); void *c2 = (*b->method->codec.start)(); int more = 1; - int quater = b->file[b->no_cat-1].head.block_max / CAT_MAX; - char *cut = dst_buf + quater * 2; + int quater = b->file[b->no_cat-1].head.block_max / 4; + char *mid_cut = dst_buf + quater * 2; + char *tail_cut = dst_buf + quater * 3; char *maxp = dst_buf + b->file[b->no_cat-1].head.block_max; char *half1 = 0; char *half2 = 0; char cut_item_buf[DST_ITEM_MAX]; int cut_item_size = 0; + int no_items = 0; /* number of items (total) */ + int no_items_1 = 0; /* number of items (first half) */ if (p && p->size) { @@ -637,34 +776,48 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, (*b->method->codec.decode)(c1, &file_item, &src); while (1) { - const char *dst_item = 0; - char *dst_0 = dst; + const char *dst_item = 0; /* resulting item to be inserted */ char *lookahead_next; int d = -1; if (lookahead_item) d = (*b->method->compare_item)(file_item_buf, lookahead_item); + /* d now holds comparison between existing file item and + lookahead item + d = 0: equal + d > 0: lookahead before file + d < 0: lookahead after file + */ if (d > 0) { + /* lookahead must be inserted */ dst_item = lookahead_item; + /* if this is not an insertion, it's really bad .. */ if (!*lookahead_mode) { - yaz_log (LOG_WARN, "isamb: Inconsistent register (1)"); + yaz_log (YLOG_WARN, "isamb: Inconsistent register (1)"); assert (*lookahead_mode); } } else dst_item = file_item_buf; + + if (!*lookahead_mode && d == 0) { + /* it's a deletion and they match so there is nothing to be + inserted anyway .. But mark the thing bad (file item + was part of input.. The item will not be part of output */ p->dirty = 1; } - else if (!half1 && dst > cut) + else if (!half1 && dst > mid_cut) { + /* we have reached the splitting point for the first time */ const char *dst_item_0 = dst_item; half1 = dst; /* candidate for splitting */ - + + /* encode the resulting item */ (*b->method->codec.encode)(c2, &dst, &dst_item); cut_item_size = dst_item - dst_item_0; @@ -672,30 +825,41 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, memcpy (cut_item_buf, dst_item_0, cut_item_size); half2 = dst; + no_items_1 = no_items; + no_items++; } else + { + /* encode the resulting item */ (*b->method->codec.encode)(c2, &dst, &dst_item); + no_items++; + } + + /* now move "pointers" .. result has been encoded .. */ if (d > 0) { + /* we must move the lookahead pointer */ + if (dst > maxp) - { - dst = dst_0; + /* no more room. Mark lookahead as "gone".. */ lookahead_item = 0; - } else { + /* move it really.. */ lookahead_next = lookahead_item; if (!(*stream->read_item)(stream->clientData, &lookahead_next, lookahead_mode)) { + /* end of stream reached: no "more" and no lookahead */ lookahead_item = 0; more = 0; } if (lookahead_item && max_item && (*b->method->compare_item)(max_item, lookahead_item) <= 0) { - /* max_item 1 */ + /* the lookahead goes beyond what we allow in this + leaf. Mark it as "gone" */ lookahead_item = 0; } @@ -704,6 +868,8 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, } else if (d == 0) { + /* exact match .. move both pointers */ + lookahead_next = lookahead_item; if (!(*stream->read_item)(stream->clientData, &lookahead_next, lookahead_mode)) @@ -712,12 +878,13 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, more = 0; } if (src == endp) - break; - file_item = file_item_buf; + break; /* end of file stream reached .. */ + file_item = file_item_buf; /* move file pointer */ (*b->method->codec.decode)(c1, &file_item, &src); } else { + /* file pointer must be moved */ if (src == endp) break; file_item = file_item_buf; @@ -726,24 +893,29 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, } } maxp = dst_buf + b->file[b->no_cat-1].head.block_max + quater; + /* this loop runs when we are "appending" to a leaf page. That is + either it's empty (new) or all file items have been read in + previous loop */ while (lookahead_item) { char *dst_item; const char *src = lookahead_item; char *dst_0 = dst; + /* compare lookahead with max item */ if (max_item && (*b->method->compare_item)(max_item, lookahead_item) <= 0) { - /* max_item 2 */ + /* stop if we have reached the value of max item */ break; } if (!*lookahead_mode) { - yaz_log (LOG_WARN, "isamb: Inconsistent register (2)"); + /* this is append. So a delete is bad */ + yaz_log (YLOG_WARN, "isamb: Inconsistent register (2)"); abort(); } - else if (!half1 && dst > cut) + else if (!half1 && dst > tail_cut) { const char *src_0 = src; half1 = dst; /* candidate for splitting */ @@ -754,6 +926,7 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, assert(cut_item_size > 0); memcpy (cut_item_buf, src_0, cut_item_size); + no_items_1 = no_items; half2 = dst; } else @@ -764,6 +937,7 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, dst = dst_0; break; } + no_items++; if (p) p->dirty = 1; dst_item = lookahead_item; @@ -807,6 +981,7 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, /* first half */ p->size = half1 - dst_buf; memcpy (p->bytes, dst_buf, half1 - dst_buf); + p->no_items = no_items_1; /* second half */ *sp2 = new_leaf (b, p->cat); @@ -820,6 +995,7 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, memcpy (first_dst, half2, dst - half2); (*sp2)->size = (first_dst - (*sp2)->bytes) + (dst - half2); + (*sp2)->no_items = no_items - no_items_1; (*sp2)->dirty = 1; p->dirty = 1; memcpy (sub_item, cut_item_buf, cut_item_size); @@ -829,6 +1005,7 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, { memcpy (p->bytes, dst_buf, dst - dst_buf); p->size = new_size; + p->no_items = no_items; } (*b->method->codec.stop)(c1); (*b->method->codec.stop)(c2); @@ -886,6 +1063,7 @@ ISAMB_P isamb_merge (ISAMB b, ISAMC_P pos, ISAMC_I *stream) char *item_ptr; int i_mode; int more; + int must_delete = 0; if (b->cache < 0) { @@ -909,7 +1087,7 @@ ISAMB_P isamb_merge (ISAMB b, ISAMC_P pos, ISAMC_I *stream) if (pos) p = open_block (b, pos); more = insert_sub (b, &p, item_buf, &i_mode, stream, &sp, - sub_item, &sub_size, 0); + sub_item, &sub_size, 0); if (sp) { /* increase level of tree by one */ struct ISAMB_block *p2 = new_int (b, p->cat); @@ -923,18 +1101,30 @@ ISAMB_P isamb_merge (ISAMB b, ISAMC_P pos, ISAMC_I *stream) encode_ptr (&dst, sp->pos); p2->size = dst - p2->bytes; + p2->no_items = p->no_items + sp->no_items; pos = p2->pos; /* return new super page */ close_block (b, sp); close_block (b, p2); } else + { pos = p->pos; /* return current one (again) */ + } + if (p->no_items == 0) + must_delete = 1; + else + must_delete = 0; close_block (b, p); } + if (must_delete) + { + isamb_unlink(b, pos); + return 0; + } return pos; } -ISAMB_PP isamb_pp_open_x (ISAMB isamb, ISAMB_P pos, int *level) +ISAMB_PP isamb_pp_open_x (ISAMB isamb, ISAMB_P pos, int *level, int scope) { ISAMB_PP pp = xmalloc (sizeof(*pp)); int i; @@ -949,6 +1139,7 @@ ISAMB_PP isamb_pp_open_x (ISAMB isamb, ISAMB_P pos, int *level) pp->no_blocks = 0; pp->skipped_numbers=0; pp->returned_numbers=0; + pp->scope=scope; for (i=0;iskipped_nodes[i] = pp->accessed_nodes[i]=0; while (1) @@ -975,9 +1166,9 @@ ISAMB_PP isamb_pp_open_x (ISAMB isamb, ISAMB_P pos, int *level) return pp; } -ISAMB_PP isamb_pp_open (ISAMB isamb, ISAMB_P pos) +ISAMB_PP isamb_pp_open (ISAMB isamb, ISAMB_P pos, int scope) { - return isamb_pp_open_x (isamb, pos, 0); + return isamb_pp_open_x (isamb, pos, 0, scope); } void isamb_pp_close_x (ISAMB_PP pp, int *size, int *blocks) @@ -985,11 +1176,13 @@ void isamb_pp_close_x (ISAMB_PP pp, int *size, int *blocks) int i; if (!pp) return; - logf(LOG_DEBUG,"isamb_pp_close lev=%d returned %d values, skipped %d", + yaz_log(YLOG_DEBUG,"isamb_pp_close lev=%d returned "ZINT_FORMAT" values," + "skipped "ZINT_FORMAT, pp->maxlevel, pp->skipped_numbers, pp->returned_numbers); for (i=pp->maxlevel;i>=0;i--) if ( pp->skipped_nodes[i] || pp->accessed_nodes[i]) - logf(LOG_DEBUG,"isamb_pp_close level leaf-%d: %d read, %d skipped", i, + yaz_log(YLOG_DEBUG,"isamb_pp_close level leaf-%d: " + ZINT_FORMAT" read, "ZINT_FORMAT" skipped", i, pp->accessed_nodes[i], pp->skipped_nodes[i]); pp->isamb->skipped_numbers += pp->skipped_numbers; pp->isamb->returned_numbers += pp->returned_numbers; @@ -1029,8 +1222,10 @@ static void isamb_dump_r (ISAMB b, ISAMB_P pos, void (*pr)(const char *str), if (pos) { struct ISAMB_block *p = open_block (b, pos); - sprintf(prefix_str, "%*s " ZINT_FORMAT " cat=%d size=%d max=%d", level*2, "", - pos, p->cat, p->size, b->file[p->cat].head.block_max); + sprintf(prefix_str, "%*s " ZINT_FORMAT " cat=%d size=%d max=%d items=" + ZINT_FORMAT, level*2, "", + pos, p->cat, p->size, b->file[p->cat].head.block_max, + p->no_items); (*pr)(prefix_str); sprintf(prefix_str, "%*s " ZINT_FORMAT, level*2, "", pos); if (p->leaf) @@ -1040,7 +1235,7 @@ static void isamb_dump_r (ISAMB b, ISAMB_P pos, void (*pr)(const char *str), const char *src = p->bytes + p->offset; char *dst = buf; (*b->method->codec.decode)(p->decodeClientData, &dst, &src); - (*b->method->log_item)(LOG_DEBUG, buf, prefix_str); + (*b->method->log_item)(YLOG_DEBUG, buf, prefix_str); p->offset = src - (char*) p->bytes; } assert(p->offset == p->size); @@ -1059,7 +1254,7 @@ static void isamb_dump_r (ISAMB b, ISAMB_P pos, void (*pr)(const char *str), while (p->offset < p->size) { decode_ptr (&src, &item_len); - (*b->method->log_item)(LOG_DEBUG, src, prefix_str); + (*b->method->log_item)(YLOG_DEBUG, src, prefix_str); src += item_len; decode_ptr (&src, &sub); @@ -1074,83 +1269,14 @@ static void isamb_dump_r (ISAMB b, ISAMB_P pos, void (*pr)(const char *str), void isamb_dump (ISAMB b, ISAMB_P pos, void (*pr)(const char *str)) { - return isamb_dump_r(b, pos, pr, 0); + isamb_dump_r(b, pos, pr, 0); } -#if 0 -/* Old isamb_pp_read that Adam wrote, kept as a reference in case we need to - debug the more complex pp_read that also forwards. May be deleted near end - of 2004, if it has not shown to be useful */ - - -int isamb_pp_read (ISAMB_PP pp, void *buf) -{ - char *dst = buf; - char *src; - struct ISAMB_block *p = pp->block[pp->level]; - if (!p) - return 0; - - while (p->offset == p->size) - { - int pos, item_len; - while (p->offset == p->size) - { - if (pp->level == 0) - return 0; - close_block (pp->isamb, pp->block[pp->level]); - pp->block[pp->level] = 0; - (pp->level)--; - p = pp->block[pp->level]; - assert (!p->leaf); - } - src = p->bytes + p->offset; - - decode_ptr (&src, &item_len); - src += item_len; - decode_ptr (&src, &pos); - - p->offset = src - (char*) p->bytes; - - ++(pp->level); - - while (1) - { - pp->block[pp->level] = p = open_block (pp->isamb, pos); - - pp->total_size += p->size; - pp->no_blocks++; - - if (p->leaf) - { - break; - } - src = p->bytes + p->offset; - decode_ptr (&src, &pos); - p->offset = src - (char*) p->bytes; - pp->level++; - } - } - assert (p->offset < p->size); - assert (p->leaf); - src = p->bytes + p->offset; - (*pp->isamb->method->codec.code_item)(ISAMC_DECODE, p->decodeClientData, - &dst, &src); - p->offset = src - (char*) p->bytes; - /* key_logdump_txt(LOG_DEBUG,buf, "isamb_pp_read returning 1"); */ - return 1; -} - -#else int isamb_pp_read (ISAMB_PP pp, void *buf) { return isamb_pp_forward(pp, buf, 0); } -#endif -#define NEW_FORWARD 1 - -#if NEW_FORWARD == 1 static int isamb_pp_on_right_node(ISAMB_PP pp, int level, const void *untilbuf) { /* looks one node higher to see if we should be on this node at all */ @@ -1163,7 +1289,7 @@ static int isamb_pp_on_right_node(ISAMB_PP pp, int level, const void *untilbuf) assert(level>=0); if ( level == 0) { #if ISAMB_DEBUG - logf(LOG_DEBUG,"isamb_pp_on_right returning true for root"); + yaz_log(YLOG_DEBUG,"isamb_pp_on_right returning true for root"); #endif return 1; /* we can never skip the root node */ } @@ -1176,20 +1302,20 @@ static int isamb_pp_on_right_node(ISAMB_PP pp, int level, const void *untilbuf) src=p->bytes + p->offset; decode_ptr(&src, &item_len); #if ISAMB_DEBUG - (*pp->isamb->method->codec.log_item)(LOG_DEBUG,untilbuf,"on_leaf: until"); - (*pp->isamb->method->codec.log_item)(LOG_DEBUG,src,"on_leaf: value"); + (*pp->isamb->method->codec.log_item)(YLOG_DEBUG,untilbuf,"on_leaf: until"); + (*pp->isamb->method->codec.log_item)(YLOG_DEBUG,src,"on_leaf: value"); #endif cmp=(*pp->isamb->method->compare_item)(untilbuf,src); - if (cmp<2) { + if (cmpscope) { /* cmp<2 */ #if ISAMB_DEBUG - logf(LOG_DEBUG,"isamb_pp_on_right returning true " + yaz_log(YLOG_DEBUG,"isamb_pp_on_right returning true " "cmp=%d lev=%d ofs=%d",cmp,level,p->offset); #endif return 1; } else { #if ISAMB_DEBUG - logf(LOG_DEBUG,"isamb_pp_on_right returning false " + yaz_log(YLOG_DEBUG,"isamb_pp_on_right returning false " "cmp=%d lev=%d ofs=%d",cmp,level,p->offset); #endif return 0; @@ -1197,7 +1323,7 @@ static int isamb_pp_on_right_node(ISAMB_PP pp, int level, const void *untilbuf) } else { #if ISAMB_DEBUG - logf(LOG_DEBUG,"isamb_pp_on_right at tail, looking higher " + yaz_log(YLOG_DEBUG,"isamb_pp_on_right at tail, looking higher " "lev=%d",level); #endif return isamb_pp_on_right_node(pp, level, untilbuf); @@ -1213,7 +1339,7 @@ static int isamb_pp_read_on_leaf(ISAMB_PP pp, void *buf) assert(buf); if (p->offset == p->size) { #if ISAMB_DEBUG - logf(LOG_DEBUG,"isamb_pp_read_on_leaf returning 0 on node %d",p->pos); + yaz_log(YLOG_DEBUG,"isamb_pp_read_on_leaf returning 0 on node %d",p->pos); #endif return 0; /* at end of leaf */ } @@ -1221,11 +1347,10 @@ static int isamb_pp_read_on_leaf(ISAMB_PP pp, void *buf) dst=buf; (*pp->isamb->method->codec.decode)(p->decodeClientData,&dst, &src); p->offset = src - (char*) p->bytes; - /* #if ISAMB_DEBUG - (*pp->isamb->method->codec.log_item)(LOG_DEBUG, buf, "read_on_leaf returning 1"); + (*pp->isamb->method->codec.log_item)(YLOG_DEBUG, buf, "read_on_leaf returning 1"); #endif -*/ + pp->returned_numbers++; return 1; } /* read_on_leaf */ @@ -1238,10 +1363,10 @@ static int isamb_pp_forward_on_leaf(ISAMB_PP pp, void *buf, const void *untilbuf return 0; /* FIXME - this is an extra function call, inline the read? */ cmp=(*pp->isamb->method->compare_item)(untilbuf,buf); - if (cmp <2){ /* found a good one */ + if (cmp scope){ /* cmp<2 found a good one */ #if ISAMB_DEBUG if (skips) - logf(LOG_DEBUG, "isam_pp_fwd_on_leaf skipped %d items",skips); + yaz_log(YLOG_DEBUG, "isam_pp_fwd_on_leaf skipped %d items",skips); #endif pp->returned_numbers++; return 1; @@ -1261,7 +1386,7 @@ static int isamb_pp_climb_level(ISAMB_PP pp, ISAMB_P *pos) const char *src; zint item_len; #if ISAMB_DEBUG - logf(LOG_DEBUG,"isamb_pp_climb_level starting " + yaz_log(YLOG_DEBUG,"isamb_pp_climb_level starting " "at level %d node %d ofs=%d sz=%d", pp->level, p->pos, p->offset, p->size); #endif @@ -1270,7 +1395,7 @@ static int isamb_pp_climb_level(ISAMB_PP pp, ISAMB_P *pos) if (pp->level==0) { #if ISAMB_DEBUG - logf(LOG_DEBUG,"isamb_pp_climb_level returning 0 at root"); + yaz_log(YLOG_DEBUG,"isamb_pp_climb_level returning 0 at root"); #endif return 0; } @@ -1280,7 +1405,7 @@ static int isamb_pp_climb_level(ISAMB_PP pp, ISAMB_P *pos) (pp->level)--; p=pp->block[pp->level]; #if ISAMB_DEBUG - logf(LOG_DEBUG,"isamb_pp_climb_level climbed to level %d node %d ofs=%d", + yaz_log(YLOG_DEBUG,"isamb_pp_climb_level climbed to level %d node %d ofs=%d", pp->level, p->pos, p->offset); #endif assert(!p->leaf); @@ -1295,7 +1420,7 @@ static int isamb_pp_climb_level(ISAMB_PP pp, ISAMB_P *pos) { /* skip the child we just came from */ #if ISAMB_DEBUG - logf(LOG_DEBUG,"isam_pp_climb_level: skipping lev=%d ofs=%d sz=%d", + yaz_log(YLOG_DEBUG,"isam_pp_climb_level: skipping lev=%d ofs=%d sz=%d", pp->level, p->offset, p->size); #endif assert (p->offset < p->size ); @@ -1310,7 +1435,7 @@ static int isamb_pp_climb_level(ISAMB_PP pp, ISAMB_P *pos) } /* climb_level */ -static int isamb_pp_forward_unode(ISAMB_PP pp, int pos, const void *untilbuf) +static zint isamb_pp_forward_unode(ISAMB_PP pp, zint pos, const void *untilbuf) { /* scans a upper node until it finds a child <= untilbuf */ /* pp points to the key value, as always. pos is the child read from */ /* the buffer */ @@ -1325,7 +1450,7 @@ static int isamb_pp_forward_unode(ISAMB_PP pp, int pos, const void *untilbuf) zint nxtpos; #if ISAMB_DEBUG int skips=0; - logf(LOG_DEBUG,"isamb_pp_forward_unode starting " + yaz_log(YLOG_DEBUG,"isamb_pp_forward_unode starting " "at level %d node %d ofs=%di sz=%d", pp->level, p->pos, p->offset, p->size); #endif @@ -1333,7 +1458,7 @@ static int isamb_pp_forward_unode(ISAMB_PP pp, int pos, const void *untilbuf) assert(p->offset <= p->size); if (p->offset == p->size) { #if ISAMB_DEBUG - logf(LOG_DEBUG,"isamb_pp_forward_unode returning at end " + yaz_log(YLOG_DEBUG,"isamb_pp_forward_unode returning at end " "at level %d node %d ofs=%di sz=%d", pp->level, p->pos, p->offset, p->size); #endif @@ -1344,10 +1469,10 @@ static int isamb_pp_forward_unode(ISAMB_PP pp, int pos, const void *untilbuf) cmp=(*pp->isamb->method->compare_item)(untilbuf,src); src+=item_len; decode_ptr(&src,&nxtpos); - if (cmp<2) + if (cmpscope) /* cmp<2 */ { #if ISAMB_DEBUG - logf(LOG_DEBUG,"isamb_pp_forward_unode returning a hit " + yaz_log(YLOG_DEBUG,"isamb_pp_forward_unode returning a hit " "at level %d node %d ofs=%d sz=%d", pp->level, p->pos, p->offset, p->size); #endif @@ -1361,7 +1486,7 @@ static int isamb_pp_forward_unode(ISAMB_PP pp, int pos, const void *untilbuf) #endif } #if ISAMB_DEBUG - logf(LOG_DEBUG,"isamb_pp_forward_unode returning at tail " + yaz_log(YLOG_DEBUG,"isamb_pp_forward_unode returning at tail " "at level %d node %d ofs=%d sz=%d skips=%d", pp->level, p->pos, p->offset, p->size, skips); #endif @@ -1375,7 +1500,7 @@ static void isamb_pp_descend_to_leaf(ISAMB_PP pp, ISAMB_P pos, const void *until const char *src; assert(!p->leaf); #if ISAMB_DEBUG - logf(LOG_DEBUG,"isamb_pp_descend_to_leaf " + yaz_log(YLOG_DEBUG,"isamb_pp_descend_to_leaf " "starting at lev %d node %d ofs=%d lf=%d u=%p", pp->level, p->pos, p->offset, p->leaf, untilbuf); #endif @@ -1388,7 +1513,7 @@ static void isamb_pp_descend_to_leaf(ISAMB_PP pp, ISAMB_P pos, const void *until ++(pp->accessed_nodes[pp->maxlevel-pp->level]); ++(pp->no_blocks); #if ISAMB_DEBUG - logf(LOG_DEBUG,"isamb_pp_descend_to_leaf " + yaz_log(YLOG_DEBUG,"isamb_pp_descend_to_leaf " "got lev %d node %d lf=%d", pp->level, p->pos, p->leaf); #endif @@ -1400,7 +1525,7 @@ static void isamb_pp_descend_to_leaf(ISAMB_PP pp, ISAMB_P pos, const void *until p->offset=src-(char*)p->bytes; isamb_pp_descend_to_leaf(pp,pos,untilbuf); #if ISAMB_DEBUG - logf(LOG_DEBUG,"isamb_pp_descend_to_leaf " + yaz_log(YLOG_DEBUG,"isamb_pp_descend_to_leaf " "returning at lev %d node %d ofs=%d lf=%d", pp->level, p->pos, p->offset, p->leaf); #endif @@ -1415,12 +1540,12 @@ static int isamb_pp_find_next_leaf(ISAMB_PP pp) return 1; } -static int isamb_pp_climb_desc(ISAMB_PP pp, void *buf, const void *untilbuf) +static int isamb_pp_climb_desc(ISAMB_PP pp, const void *untilbuf) { /* climbs up and descends to a leaf where values >= *untilbuf are found */ ISAMB_P pos; #if ISAMB_DEBUG struct ISAMB_block *p = pp->block[pp->level]; - logf(LOG_DEBUG,"isamb_pp_climb_desc starting " + yaz_log(YLOG_DEBUG,"isamb_pp_climb_desc starting " "at level %d node %d ofs=%d sz=%d", pp->level, p->pos, p->offset, p->size); #endif @@ -1433,7 +1558,7 @@ static int isamb_pp_climb_desc(ISAMB_PP pp, void *buf, const void *untilbuf) isamb_pp_descend_to_leaf(pp, pos,untilbuf); #if ISAMB_DEBUG p = pp->block[pp->level]; - logf(LOG_DEBUG,"isamb_pp_climb_desc done " + yaz_log(YLOG_DEBUG,"isamb_pp_climb_desc done " "at level %d node %d ofs=%d sz=%d", pp->level, p->pos, p->offset, p->size); #endif @@ -1445,22 +1570,22 @@ int isamb_pp_forward (ISAMB_PP pp, void *buf, const void *untilbuf) #if ISAMB_DEBUG struct ISAMB_block *p = pp->block[pp->level]; assert(p->leaf); - logf(LOG_DEBUG,"isamb_pp_forward starting " - "at level %d node %d ofs=%d sz=%d u=%p", - pp->level, p->pos, p->offset, p->size,untilbuf); + yaz_log(YLOG_DEBUG,"isamb_pp_forward starting " + "at level %d node %d ofs=%d sz=%d u=%p sc=%d", + pp->level, p->pos, p->offset, p->size,untilbuf, scope); #endif if (untilbuf) { if (isamb_pp_forward_on_leaf( pp, buf, untilbuf)) { #if ISAMB_DEBUG - logf(LOG_DEBUG,"isamb_pp_forward (f) returning (A) " + yaz_log(YLOG_DEBUG,"isamb_pp_forward (f) returning (A) " "at level %d node %d ofs=%d sz=%d", pp->level, p->pos, p->offset, p->size); #endif return 1; } - if (! isamb_pp_climb_desc( pp, buf, untilbuf)) { + if (! isamb_pp_climb_desc( pp, untilbuf)) { #if ISAMB_DEBUG - logf(LOG_DEBUG,"isamb_pp_forward (f) returning notfound (B) " + yaz_log(YLOG_DEBUG,"isamb_pp_forward (f) returning notfound (B) " "at level %d node %d ofs=%d sz=%d", pp->level, p->pos, p->offset, p->size); #endif @@ -1469,7 +1594,7 @@ int isamb_pp_forward (ISAMB_PP pp, void *buf, const void *untilbuf) do{ if (isamb_pp_forward_on_leaf( pp, buf, untilbuf)) { #if ISAMB_DEBUG - logf(LOG_DEBUG,"isamb_pp_forward (f) returning (C) " + yaz_log(YLOG_DEBUG,"isamb_pp_forward (f) returning (C) " "at level %d node %d ofs=%d sz=%d", pp->level, p->pos, p->offset, p->size); #endif @@ -1485,7 +1610,7 @@ int isamb_pp_forward (ISAMB_PP pp, void *buf, const void *untilbuf) * interface as the old fwd */ if (isamb_pp_read_on_leaf( pp, buf)) { #if ISAMB_DEBUG - logf(LOG_DEBUG,"isamb_pp_forward (read) returning (D) " + yaz_log(YLOG_DEBUG,"isamb_pp_forward (read) returning (D) " "at level %d node %d ofs=%d sz=%d", pp->level, p->pos, p->offset, p->size); #endif @@ -1493,7 +1618,7 @@ int isamb_pp_forward (ISAMB_PP pp, void *buf, const void *untilbuf) } if (isamb_pp_find_next_leaf(pp)) { #if ISAMB_DEBUG - logf(LOG_DEBUG,"isamb_pp_forward (read) returning (E) " + yaz_log(YLOG_DEBUG,"isamb_pp_forward (read) returning (E) " "at level %d node %d ofs=%d sz=%d", pp->level, p->pos, p->offset, p->size); #endif @@ -1504,381 +1629,18 @@ int isamb_pp_forward (ISAMB_PP pp, void *buf, const void *untilbuf) } } /* isam_pp_forward (new version) */ -#elif NEW_FORWARD == 0 - -int isamb_pp_forward (ISAMB_PP pp, void *buf, const void *untilbuf) -{ - /* pseudocode: - * while 1 - * while at end of node - * climb higher. If out, return 0 - * while not on a leaf (and not at its end) - * decode next - * if cmp - * descend to node - * decode next - * if cmp - * return 1 - */ - /* - * The upper nodes consist of a sequence of nodenumbers and keys - * When opening a block, the first node number is read in, and - * offset points to the first key, which is the upper limit of keys - * in the node just read. - */ - char *dst = buf; - const char *src; - struct ISAMB_block *p = pp->block[pp->level]; - int cmp; - int item_len; - int pos; - int nxtpos; - int descending=0; /* used to prevent a border condition error */ - if (!p) - return 0; -#if ISAMB_DEBUG - logf(LOG_DEBUG,"isamb_pp_forward starting [%p] p=%d",pp,p->pos); - - (*pp->isamb->method->codec.log_item)(LOG_DEBUG, untilbuf, "until"); - (*pp->isamb->method->codec.log_item)(LOG_DEBUG, buf, "buf"); -#endif - - while (1) - { - while ( (p->offset == p->size) && !descending ) - { /* end of this block - climb higher */ - assert (p->offset <= p->size); -#if ISAMB_DEBUG - logf(LOG_DEBUG,"isamb_pp_forward climbing from l=%d", - pp->level); -#endif - if (pp->level == 0) - { -#if ISAMB_DEBUG - logf(LOG_DEBUG,"isamb_pp_forward returning 0 at root"); -#endif - return 0; /* at end of the root, nothing left */ - } - close_block(pp->isamb, pp->block[pp->level]); - pp->block[pp->level]=0; - (pp->level)--; - p=pp->block[pp->level]; -#if ISAMB_DEBUG - logf(LOG_DEBUG,"isamb_pp_forward climbed to node %d off=%d", - p->pos, p->offset); -#endif - assert(!p->leaf); - assert(p->offset <= p->size); - /* skip the child we have handled */ - if (p->offset != p->size) - { - src = p->bytes + p->offset; - decode_ptr(&src, &item_len); -#if ISAMB_DEBUG - (*pp->isamb->method->codec.log_item)(LOG_DEBUG, src, - " isamb_pp_forward " - "climb skipping old key"); -#endif - src += item_len; - decode_ptr(&src,&pos); - p->offset = src - (char*) p->bytes; - break; /* even if this puts us at the end of the block, we - need to descend to the last pos. UGLY coding, - clean up some day */ - } - } - if (!p->leaf) - { - src = p->bytes + p->offset; - if (p->offset == p->size) - cmp=-2 ; /* descend to the last node, as we have - no value to cmp */ - else - { - decode_ptr(&src, &item_len); -#if ISAMB_DEBUG - logf(LOG_DEBUG,"isamb_pp_forward (B) on a high node. " - "ofs=%d sz=%d nxtpos=%d ", - p->offset,p->size,pos); - (*pp->isamb->method->codec.log_item)(LOG_DEBUG, src, ""); -#endif - if (untilbuf) - cmp=(*pp->isamb->method->compare_item)(untilbuf,src); - else - cmp=-2; - src += item_len; - decode_ptr(&src,&nxtpos); - } - if (cmp<2) - { -#if ISAMB_DEBUG - logf(LOG_DEBUG,"isambb_pp_forward descending l=%d p=%d ", - pp->level, pos); -#endif - descending=1; /* prevent climbing for a while */ - ++(pp->level); - p = open_block(pp->isamb,pos); - pp->block[pp->level] = p ; - pp->total_size += p->size; - (pp->accessed_nodes[pp->maxlevel - pp->level])++; - pp->no_blocks++; - if ( !p->leaf) - { /* block starts with a pos */ - src = p->bytes + p->offset; - decode_ptr(&src,&pos); - p->offset=src-(char*) p->bytes; -#if ISAMB_DEBUG - logf(LOG_DEBUG,"isamb_pp_forward: block %d starts with %d", - p->pos, pos); -#endif - } - } /* descend to the node */ - else - { /* skip the node */ - p->offset = src - (char*) p->bytes; - pos=nxtpos; - (pp->skipped_nodes[pp->maxlevel - pp->level -1])++; -#if ISAMB_DEBUG - logf(LOG_DEBUG, - "isamb_pp_forward: skipping block on level %d, noting " - "on %d (%d)", - pp->level, pp->maxlevel - pp->level-1 , - pp->skipped_nodes[pp->maxlevel - pp->level-1 ]); -#endif - /* 0 is always leafs, 1 is one level above leafs etc, no - * matter how high tree */ - } - } /* not on a leaf */ - else - { /* on a leaf */ - if (p->offset == p->size) { - descending = 0; - } - else - { - assert (p->offset < p->size); - src = p->bytes + p->offset; - dst=buf; - (*pp->isamb->method->codec.decode)(p->decodeClientData, - &dst, &src); - p->offset = src - (char*) p->bytes; - if (untilbuf) - cmp=(*pp->isamb->method->compare_item)(untilbuf,buf); - else - cmp=-2; -#if ISAMB_DEBUG - logf(LOG_DEBUG,"isamb_pp_forward on a leaf. cmp=%d", - cmp); - (*pp->isamb->method->codec.log_item)(LOG_DEBUG, buf, ""); -#endif - if (cmp <2) - { -#if ISAMB_DEBUG - if (untilbuf) - { - (*pp->isamb->method->codec.log_item)( - LOG_DEBUG, buf, "isamb_pp_forward returning 1"); - } - else - { - (*pp->isamb->method->codec.log_item)( - LOG_DEBUG, buf, "isamb_pp_read returning 1 (fwd)"); - } -#endif - pp->returned_numbers++; - return 1; - } - else - pp->skipped_numbers++; - } - } /* leaf */ - } /* main loop */ -} - -#elif NEW_FORWARD == 2 - -int isamb_pp_forward (ISAMB_PP pp, void *buf, const void *untilb) -{ - char *dst = buf; - const char *src; - struct ISAMB_block *p = pp->block[pp->level]; - if (!p) - return 0; - -again: - while (p->offset == p->size) - { - int pos, item_len; - while (p->offset == p->size) - { - if (pp->level == 0) - return 0; - close_block (pp->isamb, pp->block[pp->level]); - pp->block[pp->level] = 0; - (pp->level)--; - p = pp->block[pp->level]; - assert (!p->leaf); - } - - assert(!p->leaf); - src = p->bytes + p->offset; - - decode_ptr (&src, &item_len); - src += item_len; - decode_ptr (&src, &pos); - - p->offset = src - (char*) p->bytes; - - src = p->bytes + p->offset; - - while(1) - { - if (!untilb || p->offset == p->size) - break; - assert(p->offset < p->size); - decode_ptr (&src, &item_len); - if ((*pp->isamb->method->compare_item)(untilb, src) <= 1) - break; - src += item_len; - decode_ptr (&src, &pos); - p->offset = src - (char*) p->bytes; - } - - pp->level++; - - while (1) - { - pp->block[pp->level] = p = open_block (pp->isamb, pos); - - pp->total_size += p->size; - pp->no_blocks++; - - if (p->leaf) - { - break; - } - - src = p->bytes + p->offset; - while(1) - { - decode_ptr (&src, &pos); - p->offset = src - (char*) p->bytes; - - if (!untilb || p->offset == p->size) - break; - assert(p->offset < p->size); - decode_ptr (&src, &item_len); - if ((*pp->isamb->method->compare_item)(untilb, src) <= 1) - break; - src += item_len; - } - pp->level++; - } - } - assert (p->offset < p->size); - assert (p->leaf); - while(1) - { - char *dst0 = dst; - src = p->bytes + p->offset; - (*pp->isamb->method->codec.decode)(p->decodeClientData, &dst, &src); - p->offset = src - (char*) p->bytes; - if (!untilb || (*pp->isamb->method->compare_item)(untilb, dst0) <= 1) - break; - dst = dst0; - if (p->offset == p->size) goto again; - } - /* key_logdump_txt(LOG_DEBUG,buf, "isamb_pp_read returning 1"); */ - return 1; -} - -#endif - -int isamb_pp_num (ISAMB_PP pp) -{ - return 1; -} - -static void isamb_pp_leaf_pos( ISAMB_PP pp, - double *current, double *total, - void *dummybuf ) -{ - struct ISAMB_block *p = pp->block[pp->level]; - const char *src=p->bytes; - char *end=p->bytes+p->size; - char *cur=p->bytes+p->offset; - char *dst; - void *decodeClientData; - assert(p->offset <= p->size); - assert(cur <= end); - assert(p->leaf); - *current=0; - *total=0; - - decodeClientData = (pp->isamb->method->codec.start)(); - - while(src < end) { - dst=dummybuf; - (*pp->isamb->method->codec.decode)(decodeClientData,&dst, &src); - assert(dst<(char*) dummybuf+100); /*FIXME */ - (*total)++; - if (src<=cur) - (*current)++; - } -#if ISAMB_DEBUG - logf(LOG_DEBUG, "isamb_pp_leaf_pos: cur= %0.1f tot=%0.1f " - " ofs=%d sz=%d lev=%d", - *current, *total, p->offset, p->size, pp->level); -#endif - assert(src==end); - (pp->isamb->method->codec.stop)(decodeClientData); -} - -static void isamb_pp_upper_pos( ISAMB_PP pp, double *current, double *total, - zint size, int level ) -{ /* estimates total/current occurrences from here up, excl leaf */ - struct ISAMB_block *p = pp->block[level]; - const char *src=p->bytes; - char *end=p->bytes+p->size; - char *cur=p->bytes+p->offset; - zint item_size; - ISAMB_P child; - - assert(level>=0); - assert(!p->leaf); - -#if ISAMB_DEBUG - logf(LOG_DEBUG,"isamb_pp_upper_pos at beginning l=%d " - "cur="ZINT_FORMAT" tot="ZINT_FORMAT - " ofs=%d sz=%d pos=" ZINT_FORMAT, - level, *current, *total, p->offset, p->size, p->pos); -#endif - assert (p->offset <= p->size); - decode_ptr (&src, &child ); /* first child */ - while(src < end) { - if (src!=cur) { - *total += size; - if (src < cur) - *current +=size; - } - decode_ptr (&src, &item_size ); - assert(src+item_size<=end); - src += item_size; - decode_ptr (&src, &child ); - } - if (level>0) - isamb_pp_upper_pos(pp, current, total, *total, level-1); -} /* upper_pos */ - void isamb_pp_pos( ISAMB_PP pp, double *current, double *total ) { /* return an estimate of the current position and of the total number of */ /* occureences in the isam tree, based on the current leaf */ struct ISAMB_block *p = pp->block[pp->level]; - char dummy[100]; /* 100 bytes/entry must be enough */ assert(total); assert(current); assert(p->leaf); - isamb_pp_leaf_pos(pp,current, total, dummy); - if (pp->level>0) - isamb_pp_upper_pos(pp, current, total, *total, pp->level-1); + + *total = pp->block[0]->no_items; + *current = (double) pp->returned_numbers; +#if ISAMB_DEBUG + yaz_log(YLOG_LOG, "isamb_pp_pos returning: cur= %0.1f tot=%0.1f rn=" + ZINT_FORMAT, *current, *total, pp->returned_numbers); +#endif }