X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=isamb%2Fisamb.c;h=3e85ee1e0f97097afb488f4a7e4a17efc3204592;hb=3fe5d30485d3fc95b24ee5e7dc75971447ecb5aa;hp=ddc936b7e16fea3f8b234f32032e07fd23a9041f;hpb=407d3ca3408a0b117616ca9c6fe92531d3b10d6c;p=idzebra-moved-to-github.git diff --git a/isamb/isamb.c b/isamb/isamb.c index ddc936b..3e85ee1 100644 --- a/isamb/isamb.c +++ b/isamb/isamb.c @@ -1,6 +1,6 @@ -/* $Id: isamb.c,v 1.67 2005-01-15 18:43:05 adam Exp $ - Copyright (C) 1995-2005 - Index Data Aps +/* $Id: isamb.c,v 1.88 2006-12-12 13:46:41 adam Exp $ + Copyright (C) 1995-2006 + Index Data ApS This file is part of the Zebra server. @@ -15,11 +15,12 @@ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with Zebra; see the file LICENSE.zebra. If not, write to the -Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA -02111-1307, USA. +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include #include #include #include @@ -44,7 +45,7 @@ struct ISAMB_head { int block_offset; }; -/* if 1, interior nodes items are encoded; 0 if not encoded */ +/* if 1, upper nodes items are encoded; 0 if not encoded */ #define INT_ENCODE 1 /* maximum size of encoded buffer */ @@ -52,8 +53,9 @@ struct ISAMB_head { #define ISAMB_MAX_LEVEL 10 /* approx 2*max page + max size of item */ -#define DST_BUF_SIZE 16840 +#define DST_BUF_SIZE (2*4096+300) +/* should be maximum block size of multiple thereof */ #define ISAMB_CACHE_ENTRY_SIZE 4096 /* CAT_MAX: _must_ be power of 2 */ @@ -71,7 +73,7 @@ struct ISAMB_head { #define ISAMB_PTR_CODEC 1 struct ISAMB_cache_entry { - ISAMB_P pos; + ISAM_P pos; unsigned char *buf; int dirty; int hits; @@ -98,10 +100,14 @@ struct ISAMB_s { zint returned_numbers; zint skipped_nodes[ISAMB_MAX_LEVEL]; /* [0]=skipped leaves, 1 = higher etc */ zint accessed_nodes[ISAMB_MAX_LEVEL]; /* nodes we did not skip */ + zint number_of_int_splits; + zint number_of_leaf_splits; + int enable_int_count; /* whether we count nodes (or not) */ + int cache_size; /* size of blocks to cache (if cache=1) */ }; struct ISAMB_block { - ISAMB_P pos; + ISAM_P pos; int cat; int size; int leaf; @@ -118,7 +124,7 @@ struct ISAMB_block { struct ISAMB_PP_s { ISAMB isamb; - ISAMB_P pos; + ISAM_P pos; int level; int maxlevel; /* total depth */ zint total_size; @@ -140,10 +146,10 @@ static void encode_ptr(char **dst, zint pos) while (pos > 127) { - *bp++ = 128 | (pos & 127); + *bp++ = (unsigned char) (128 | (pos & 127)); pos = pos >> 7; } - *bp++ = pos; + *bp++ = (unsigned char) pos; *dst = (char *) bp; } #else @@ -156,14 +162,13 @@ static void encode_ptr(char **dst, zint pos) #define decode_item_len decode_ptr #if ISAMB_PTR_CODEC -static void decode_ptr(const char **src1, zint *pos) +static void decode_ptr(const char **src, zint *pos) { - const unsigned char **src = (const unsigned char **) src1; zint d = 0; unsigned char c; unsigned r = 0; - while (((c = *(*src)++) & 128)) + while (((c = *(const unsigned char *)((*src)++)) & 128)) { d += ((zint) (c & 127) << r); r += 7; @@ -179,32 +184,62 @@ static void decode_ptr(const char **src, zint *pos) } #endif + +void isamb_set_int_count(ISAMB b, int v) +{ + b->enable_int_count = v; +} + +void isamb_set_cache_size(ISAMB b, int v) +{ + b->cache_size = v; +} + ISAMB isamb_open(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, - int cache) + int cache) { ISAMB isamb = xmalloc(sizeof(*isamb)); int i, b_size = ISAMB_MIN_SIZE; isamb->bfs = bfs; isamb->method = (ISAMC_M *) xmalloc(sizeof(*method)); - memcpy (isamb->method, method, sizeof(*method)); + memcpy(isamb->method, method, sizeof(*method)); isamb->no_cat = CAT_NO; isamb->log_io = 0; isamb->log_freelist = 0; isamb->cache = cache; isamb->skipped_numbers = 0; isamb->returned_numbers = 0; - for (i = 0;iskipped_nodes[i]= isamb->accessed_nodes[i]=0; + isamb->number_of_int_splits = 0; + isamb->number_of_leaf_splits = 0; + isamb->enable_int_count = 1; + isamb->cache_size = 40; + + for (i = 0; iskipped_nodes[i] = isamb->accessed_nodes[i] = 0; - assert(cache == 0); + if (cache == -1) + { + yaz_log(YLOG_WARN, "isamb_open %s. Degraded TEST mode", name); + } + else + { + assert(cache == 0 || cache == 1); + } isamb->file = xmalloc(sizeof(*isamb->file) * isamb->no_cat); + + for (i = 0; i < isamb->no_cat; i++) + { + isamb->file[i].bf = 0; + isamb->file[i].head_dirty = 0; + isamb->file[i].cache_entries = 0; + } + for (i = 0; i < isamb->no_cat; i++) { char fname[DST_BUF_SIZE]; char hbuf[DST_BUF_SIZE]; - isamb->file[i].cache_entries = 0; - isamb->file[i].head_dirty = 0; + sprintf(fname, "%s%c", name, i+'A'); if (cache) isamb->file[i].bf = bf_open(bfs, fname, ISAMB_CACHE_ENTRY_SIZE, @@ -212,10 +247,17 @@ ISAMB isamb_open(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, else isamb->file[i].bf = bf_open(bfs, fname, b_size, writeflag); + if (!isamb->file[i].bf) + { + isamb_close(isamb); + return 0; + } + /* fill-in default values (for empty isamb) */ isamb->file[i].head.first_block = ISAMB_CACHE_ENTRY_SIZE/b_size+1; isamb->file[i].head.last_block = isamb->file[i].head.first_block; isamb->file[i].head.block_size = b_size; + assert(b_size <= ISAMB_CACHE_ENTRY_SIZE); #if ISAMB_PTR_CODEC if (i == isamb->no_cat-1 || b_size > 128) isamb->file[i].head.block_offset = 8; @@ -265,9 +307,9 @@ ISAMB isamb_open(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, decode_ptr(&src, &isamb->file[i].head.first_block); decode_ptr(&src, &isamb->file[i].head.last_block); decode_ptr(&src, &zint_tmp); - isamb->file[i].head.block_size = zint_tmp; + isamb->file[i].head.block_size = (int) zint_tmp; decode_ptr(&src, &zint_tmp); - isamb->file[i].head.block_max = zint_tmp; + isamb->file[i].head.block_max = (int) zint_tmp; decode_ptr(&src, &isamb->file[i].head.free_list); } assert (isamb->file[i].head.block_size >= isamb->file[i].head.block_offset); @@ -298,7 +340,7 @@ static void flush_blocks (ISAMB b, int cat) } } -static int get_block (ISAMB b, ISAMC_P pos, char *userbuf, int wr) +static int cache_block (ISAMB b, ISAM_P pos, unsigned char *userbuf, int wr) { int cat = (int) (pos&CAT_MASK); int off = (int) (((pos/CAT_MAX) & @@ -335,15 +377,14 @@ static int get_block (ISAMB b, ISAMC_P pos, char *userbuf, int wr) return 1; } } - if (no >= 40) + if (no >= b->cache_size) { - assert (no == 40); assert (ce_last && *ce_last); ce_this = *ce_last; *ce_last = 0; /* remove the last entry from list */ if (ce_this->dirty) { - yaz_log(b->log_io, "bf_write: get_block"); + yaz_log(b->log_io, "bf_write: cache_block"); bf_write(b->file[cat].bf, ce_this->pos, 0, 0, ce_this->buf); } xfree(ce_this->buf); @@ -354,7 +395,7 @@ static int get_block (ISAMB b, ISAMC_P pos, char *userbuf, int wr) b->file[cat].cache_entries = ce_this; ce_this->buf = xmalloc(ISAMB_CACHE_ENTRY_SIZE); ce_this->pos = norm; - yaz_log(b->log_io, "bf_read: get_block"); + yaz_log(b->log_io, "bf_read: cache_block"); if (!bf_read(b->file[cat].bf, norm, 0, 0, ce_this->buf)) memset (ce_this->buf, 0, ISAMB_CACHE_ENTRY_SIZE); if (wr) @@ -374,7 +415,7 @@ static int get_block (ISAMB b, ISAMC_P pos, char *userbuf, int wr) void isamb_close (ISAMB isamb) { int i; - for (i = 0;isamb->accessed_nodes[i];i++) + for (i = 0; isamb->accessed_nodes[i]; i++) yaz_log(YLOG_DEBUG, "isamb_close level leaf-%d: "ZINT_FORMAT" read, " ZINT_FORMAT" skipped", i, isamb->accessed_nodes[i], isamb->skipped_nodes[i]); @@ -414,7 +455,8 @@ void isamb_close (ISAMB isamb) bf_write(isamb->file[i].bf, pos, 0, 0, hbuf + pos*b_size); } } - bf_close (isamb->file[i].bf); + if (isamb->file[i].bf) + bf_close (isamb->file[i].bf); } xfree(isamb->file); xfree(isamb->method); @@ -431,7 +473,7 @@ void isamb_close (ISAMB isamb) * Reserve 5 bytes for large block sizes. 1 for small ones .. Number of items. We can thus have at most 2^40 nodes. */ -static struct ISAMB_block *open_block(ISAMB b, ISAMC_P pos) +static struct ISAMB_block *open_block(ISAMB b, ISAM_P pos) { int cat = (int) (pos&CAT_MASK); const char *src; @@ -445,17 +487,17 @@ static struct ISAMB_block *open_block(ISAMB b, ISAMC_P pos) p->buf = xmalloc(b->file[cat].head.block_size); p->cbuf = 0; - if (!get_block (b, pos, p->buf, 0)) + if (!cache_block (b, pos, p->buf, 0)) { yaz_log(b->log_io, "bf_read: open_block"); - if (!bf_read(b->file[cat].bf, pos/CAT_MAX, 0, 0, p->buf)) + if (bf_read(b->file[cat].bf, pos/CAT_MAX, 0, 0, p->buf) != 1) { yaz_log(YLOG_FATAL, "isamb: read fail for pos=%ld block=%ld", (long) pos, (long) pos/CAT_MAX); - abort(); + zebra_exit("isamb:open_block"); } } - p->bytes = p->buf + offset; + p->bytes = (char *)p->buf + offset; p->leaf = p->buf[0]; p->size = (p->buf[1] + 256 * p->buf[2]) - offset; if (p->size < 0) @@ -464,7 +506,7 @@ static struct ISAMB_block *open_block(ISAMB b, ISAMC_P pos) p->size, pos); } assert (p->size >= 0); - src = p->buf + 3; + src = (char*) p->buf + 3; decode_ptr(&src, &p->no_items); p->offset = 0; @@ -491,14 +533,14 @@ struct ISAMB_block *new_block (ISAMB b, int leaf, int cat) { p->pos = b->file[cat].head.free_list; assert((p->pos & CAT_MASK) == cat); - if (!get_block (b, p->pos, p->buf, 0)) + if (!cache_block (b, p->pos, p->buf, 0)) { yaz_log(b->log_io, "bf_read: new_block"); if (!bf_read(b->file[cat].bf, p->pos/CAT_MAX, 0, 0, p->buf)) { yaz_log(YLOG_FATAL, "isamb: read fail for pos=%ld block=%ld", (long) p->pos/CAT_MAX, (long) p->pos/CAT_MAX); - abort (); + zebra_exit("isamb:new_block"); } } yaz_log(b->log_freelist, "got block " ZINT_FORMAT " from freelist %d:" ZINT_FORMAT, p->pos, @@ -508,7 +550,7 @@ struct ISAMB_block *new_block (ISAMB b, int leaf, int cat) p->cat = cat; b->file[cat].head_dirty = 1; memset (p->buf, 0, b->file[cat].head.block_size); - p->bytes = p->buf + b->file[cat].head.block_offset; + p->bytes = (char*)p->buf + b->file[cat].head.block_offset; p->leaf = leaf; p->size = 0; p->dirty = 1; @@ -543,7 +585,7 @@ static void check_block (ISAMB b, struct ISAMB_block *p) char *startp = p->bytes; const char *src = startp; char *endp = p->bytes + p->size; - ISAMB_P pos; + ISAM_P pos; void *c1 = (*b->method->codec.start)(); decode_ptr(&src, &pos); @@ -581,7 +623,7 @@ void close_block(ISAMB b, struct ISAMB_block *p) p->pos, p->cat, p->pos/CAT_MAX); memcpy (p->buf, &b->file[p->cat].head.free_list, sizeof(zint)); b->file[p->cat].head.free_list = p->pos; - if (!get_block (b, p->pos, p->buf, 1)) + if (!cache_block (b, p->pos, p->buf, 1)) { yaz_log(b->log_io, "bf_write: close_block (deleted)"); bf_write(b->file[p->cat].bf, p->pos/CAT_MAX, 0, 0, p->buf); @@ -591,7 +633,7 @@ void close_block(ISAMB b, struct ISAMB_block *p) { int offset = b->file[p->cat].head.block_offset; int size = p->size + offset; - char *dst = p->buf + 3; + char *dst = (char*)p->buf + 3; assert (p->size >= 0); /* memset becuase encode_ptr usually does not write all bytes */ @@ -601,7 +643,7 @@ void close_block(ISAMB b, struct ISAMB_block *p) p->buf[2] = size >> 8; encode_ptr(&dst, p->no_items); check_block(b, p); - if (!get_block (b, p->pos, p->buf, 1)) + if (!cache_block (b, p->pos, p->buf, 1)) { yaz_log(b->log_io, "bf_write: close_block"); bf_write(b->file[p->cat].bf, p->pos/CAT_MAX, 0, 0, p->buf); @@ -627,7 +669,7 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, char *startp = p->bytes; const char *src = startp; char *endp = p->bytes + p->size; - ISAMB_P pos; + ISAM_P pos; struct ISAMB_block *sub_p1 = 0, *sub_p2 = 0; char sub_item[DST_ITEM_MAX]; int sub_size; @@ -703,7 +745,9 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, /* there was a split - must insert pointer in this one */ char dst_buf[DST_BUF_SIZE]; char *dst = dst_buf; - +#if INT_ENCODE + const char *sub_item_ptr = sub_item; +#endif assert (sub_size < 80 && sub_size > 1); memcpy (dst, startp, src - startp); @@ -711,7 +755,6 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, dst += src - startp; #if INT_ENCODE - const char *sub_item_ptr = sub_item; (*b->method->codec.reset)(c1); (*b->method->codec.encode)(c1, &dst, &sub_item_ptr); #else @@ -730,17 +773,20 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, p->size = dst - dst_buf; assert (p->size >= 0); - if (p->size <= b->file[p->cat].head.block_max) { /* it fits OK in this block */ memcpy (startp, dst_buf, dst - dst_buf); + + close_block(b, sub_p2); } else { /* must split _this_ block as well .. */ struct ISAMB_block *sub_p3; #if INT_ENCODE + char file_item_buf[DST_ITEM_MAX]; + char *file_item = file_item_buf; #else zint split_size_tmp; #endif @@ -749,20 +795,27 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, const char *half; src = dst_buf; endp = dst; + + b->number_of_int_splits++; + + p->dirty = 1; + close_block(b, sub_p2); half = src + b->file[p->cat].head.block_size/2; decode_ptr(&src, &pos); - /* read sub block so we can get no_items for it */ - sub_p3 = open_block(b, pos); - no_items_first_half += sub_p3->no_items; - close_block(b, sub_p3); + if (b->enable_int_count) + { + /* read sub block so we can get no_items for it */ + sub_p3 = open_block(b, pos); + no_items_first_half += sub_p3->no_items; + close_block(b, sub_p3); + } while (src <= half) { #if INT_ENCODE - char file_item_buf[DST_ITEM_MAX]; - char *file_item = file_item_buf; + file_item = file_item_buf; (*b->method->codec.reset)(c1); (*b->method->codec.decode)(c1, &file_item, &src); #else @@ -772,18 +825,20 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, #endif decode_ptr(&src, &pos); - /* read sub block so we can get no_items for it */ - sub_p3 = open_block(b, pos); - no_items_first_half += sub_p3->no_items; - close_block(b, sub_p3); + if (b->enable_int_count) + { + /* read sub block so we can get no_items for it */ + sub_p3 = open_block(b, pos); + no_items_first_half += sub_p3->no_items; + close_block(b, sub_p3); + } } /* p is first half */ p_new_size = src - dst_buf; memcpy (p->bytes, dst_buf, p_new_size); #if INT_ENCODE - char file_item_buf[DST_ITEM_MAX]; - char *file_item = file_item_buf; + file_item = file_item_buf; (*b->method->codec.reset)(c1); (*b->method->codec.decode)(c1, &file_item, &src); *split_size = file_item - file_item_buf; @@ -805,8 +860,7 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, (*sp)->no_items = p->no_items - no_items_first_half; p->no_items = no_items_first_half; } - p->dirty = 1; - close_block(b, sub_p2); + p->dirty = 1; } close_block(b, sub_p1); (*b->method->codec.stop)(c1); @@ -837,6 +891,7 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, int cut_item_size = 0; int no_items = 0; /* number of items (total) */ int no_items_1 = 0; /* number of items (first half) */ + int inserted_dst_bytes = 0; if (p && p->size) { @@ -850,6 +905,7 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, { const char *dst_item = 0; /* resulting item to be inserted */ char *lookahead_next; + char *dst_0 = dst; int d = -1; if (lookahead_item) @@ -869,13 +925,12 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, if (!*lookahead_mode) { yaz_log(YLOG_WARN, "isamb: Inconsistent register (1)"); - assert (*lookahead_mode); + assert(*lookahead_mode); } } else dst_item = file_item_buf; - if (!*lookahead_mode && d == 0) { /* it's a deletion and they match so there is nothing to be @@ -912,7 +967,8 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, { /* we must move the lookahead pointer */ - if (dst > maxp) + inserted_dst_bytes += (dst - dst_0); + if (inserted_dst_bytes >= quater) /* no more room. Mark lookahead as "gone".. */ lookahead_item = 0; else @@ -964,17 +1020,19 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, } } } - maxp = dst_buf + b->file[b->no_cat-1].head.block_max + quater; + /* this loop runs when we are "appending" to a leaf page. That is either it's empty (new) or all file items have been read in previous loop */ + + maxp = dst_buf + b->file[b->no_cat-1].head.block_max + quater; while (lookahead_item) { char *dst_item; const char *src = lookahead_item; char *dst_0 = dst; - /* compare lookahead with max item */ + /* if we have a lookahead item, we stop if we exceed the value of it */ if (max_item && (*b->method->compare_item)(max_item, lookahead_item) <= 0) { @@ -985,7 +1043,7 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, { /* this is append. So a delete is bad */ yaz_log(YLOG_WARN, "isamb: Inconsistent register (2)"); - abort(); + assert(*lookahead_mode); } else if (!half1 && dst > tail_cut) { @@ -1052,6 +1110,7 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, /* first half */ p->size = half1 - dst_buf; + assert(p->size <= b->file[p->cat].head.block_max); memcpy (p->bytes, dst_buf, half1 - dst_buf); p->no_items = no_items_1; @@ -1060,6 +1119,8 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, (*b->method->codec.reset)(c2); + b->number_of_leaf_splits++; + first_dst = (*sp2)->bytes; (*b->method->codec.encode)(c2, &first_dst, &cut_item); @@ -1067,6 +1128,7 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, memcpy (first_dst, half2, dst - half2); (*sp2)->size = (first_dst - (*sp2)->bytes) + (dst - half2); + assert((*sp2)->size <= b->file[p->cat].head.block_max); (*sp2)->no_items = no_items - no_items_1; (*sp2)->dirty = 1; p->dirty = 1; @@ -1100,7 +1162,7 @@ int insert_sub (ISAMB b, struct ISAMB_block **p, void *new_item, sub_size, max_item); } -int isamb_unlink (ISAMB b, ISAMC_P pos) +int isamb_unlink (ISAMB b, ISAM_P pos) { struct ISAMB_block *p1; @@ -1112,10 +1174,11 @@ int isamb_unlink (ISAMB b, ISAMC_P pos) { zint sub_p; const char *src = p1->bytes + p1->offset; - +#if INT_ENCODE + void *c1 = (*b->method->codec.start)(); +#endif decode_ptr(&src, &sub_p); isamb_unlink(b, sub_p); - void *c1 = (*b->method->codec.start)(); while (src != p1->bytes + p1->size) { @@ -1132,13 +1195,15 @@ int isamb_unlink (ISAMB b, ISAMC_P pos) decode_ptr(&src, &sub_p); isamb_unlink(b, sub_p); } +#if INT_ENCODE (*b->method->codec.stop)(c1); +#endif } close_block(b, p1); return 0; } -ISAMB_P isamb_merge (ISAMB b, ISAMC_P pos, ISAMC_I *stream) +void isamb_merge(ISAMB b, ISAM_P *pos, ISAMC_I *stream) { char item_buf[DST_ITEM_MAX]; char *item_ptr; @@ -1155,7 +1220,8 @@ ISAMB_P isamb_merge (ISAMB b, ISAMC_P pos, ISAMC_I *stream) more = (*stream->read_item)(stream->clientData, &item_ptr, &i_mode); } - return 1; + *pos = 1; + return; } item_ptr = item_buf; more = (*stream->read_item)(stream->clientData, &item_ptr, &i_mode); @@ -1165,20 +1231,22 @@ ISAMB_P isamb_merge (ISAMB b, ISAMC_P pos, ISAMC_I *stream) char sub_item[DST_ITEM_MAX]; int sub_size; - if (pos) - p = open_block(b, pos); + if (*pos) + p = open_block(b, *pos); more = insert_sub (b, &p, item_buf, &i_mode, stream, &sp, sub_item, &sub_size, 0); if (sp) { /* increase level of tree by one */ struct ISAMB_block *p2 = new_int (b, p->cat); char *dst = p2->bytes + p2->size; +#if INT_ENCODE void *c1 = (*b->method->codec.start)(); - + const char *sub_item_ptr = sub_item; +#endif + encode_ptr(&dst, p->pos); assert (sub_size < 80 && sub_size > 1); #if INT_ENCODE - const char *sub_item_ptr = sub_item; (*b->method->codec.reset)(c1); (*b->method->codec.encode)(c1, &dst, &sub_item_ptr); #else @@ -1190,14 +1258,16 @@ ISAMB_P isamb_merge (ISAMB b, ISAMC_P pos, ISAMC_I *stream) p2->size = dst - p2->bytes; p2->no_items = p->no_items + sp->no_items; - pos = p2->pos; /* return new super page */ + *pos = p2->pos; /* return new super page */ close_block(b, sp); close_block(b, p2); +#if INT_ENCODE (*b->method->codec.stop)(c1); +#endif } else { - pos = p->pos; /* return current one (again) */ + *pos = p->pos; /* return current one (again) */ } if (p->no_items == 0) must_delete = 1; @@ -1207,13 +1277,12 @@ ISAMB_P isamb_merge (ISAMB b, ISAMC_P pos, ISAMC_I *stream) } if (must_delete) { - isamb_unlink(b, pos); - return 0; + isamb_unlink(b, *pos); + *pos = 0; } - return pos; } -ISAMB_PP isamb_pp_open_x(ISAMB isamb, ISAMB_P pos, int *level, int scope) +ISAMB_PP isamb_pp_open_x(ISAMB isamb, ISAM_P pos, int *level, int scope) { ISAMB_PP pp = xmalloc(sizeof(*pp)); int i; @@ -1231,8 +1300,8 @@ ISAMB_PP isamb_pp_open_x(ISAMB isamb, ISAMB_P pos, int *level, int scope) pp->skipped_numbers = 0; pp->returned_numbers = 0; pp->scope = scope; - for (i = 0;iskipped_nodes[i] = pp->accessed_nodes[i]=0; + for (i = 0; iskipped_nodes[i] = pp->accessed_nodes[i] = 0; while (1) { struct ISAMB_block *p = open_block(isamb, pos); @@ -1255,12 +1324,12 @@ ISAMB_PP isamb_pp_open_x(ISAMB isamb, ISAMB_P pos, int *level, int scope) return pp; } -ISAMB_PP isamb_pp_open (ISAMB isamb, ISAMB_P pos, int scope) +ISAMB_PP isamb_pp_open (ISAMB isamb, ISAM_P pos, int scope) { return isamb_pp_open_x(isamb, pos, 0, scope); } -void isamb_pp_close_x(ISAMB_PP pp, int *size, int *blocks) +void isamb_pp_close_x(ISAMB_PP pp, zint *size, zint *blocks) { int i; if (!pp) @@ -1268,14 +1337,14 @@ void isamb_pp_close_x(ISAMB_PP pp, int *size, int *blocks) yaz_log(YLOG_DEBUG, "isamb_pp_close lev=%d returned "ZINT_FORMAT" values, " "skipped "ZINT_FORMAT, pp->maxlevel, pp->skipped_numbers, pp->returned_numbers); - for (i = pp->maxlevel;i>=0;i--) + for (i = pp->maxlevel; i>=0; i--) if (pp->skipped_nodes[i] || pp->accessed_nodes[i]) yaz_log(YLOG_DEBUG, "isamb_pp_close level leaf-%d: " ZINT_FORMAT" read, "ZINT_FORMAT" skipped", i, pp->accessed_nodes[i], pp->skipped_nodes[i]); pp->isamb->skipped_numbers += pp->skipped_numbers; pp->isamb->returned_numbers += pp->returned_numbers; - for (i = pp->maxlevel;i>=0;i--) + for (i = pp->maxlevel; i>=0; i--) { pp->isamb->accessed_nodes[i] += pp->accessed_nodes[i]; pp->isamb->skipped_nodes[i] += pp->skipped_nodes[i]; @@ -1303,7 +1372,7 @@ void isamb_pp_close (ISAMB_PP pp) } /* simple recursive dumper .. */ -static void isamb_dump_r (ISAMB b, ISAMB_P pos, void (*pr)(const char *str), +static void isamb_dump_r (ISAMB b, ISAM_P pos, void (*pr)(const char *str), int level) { char buf[1024]; @@ -1332,7 +1401,7 @@ static void isamb_dump_r (ISAMB b, ISAMB_P pos, void (*pr)(const char *str), else { const char *src = p->bytes + p->offset; - ISAMB_P sub; + ISAM_P sub; decode_ptr(&src, &sub); p->offset = src - (char*) p->bytes; @@ -1365,7 +1434,7 @@ static void isamb_dump_r (ISAMB b, ISAMB_P pos, void (*pr)(const char *str), } } -void isamb_dump(ISAMB b, ISAMB_P pos, void (*pr)(const char *str)) +void isamb_dump(ISAMB b, ISAM_P pos, void (*pr)(const char *str)) { isamb_dump_r(b, pos, pr, 0); } @@ -1499,12 +1568,11 @@ static int isamb_pp_forward_on_leaf(ISAMB_PP pp, void *buf, const void *untilbuf } } /* forward_on_leaf */ -static int isamb_pp_climb_level(ISAMB_PP pp, ISAMB_P *pos) +static int isamb_pp_climb_level(ISAMB_PP pp, ISAM_P *pos) { /* climbs higher in the tree, until finds a level with data left */ /* returns the node to (consider to) descend to in *pos) */ struct ISAMB_block *p = pp->block[pp->level]; const char *src; - ISAMB b = pp->isamb; #if ISAMB_DEBUG yaz_log(YLOG_DEBUG, "isamb_pp_climb_level starting " "at level %d node %d ofs=%d sz=%d", @@ -1521,7 +1589,7 @@ static int isamb_pp_climb_level(ISAMB_PP pp, ISAMB_P *pos) } assert(pp->level>0); close_block(pp->isamb, pp->block[pp->level]); - pp->block[pp->level]=0; + pp->block[pp->level] = 0; (pp->level)--; p = pp->block[pp->level]; #if ISAMB_DEBUG @@ -1539,6 +1607,14 @@ static int isamb_pp_climb_level(ISAMB_PP pp, ISAMB_P *pos) } else { +#if INT_ENCODE + char file_item_buf[DST_ITEM_MAX]; + char *file_item = file_item_buf; + ISAMB b = pp->isamb; + void *c1 = (*b->method->codec.start)(); +#else + zint item_len; +#endif /* skip the child we just came from */ #if ISAMB_DEBUG yaz_log(YLOG_DEBUG, "isam_pp_climb_level: skipping lev=%d ofs=%d sz=%d", @@ -1547,13 +1623,9 @@ static int isamb_pp_climb_level(ISAMB_PP pp, ISAMB_P *pos) assert (p->offset < p->size); src = p->bytes + p->offset; #if INT_ENCODE - char file_item_buf[DST_ITEM_MAX]; - char *file_item = file_item_buf; - void *c1 = (*b->method->codec.start)(); (*b->method->codec.decode)(c1, &file_item, &src); (*b->method->codec.stop)(c1); #else - zint item_len; decode_item_len(&src, &item_len); src += item_len; #endif @@ -1636,7 +1708,7 @@ static zint isamb_pp_forward_unode(ISAMB_PP pp, zint pos, const void *untilbuf) } /* forward_unode */ -static void isamb_pp_descend_to_leaf(ISAMB_PP pp, ISAMB_P pos, +static void isamb_pp_descend_to_leaf(ISAMB_PP pp, ISAM_P pos, const void *untilbuf) { /* climbs down the tree, from pos, to the leftmost leaf */ struct ISAMB_block *p = pp->block[pp->level]; @@ -1676,7 +1748,7 @@ static void isamb_pp_descend_to_leaf(ISAMB_PP pp, ISAMB_P pos, static int isamb_pp_find_next_leaf(ISAMB_PP pp) { /* finds the next leaf by climbing up and down */ - ISAMB_P pos; + ISAM_P pos; if (!isamb_pp_climb_level(pp, &pos)) return 0; isamb_pp_descend_to_leaf(pp, pos, 0); @@ -1685,7 +1757,7 @@ static int isamb_pp_find_next_leaf(ISAMB_PP pp) static int isamb_pp_climb_desc(ISAMB_PP pp, const void *untilbuf) { /* climbs up and descends to a leaf where values >= *untilbuf are found */ - ISAMB_P pos; + ISAM_P pos; #if ISAMB_DEBUG struct ISAMB_block *p = pp->block[pp->level]; yaz_log(YLOG_DEBUG, "isamb_pp_climb_desc starting " @@ -1741,9 +1813,9 @@ int isamb_pp_forward (ISAMB_PP pp, void *buf, const void *untilbuf) if (isamb_pp_forward_on_leaf(pp, buf, untilbuf)) { #if ISAMB_DEBUG - yaz_log(YLOG_DEBUG, "isamb_pp_forward (f) returning (C) " - "at level %d node %d ofs=%d sz=%d", - pp->level, p->pos, p->offset, p->size); + yaz_log(YLOG_DEBUG, "isamb_pp_forward (f) returning (c) " + "at level %d node %d ofs=%d sz=%d", + pp->level, p->pos, p->offset, p->size); #endif return 1; } @@ -1781,12 +1853,12 @@ int isamb_pp_forward (ISAMB_PP pp, void *buf, const void *untilbuf) void isamb_pp_pos(ISAMB_PP pp, double *current, double *total) { /* return an estimate of the current position and of the total number of */ /* occureences in the isam tree, based on the current leaf */ - struct ISAMB_block *p = pp->block[pp->level]; assert(total); assert(current); - assert(p->leaf); + + /* if end-of-stream PP may not be leaf */ - *total = pp->block[0]->no_items; + *total = (double) (pp->block[0]->no_items); *current = (double) pp->returned_numbers; #if ISAMB_DEBUG yaz_log(YLOG_LOG, "isamb_pp_pos returning: cur= %0.1f tot=%0.1f rn=" @@ -1805,7 +1877,7 @@ int isamb_pp_forward2(ISAMB_PP pp, void *buf, const void *untilb) again: while (p->offset == p->size) { - ISAMB_P pos; + ISAM_P pos; #if INT_ENCODE const char *src_0; void *c1; @@ -1926,3 +1998,22 @@ again: } return 1; } + +zint isamb_get_int_splits(ISAMB b) +{ + return b->number_of_int_splits; +} + +zint isamb_get_leaf_splits(ISAMB b) +{ + return b->number_of_leaf_splits; +} + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ +