X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=isamb%2Fisamb.c;h=1f57c7313550ed04cdf1dc32442a0ae614c461eb;hp=72bb842cc1ce2a02a9c8eedc80a6a91db6feefda;hb=49d0ee122a9f86ec2967b577dcc297c501785edd;hpb=83bf26c44f20a65c1a07e1fcb59649540a3c2193 diff --git a/isamb/isamb.c b/isamb/isamb.c index 72bb842..1f57c73 100644 --- a/isamb/isamb.c +++ b/isamb/isamb.c @@ -1,6 +1,6 @@ -/* $Id: isamb.c,v 1.63 2005-01-02 18:50:53 adam Exp $ +/* $Id: isamb.c,v 1.77 2005-04-15 10:47:49 adam Exp $ Copyright (C) 1995-2005 - Index Data Aps + Index Data ApS This file is part of the Zebra server. @@ -20,6 +20,7 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +#include #include #include #include @@ -30,7 +31,8 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #define ISAMB_DEBUG 0 #endif -#define ISAMB_MAJOR_VERSION 2 + +#define ISAMB_MAJOR_VERSION 3 #define ISAMB_MINOR_VERSION 0 struct ISAMB_head { @@ -43,6 +45,9 @@ struct ISAMB_head { int block_offset; }; +/* if 1, upper nodes items are encoded; 0 if not encoded */ +#define INT_ENCODE 1 + /* maximum size of encoded buffer */ #define DST_ITEM_MAX 256 @@ -58,11 +63,16 @@ struct ISAMB_head { /* CAT_NO: <= CAT_MAX */ #define CAT_NO 4 -/* ISAMB_PTR_CODEC=1 var, =0 fixed */ +/* Smallest block size */ +#define ISAMB_MIN_SIZE 32 +/* Size factor */ +#define ISAMB_FAC_SIZE 4 + +/* ISAMB_PTR_CODEC = 1 var, =0 fixed */ #define ISAMB_PTR_CODEC 1 struct ISAMB_cache_entry { - ISAMB_P pos; + ISAM_P pos; unsigned char *buf; int dirty; int hits; @@ -82,17 +92,17 @@ struct ISAMB_s { struct ISAMB_file *file; int no_cat; - int cache; /* 0=no cache, 1=use cache, -1=dummy isam (for testing only) */ + int cache; /* 0 = no cache, 1 = use cache, -1 = dummy isam (for testing only) */ int log_io; /* log level for bf_read/bf_write calls */ int log_freelist; /* log level for freelist handling */ zint skipped_numbers; /* on a leaf node */ zint returned_numbers; - zint 
skipped_nodes[ISAMB_MAX_LEVEL]; /* [0]=skipped leaves, 1=higher etc */ + zint skipped_nodes[ISAMB_MAX_LEVEL]; /* [0]=skipped leaves, 1 = higher etc */ zint accessed_nodes[ISAMB_MAX_LEVEL]; /* nodes we did not skip */ }; struct ISAMB_block { - ISAMB_P pos; + ISAM_P pos; int cat; int size; int leaf; @@ -109,43 +119,45 @@ struct ISAMB_block { struct ISAMB_PP_s { ISAMB isamb; - ISAMB_P pos; + ISAM_P pos; int level; int maxlevel; /* total depth */ zint total_size; zint no_blocks; zint skipped_numbers; /* on a leaf node */ zint returned_numbers; - zint skipped_nodes[ISAMB_MAX_LEVEL]; /* [0]=skipped leaves, 1=higher etc */ + zint skipped_nodes[ISAMB_MAX_LEVEL]; /* [0]=skipped leaves, 1 = higher etc */ zint accessed_nodes[ISAMB_MAX_LEVEL]; /* nodes we did not skip */ struct ISAMB_block **block; int scope; /* on what level we forward */ }; +#define encode_item_len encode_ptr #if ISAMB_PTR_CODEC -static void encode_ptr (char **dst, zint pos) +static void encode_ptr(char **dst, zint pos) { unsigned char *bp = (unsigned char*) *dst; while (pos > 127) { - *bp++ = 128 | (pos & 127); + *bp++ = (unsigned char) (128 | (pos & 127)); pos = pos >> 7; } - *bp++ = pos; + *bp++ = (unsigned char) pos; *dst = (char *) bp; } #else -static void encode_ptr (char **dst, zint pos) +static void encode_ptr(char **dst, zint pos) { memcpy(*dst, &pos, sizeof(pos)); (*dst) += sizeof(pos); } #endif +#define decode_item_len decode_ptr #if ISAMB_PTR_CODEC -static void decode_ptr (const char **src1, zint *pos) +static void decode_ptr(const char **src1, zint *pos) { const unsigned char **src = (const unsigned char **) src1; zint d = 0; @@ -161,45 +173,45 @@ static void decode_ptr (const char **src1, zint *pos) *pos = d; } #else -static void decode_ptr (const char **src, zint *pos) +static void decode_ptr(const char **src, zint *pos) { - memcpy (pos, *src, sizeof(*pos)); + memcpy(pos, *src, sizeof(*pos)); (*src) += sizeof(*pos); } #endif -ISAMB isamb_open (BFiles bfs, const char *name, int writeflag, 
ISAMC_M *method, - int cache) +ISAMB isamb_open(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, + int cache) { - ISAMB isamb = xmalloc (sizeof(*isamb)); - int i, b_size = 32; + ISAMB isamb = xmalloc(sizeof(*isamb)); + int i, b_size = ISAMB_MIN_SIZE; isamb->bfs = bfs; - isamb->method = (ISAMC_M *) xmalloc (sizeof(*method)); - memcpy (isamb->method, method, sizeof(*method)); + isamb->method = (ISAMC_M *) xmalloc(sizeof(*method)); + memcpy(isamb->method, method, sizeof(*method)); isamb->no_cat = CAT_NO; isamb->log_io = 0; isamb->log_freelist = 0; isamb->cache = cache; - isamb->skipped_numbers=0; - isamb->returned_numbers=0; - for (i=0;iskipped_nodes[i]= isamb->accessed_nodes[i]=0; + isamb->skipped_numbers = 0; + isamb->returned_numbers = 0; + for (i = 0; iskipped_nodes[i] = isamb->accessed_nodes[i] = 0; - assert (cache == 0); - isamb->file = xmalloc (sizeof(*isamb->file) * isamb->no_cat); + assert(cache == 0); + isamb->file = xmalloc(sizeof(*isamb->file) * isamb->no_cat); for (i = 0; i < isamb->no_cat; i++) { char fname[DST_BUF_SIZE]; char hbuf[DST_BUF_SIZE]; isamb->file[i].cache_entries = 0; isamb->file[i].head_dirty = 0; - sprintf (fname, "%s%c", name, i+'A'); + sprintf(fname, "%s%c", name, i+'A'); if (cache) - isamb->file[i].bf = bf_open (bfs, fname, ISAMB_CACHE_ENTRY_SIZE, + isamb->file[i].bf = bf_open(bfs, fname, ISAMB_CACHE_ENTRY_SIZE, writeflag); else - isamb->file[i].bf = bf_open (bfs, fname, b_size, writeflag); + isamb->file[i].bf = bf_open(bfs, fname, b_size, writeflag); /* fill-in default values (for empty isamb) */ isamb->file[i].head.first_block = ISAMB_CACHE_ENTRY_SIZE/b_size+1; @@ -216,7 +228,7 @@ ISAMB isamb_open (BFiles bfs, const char *name, int writeflag, ISAMC_M *method, isamb->file[i].head.block_max = b_size - isamb->file[i].head.block_offset; isamb->file[i].head.free_list = 0; - if (bf_read (isamb->file[i].bf, 0, 0, 0, hbuf)) + if (bf_read(isamb->file[i].bf, 0, 0, 0, hbuf)) { /* got header assume "isamb"major minor len can fit in 16 
bytes */ zint zint_tmp; @@ -242,7 +254,7 @@ ISAMB isamb_open (BFiles bfs, const char *name, int writeflag, ISAMC_M *method, for (left = len - b_size; left > 0; left = left - b_size) { pos++; - if (!bf_read (isamb->file[i].bf, pos, 0, 0, hbuf + pos*b_size)) + if (!bf_read(isamb->file[i].bf, pos, 0, 0, hbuf + pos*b_size)) { yaz_log(YLOG_WARN, "truncated isamb header for " "file=%s len=%d pos=%d", @@ -254,15 +266,15 @@ ISAMB isamb_open (BFiles bfs, const char *name, int writeflag, ISAMC_M *method, decode_ptr(&src, &isamb->file[i].head.first_block); decode_ptr(&src, &isamb->file[i].head.last_block); decode_ptr(&src, &zint_tmp); - isamb->file[i].head.block_size = zint_tmp; + isamb->file[i].head.block_size = (int) zint_tmp; decode_ptr(&src, &zint_tmp); - isamb->file[i].head.block_max = zint_tmp; + isamb->file[i].head.block_max = (int) zint_tmp; decode_ptr(&src, &isamb->file[i].head.free_list); } assert (isamb->file[i].head.block_size >= isamb->file[i].head.block_offset); isamb->file[i].head_dirty = 0; assert(isamb->file[i].head.block_size == b_size); - b_size = b_size * 4; + b_size = b_size * ISAMB_FAC_SIZE; } #if ISAMB_DEBUG yaz_log(YLOG_WARN, "isamb debug enabled. 
Things will be slower than usual"); @@ -279,15 +291,15 @@ static void flush_blocks (ISAMB b, int cat) if (ce_this->dirty) { - yaz_log (b->log_io, "bf_write: flush_blocks"); - bf_write (b->file[cat].bf, ce_this->pos, 0, 0, ce_this->buf); + yaz_log(b->log_io, "bf_write: flush_blocks"); + bf_write(b->file[cat].bf, ce_this->pos, 0, 0, ce_this->buf); } - xfree (ce_this->buf); - xfree (ce_this); + xfree(ce_this->buf); + xfree(ce_this); } } -static int get_block (ISAMB b, ISAMC_P pos, char *userbuf, int wr) +static int cache_block (ISAMB b, ISAM_P pos, char *userbuf, int wr) { int cat = (int) (pos&CAT_MASK); int off = (int) (((pos/CAT_MAX) & @@ -332,19 +344,19 @@ static int get_block (ISAMB b, ISAMC_P pos, char *userbuf, int wr) *ce_last = 0; /* remove the last entry from list */ if (ce_this->dirty) { - yaz_log (b->log_io, "bf_write: get_block"); - bf_write (b->file[cat].bf, ce_this->pos, 0, 0, ce_this->buf); + yaz_log(b->log_io, "bf_write: cache_block"); + bf_write(b->file[cat].bf, ce_this->pos, 0, 0, ce_this->buf); } - xfree (ce_this->buf); - xfree (ce_this); + xfree(ce_this->buf); + xfree(ce_this); } - ce_this = xmalloc (sizeof(*ce_this)); + ce_this = xmalloc(sizeof(*ce_this)); ce_this->next = b->file[cat].cache_entries; b->file[cat].cache_entries = ce_this; - ce_this->buf = xmalloc (ISAMB_CACHE_ENTRY_SIZE); + ce_this->buf = xmalloc(ISAMB_CACHE_ENTRY_SIZE); ce_this->pos = norm; - yaz_log (b->log_io, "bf_read: get_block"); - if (!bf_read (b->file[cat].bf, norm, 0, 0, ce_this->buf)) + yaz_log(b->log_io, "bf_read: cache_block"); + if (!bf_read(b->file[cat].bf, norm, 0, 0, ce_this->buf)) memset (ce_this->buf, 0, ISAMB_CACHE_ENTRY_SIZE); if (wr) { @@ -363,11 +375,11 @@ static int get_block (ISAMB b, ISAMC_P pos, char *userbuf, int wr) void isamb_close (ISAMB isamb) { int i; - for (i=0;isamb->accessed_nodes[i];i++) - yaz_log(YLOG_DEBUG,"isamb_close level leaf-%d: "ZINT_FORMAT" read, " + for (i = 0; isamb->accessed_nodes[i]; i++) + yaz_log(YLOG_DEBUG, "isamb_close level 
leaf-%d: "ZINT_FORMAT" read, " ZINT_FORMAT" skipped", i, isamb->accessed_nodes[i], isamb->skipped_nodes[i]); - yaz_log(YLOG_DEBUG,"isamb_close returned "ZINT_FORMAT" values, " + yaz_log(YLOG_DEBUG, "isamb_close returned "ZINT_FORMAT" values, " "skipped "ZINT_FORMAT, isamb->skipped_numbers, isamb->returned_numbers); for (i = 0; ino_cat; i++) @@ -395,19 +407,19 @@ void isamb_close (ISAMB isamb) /* print exactly 16 bytes (including trailing 0) */ sprintf(hbuf, "isamb%02d %02d %02d\r\n", major, minor, len); - bf_write (isamb->file[i].bf, pos, 0, 0, hbuf); + bf_write(isamb->file[i].bf, pos, 0, 0, hbuf); for (left = len - b_size; left > 0; left = left - b_size) { pos++; - bf_write (isamb->file[i].bf, pos, 0, 0, hbuf + pos*b_size); + bf_write(isamb->file[i].bf, pos, 0, 0, hbuf + pos*b_size); } } bf_close (isamb->file[i].bf); } - xfree (isamb->file); - xfree (isamb->method); - xfree (isamb); + xfree(isamb->file); + xfree(isamb->method); + xfree(isamb); } /* open_block: read one block at pos. @@ -420,7 +432,7 @@ void isamb_close (ISAMB isamb) * Reserve 5 bytes for large block sizes. 1 for small ones .. Number of items. We can thus have at most 2^40 nodes. 
*/ -static struct ISAMB_block *open_block (ISAMB b, ISAMC_P pos) +static struct ISAMB_block *open_block(ISAMB b, ISAM_P pos) { int cat = (int) (pos&CAT_MASK); const char *src; @@ -428,18 +440,18 @@ static struct ISAMB_block *open_block (ISAMB b, ISAMC_P pos) struct ISAMB_block *p; if (!pos) return 0; - p = xmalloc (sizeof(*p)); + p = xmalloc(sizeof(*p)); p->pos = pos; p->cat = (int) (pos & CAT_MASK); - p->buf = xmalloc (b->file[cat].head.block_size); + p->buf = xmalloc(b->file[cat].head.block_size); p->cbuf = 0; - if (!get_block (b, pos, p->buf, 0)) + if (!cache_block (b, pos, p->buf, 0)) { - yaz_log (b->log_io, "bf_read: open_block"); - if (!bf_read (b->file[cat].bf, pos/CAT_MAX, 0, 0, p->buf)) + yaz_log(b->log_io, "bf_read: open_block"); + if (!bf_read(b->file[cat].bf, pos/CAT_MAX, 0, 0, p->buf)) { - yaz_log (YLOG_FATAL, "isamb: read fail for pos=%ld block=%ld", + yaz_log(YLOG_FATAL, "isamb: read fail for pos=%ld block=%ld", (long) pos, (long) pos/CAT_MAX); abort(); } @@ -449,7 +461,7 @@ static struct ISAMB_block *open_block (ISAMB b, ISAMC_P pos) p->size = (p->buf[1] + 256 * p->buf[2]) - offset; if (p->size < 0) { - yaz_log (YLOG_FATAL, "Bad block size %d in pos=" ZINT_FORMAT "\n", + yaz_log(YLOG_FATAL, "Bad block size %d in pos=" ZINT_FORMAT "\n", p->size, pos); } assert (p->size >= 0); @@ -467,8 +479,8 @@ struct ISAMB_block *new_block (ISAMB b, int leaf, int cat) { struct ISAMB_block *p; - p = xmalloc (sizeof(*p)); - p->buf = xmalloc (b->file[cat].head.block_size); + p = xmalloc(sizeof(*p)); + p->buf = xmalloc(b->file[cat].head.block_size); if (!b->file[cat].head.free_list) { @@ -480,19 +492,19 @@ struct ISAMB_block *new_block (ISAMB b, int leaf, int cat) { p->pos = b->file[cat].head.free_list; assert((p->pos & CAT_MASK) == cat); - if (!get_block (b, p->pos, p->buf, 0)) + if (!cache_block (b, p->pos, p->buf, 0)) { - yaz_log (b->log_io, "bf_read: new_block"); - if (!bf_read (b->file[cat].bf, p->pos/CAT_MAX, 0, 0, p->buf)) + yaz_log(b->log_io, "bf_read: 
new_block"); + if (!bf_read(b->file[cat].bf, p->pos/CAT_MAX, 0, 0, p->buf)) { - yaz_log (YLOG_FATAL, "isamb: read fail for pos=%ld block=%ld", + yaz_log(YLOG_FATAL, "isamb: read fail for pos=%ld block=%ld", (long) p->pos/CAT_MAX, (long) p->pos/CAT_MAX); abort (); } } - yaz_log (b->log_freelist, "got block " ZINT_FORMAT " from freelist %d:" ZINT_FORMAT, p->pos, + yaz_log(b->log_freelist, "got block " ZINT_FORMAT " from freelist %d:" ZINT_FORMAT, p->pos, cat, p->pos/CAT_MAX); - memcpy (&b->file[cat].head.free_list, p->buf, sizeof(int)); + memcpy (&b->file[cat].head.free_list, p->buf, sizeof(zint)); } p->cat = cat; b->file[cat].head_dirty = 1; @@ -532,39 +544,48 @@ static void check_block (ISAMB b, struct ISAMB_block *p) char *startp = p->bytes; const char *src = startp; char *endp = p->bytes + p->size; - ISAMB_P pos; + ISAM_P pos; + void *c1 = (*b->method->codec.start)(); - decode_ptr (&src, &pos); + decode_ptr(&src, &pos); assert ((pos&CAT_MASK) == p->cat); while (src != endp) { +#if INT_ENCODE + char file_item_buf[DST_ITEM_MAX]; + char *file_item = file_item_buf; + (*b->method->codec.reset)(c1); + (*b->method->codec.decode)(c1, &file_item, &src); +#else zint item_len; - decode_ptr (&src, &item_len); + decode_item_len(&src, &item_len); assert (item_len > 0 && item_len < 80); src += item_len; - decode_ptr (&src, &pos); +#endif + decode_ptr(&src, &pos); if ((pos&CAT_MASK) != p->cat) { assert ((pos&CAT_MASK) == p->cat); } } + (*b->method->codec.stop)(c1); } } -void close_block (ISAMB b, struct ISAMB_block *p) +void close_block(ISAMB b, struct ISAMB_block *p) { if (!p) return; if (p->deleted) { - yaz_log (b->log_freelist, "release block " ZINT_FORMAT " from freelist %d:" ZINT_FORMAT, + yaz_log(b->log_freelist, "release block " ZINT_FORMAT " from freelist %d:" ZINT_FORMAT, p->pos, p->cat, p->pos/CAT_MAX); - memcpy (p->buf, &b->file[p->cat].head.free_list, sizeof(int)); + memcpy (p->buf, &b->file[p->cat].head.free_list, sizeof(zint)); b->file[p->cat].head.free_list = 
p->pos; - if (!get_block (b, p->pos, p->buf, 1)) + if (!cache_block (b, p->pos, p->buf, 1)) { - yaz_log (b->log_io, "bf_write: close_block (deleted)"); - bf_write (b->file[p->cat].bf, p->pos/CAT_MAX, 0, 0, p->buf); + yaz_log(b->log_io, "bf_write: close_block (deleted)"); + bf_write(b->file[p->cat].bf, p->pos/CAT_MAX, 0, 0, p->buf); } } else if (p->dirty) @@ -581,15 +602,15 @@ void close_block (ISAMB b, struct ISAMB_block *p) p->buf[2] = size >> 8; encode_ptr(&dst, p->no_items); check_block(b, p); - if (!get_block (b, p->pos, p->buf, 1)) + if (!cache_block (b, p->pos, p->buf, 1)) { - yaz_log (b->log_io, "bf_write: close_block"); - bf_write (b->file[p->cat].bf, p->pos/CAT_MAX, 0, 0, p->buf); + yaz_log(b->log_io, "bf_write: close_block"); + bf_write(b->file[p->cat].bf, p->pos/CAT_MAX, 0, 0, p->buf); } } (*b->method->codec.stop)(p->decodeClientData); - xfree (p->buf); - xfree (p); + xfree(p->buf); + xfree(p); } int insert_sub (ISAMB b, struct ISAMB_block **p, @@ -607,27 +628,47 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, char *startp = p->bytes; const char *src = startp; char *endp = p->bytes + p->size; - ISAMB_P pos; + ISAM_P pos; struct ISAMB_block *sub_p1 = 0, *sub_p2 = 0; char sub_item[DST_ITEM_MAX]; int sub_size; int more = 0; zint diff_terms = 0; + void *c1 = (*b->method->codec.start)(); *sp = 0; assert(p->size >= 0); - decode_ptr (&src, &pos); + decode_ptr(&src, &pos); while (src != endp) { - zint item_len; int d; const char *src0 = src; - decode_ptr (&src, &item_len); +#if INT_ENCODE + char file_item_buf[DST_ITEM_MAX]; + char *file_item = file_item_buf; + (*b->method->codec.reset)(c1); + (*b->method->codec.decode)(c1, &file_item, &src); + d = (*b->method->compare_item)(file_item_buf, lookahead_item); + if (d > 0) + { + sub_p1 = open_block(b, pos); + assert (sub_p1); + diff_terms -= sub_p1->no_items; + more = insert_sub (b, &sub_p1, lookahead_item, mode, + stream, &sub_p2, + sub_item, &sub_size, file_item_buf); + diff_terms += 
sub_p1->no_items; + src = src0; + break; + } +#else + zint item_len; + decode_item_len(&src, &item_len); d = (*b->method->compare_item)(src, lookahead_item); if (d > 0) { - sub_p1 = open_block (b, pos); + sub_p1 = open_block(b, pos); assert (sub_p1); diff_terms -= sub_p1->no_items; more = insert_sub (b, &sub_p1, lookahead_item, mode, @@ -638,11 +679,13 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, break; } src += item_len; - decode_ptr (&src, &pos); +#endif + decode_ptr(&src, &pos); } if (!sub_p1) { - sub_p1 = open_block (b, pos); + /* we reached the end. So lookahead > last item */ + sub_p1 = open_block(b, pos); assert (sub_p1); diff_terms -= sub_p1->no_items; more = insert_sub (b, &sub_p1, lookahead_item, mode, stream, &sub_p2, @@ -661,18 +704,25 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, /* there was a split - must insert pointer in this one */ char dst_buf[DST_BUF_SIZE]; char *dst = dst_buf; - +#if INT_ENCODE + const char *sub_item_ptr = sub_item; +#endif assert (sub_size < 80 && sub_size > 1); memcpy (dst, startp, src - startp); dst += src - startp; - encode_ptr (&dst, sub_size); /* sub length and item */ +#if INT_ENCODE + (*b->method->codec.reset)(c1); + (*b->method->codec.encode)(c1, &dst, &sub_item_ptr); +#else + encode_item_len (&dst, sub_size); /* sub length and item */ memcpy (dst, sub_item, sub_size); dst += sub_size; +#endif - encode_ptr (&dst, sub_p2->pos); /* pos */ + encode_ptr(&dst, sub_p2->pos); /* pos */ if (endp - src) /* remaining data */ { @@ -685,12 +735,19 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, if (p->size <= b->file[p->cat].head.block_max) { + /* it fits OK in this block */ memcpy (startp, dst_buf, dst - dst_buf); } else { + /* must split _this_ block as well .. 
*/ struct ISAMB_block *sub_p3; +#if INT_ENCODE + char file_item_buf[DST_ITEM_MAX]; + char *file_item = file_item_buf; +#else zint split_size_tmp; +#endif zint no_items_first_half = 0; int p_new_size; const char *half; @@ -698,7 +755,7 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, endp = dst; half = src + b->file[p->cat].head.block_size/2; - decode_ptr (&src, &pos); + decode_ptr(&src, &pos); /* read sub block so we can get no_items for it */ sub_p3 = open_block(b, pos); @@ -707,11 +764,16 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, while (src <= half) { - decode_ptr (&src, &split_size_tmp); +#if INT_ENCODE + file_item = file_item_buf; + (*b->method->codec.reset)(c1); + (*b->method->codec.decode)(c1, &file_item, &src); +#else + decode_item_len(&src, &split_size_tmp); *split_size = (int) split_size_tmp; - src += *split_size; - decode_ptr (&src, &pos); +#endif + decode_ptr(&src, &pos); /* read sub block so we can get no_items for it */ sub_p3 = open_block(b, pos); @@ -722,11 +784,18 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, p_new_size = src - dst_buf; memcpy (p->bytes, dst_buf, p_new_size); - decode_ptr (&src, &split_size_tmp); +#if INT_ENCODE + file_item = file_item_buf; + (*b->method->codec.reset)(c1); + (*b->method->codec.decode)(c1, &file_item, &src); + *split_size = file_item - file_item_buf; + memcpy(split_item, file_item_buf, *split_size); +#else + decode_item_len(&src, &split_size_tmp); *split_size = (int) split_size_tmp; memcpy (split_item, src, *split_size); src += *split_size; - +#endif /* *sp is second half */ *sp = new_int (b, p->cat); (*sp)->size = endp - src; @@ -739,9 +808,10 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, p->no_items = no_items_first_half; } p->dirty = 1; - close_block (b, sub_p2); + close_block(b, sub_p2); } - close_block (b, sub_p1); + close_block(b, sub_p1); + (*b->method->codec.stop)(c1); return more; } @@ -769,6 +839,7 
@@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, int cut_item_size = 0; int no_items = 0; /* number of items (total) */ int no_items_1 = 0; /* number of items (first half) */ + int inserted_dst_bytes = 0; if (p && p->size) { @@ -782,6 +853,7 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, { const char *dst_item = 0; /* resulting item to be inserted */ char *lookahead_next; + char *dst_0 = dst; int d = -1; if (lookahead_item) @@ -800,14 +872,13 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, /* if this is not an insertion, it's really bad .. */ if (!*lookahead_mode) { - yaz_log (YLOG_WARN, "isamb: Inconsistent register (1)"); + yaz_log(YLOG_WARN, "isamb: Inconsistent register (1)"); assert (*lookahead_mode); } } else dst_item = file_item_buf; - if (!*lookahead_mode && d == 0) { /* it's a deletion and they match so there is nothing to be @@ -844,7 +915,8 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, { /* we must move the lookahead pointer */ - if (dst > maxp) + inserted_dst_bytes += (dst - dst_0); + if (inserted_dst_bytes >= quater) /* no more room. Mark lookahead as "gone".. */ lookahead_item = 0; else @@ -896,17 +968,19 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, } } } - maxp = dst_buf + b->file[b->no_cat-1].head.block_max + quater; + /* this loop runs when we are "appending" to a leaf page. 
That is either it's empty (new) or all file items have been read in previous loop */ + + maxp = dst_buf + b->file[b->no_cat-1].head.block_max + quater; while (lookahead_item) { char *dst_item; const char *src = lookahead_item; char *dst_0 = dst; - /* compare lookahead with max item */ + /* if we have a lookahead item, we stop if we exceed the value of it */ if (max_item && (*b->method->compare_item)(max_item, lookahead_item) <= 0) { @@ -916,7 +990,7 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, if (!*lookahead_mode) { /* this is append. So a delete is bad */ - yaz_log (YLOG_WARN, "isamb: Inconsistent register (2)"); + yaz_log(YLOG_WARN, "isamb: Inconsistent register (2)"); abort(); } else if (!half1 && dst > tail_cut) @@ -958,7 +1032,7 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, { /* non-btree block will be removed */ p->deleted = 1; - close_block (b, p); + close_block(b, p); /* delete it too!! */ p = 0; /* make a new one anyway */ } @@ -984,6 +1058,7 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, /* first half */ p->size = half1 - dst_buf; + assert(p->size <= b->file[p->cat].head.block_max); memcpy (p->bytes, dst_buf, half1 - dst_buf); p->no_items = no_items_1; @@ -999,6 +1074,7 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, memcpy (first_dst, half2, dst - half2); (*sp2)->size = (first_dst - (*sp2)->bytes) + (dst - half2); + assert((*sp2)->size <= b->file[p->cat].head.block_max); (*sp2)->no_items = no_items - no_items_1; (*sp2)->dirty = 1; p->dirty = 1; @@ -1032,7 +1108,7 @@ int insert_sub (ISAMB b, struct ISAMB_block **p, void *new_item, sub_size, max_item); } -int isamb_unlink (ISAMB b, ISAMC_P pos) +int isamb_unlink (ISAMB b, ISAM_P pos) { struct ISAMB_block *p1; @@ -1043,25 +1119,37 @@ int isamb_unlink (ISAMB b, ISAMC_P pos) if (!p1->leaf) { zint sub_p; - zint item_len; const char *src = p1->bytes + p1->offset; - +#if INT_ENCODE + void 
*c1 = (*b->method->codec.start)(); +#endif decode_ptr(&src, &sub_p); isamb_unlink(b, sub_p); while (src != p1->bytes + p1->size) { - decode_ptr(&src, &item_len); +#if INT_ENCODE + char file_item_buf[DST_ITEM_MAX]; + char *file_item = file_item_buf; + (*b->method->codec.reset)(c1); + (*b->method->codec.decode)(c1, &file_item, &src); +#else + zint item_len; + decode_item_len(&src, &item_len); src += item_len; +#endif decode_ptr(&src, &sub_p); isamb_unlink(b, sub_p); } +#if INT_ENCODE + (*b->method->codec.stop)(c1); +#endif } close_block(b, p1); return 0; } -ISAMB_P isamb_merge (ISAMB b, ISAMC_P pos, ISAMC_I *stream) +void isamb_merge(ISAMB b, ISAM_P *pos, ISAMC_I *stream) { char item_buf[DST_ITEM_MAX]; char *item_ptr; @@ -1078,7 +1166,8 @@ ISAMB_P isamb_merge (ISAMB b, ISAMC_P pos, ISAMC_I *stream) more = (*stream->read_item)(stream->clientData, &item_ptr, &i_mode); } - return 1; + *pos = 1; + return; } item_ptr = item_buf; more = (*stream->read_item)(stream->clientData, &item_ptr, &i_mode); @@ -1088,69 +1177,80 @@ ISAMB_P isamb_merge (ISAMB b, ISAMC_P pos, ISAMC_I *stream) char sub_item[DST_ITEM_MAX]; int sub_size; - if (pos) - p = open_block (b, pos); + if (*pos) + p = open_block(b, *pos); more = insert_sub (b, &p, item_buf, &i_mode, stream, &sp, sub_item, &sub_size, 0); if (sp) { /* increase level of tree by one */ struct ISAMB_block *p2 = new_int (b, p->cat); char *dst = p2->bytes + p2->size; - - encode_ptr (&dst, p->pos); - assert (sub_size < 40); - encode_ptr (&dst, sub_size); +#if INT_ENCODE + void *c1 = (*b->method->codec.start)(); + const char *sub_item_ptr = sub_item; +#endif + + encode_ptr(&dst, p->pos); + assert (sub_size < 80 && sub_size > 1); +#if INT_ENCODE + (*b->method->codec.reset)(c1); + (*b->method->codec.encode)(c1, &dst, &sub_item_ptr); +#else + encode_item_len (&dst, sub_size); memcpy (dst, sub_item, sub_size); dst += sub_size; - encode_ptr (&dst, sp->pos); +#endif + encode_ptr(&dst, sp->pos); p2->size = dst - p2->bytes; p2->no_items = 
p->no_items + sp->no_items; - pos = p2->pos; /* return new super page */ - close_block (b, sp); - close_block (b, p2); + *pos = p2->pos; /* return new super page */ + close_block(b, sp); + close_block(b, p2); +#if INT_ENCODE + (*b->method->codec.stop)(c1); +#endif } else { - pos = p->pos; /* return current one (again) */ + *pos = p->pos; /* return current one (again) */ } if (p->no_items == 0) must_delete = 1; else must_delete = 0; - close_block (b, p); + close_block(b, p); } if (must_delete) { - isamb_unlink(b, pos); - return 0; + isamb_unlink(b, *pos); + *pos = 0; } - return pos; } -ISAMB_PP isamb_pp_open_x (ISAMB isamb, ISAMB_P pos, int *level, int scope) +ISAMB_PP isamb_pp_open_x(ISAMB isamb, ISAM_P pos, int *level, int scope) { - ISAMB_PP pp = xmalloc (sizeof(*pp)); + ISAMB_PP pp = xmalloc(sizeof(*pp)); int i; assert(pos); pp->isamb = isamb; - pp->block = xmalloc (ISAMB_MAX_LEVEL * sizeof(*pp->block)); + pp->block = xmalloc(ISAMB_MAX_LEVEL * sizeof(*pp->block)); pp->pos = pos; pp->level = 0; - pp->maxlevel=0; + pp->maxlevel = 0; pp->total_size = 0; pp->no_blocks = 0; - pp->skipped_numbers=0; - pp->returned_numbers=0; - pp->scope=scope; - for (i=0;iskipped_nodes[i] = pp->accessed_nodes[i]=0; + pp->skipped_numbers = 0; + pp->returned_numbers = 0; + pp->scope = scope; + for (i = 0; iskipped_nodes[i] = pp->accessed_nodes[i] = 0; while (1) { - struct ISAMB_block *p = open_block (isamb, pos); + struct ISAMB_block *p = open_block(isamb, pos); const char *src = p->bytes + p->offset; pp->block[pp->level] = p; @@ -1158,39 +1258,39 @@ ISAMB_PP isamb_pp_open_x (ISAMB isamb, ISAMB_P pos, int *level, int scope) pp->no_blocks++; if (p->leaf) break; - decode_ptr (&src, &pos); + decode_ptr(&src, &pos); p->offset = src - p->bytes; pp->level++; pp->accessed_nodes[pp->level]++; } pp->block[pp->level+1] = 0; - pp->maxlevel=pp->level; + pp->maxlevel = pp->level; if (level) *level = pp->level; return pp; } -ISAMB_PP isamb_pp_open (ISAMB isamb, ISAMB_P pos, int scope) +ISAMB_PP 
isamb_pp_open (ISAMB isamb, ISAM_P pos, int scope) { - return isamb_pp_open_x (isamb, pos, 0, scope); + return isamb_pp_open_x(isamb, pos, 0, scope); } -void isamb_pp_close_x (ISAMB_PP pp, int *size, int *blocks) +void isamb_pp_close_x(ISAMB_PP pp, zint *size, zint *blocks) { int i; if (!pp) return; - yaz_log(YLOG_DEBUG,"isamb_pp_close lev=%d returned "ZINT_FORMAT" values," + yaz_log(YLOG_DEBUG, "isamb_pp_close lev=%d returned "ZINT_FORMAT" values, " "skipped "ZINT_FORMAT, pp->maxlevel, pp->skipped_numbers, pp->returned_numbers); - for (i=pp->maxlevel;i>=0;i--) - if ( pp->skipped_nodes[i] || pp->accessed_nodes[i]) - yaz_log(YLOG_DEBUG,"isamb_pp_close level leaf-%d: " + for (i = pp->maxlevel; i>=0; i--) + if (pp->skipped_nodes[i] || pp->accessed_nodes[i]) + yaz_log(YLOG_DEBUG, "isamb_pp_close level leaf-%d: " ZINT_FORMAT" read, "ZINT_FORMAT" skipped", i, pp->accessed_nodes[i], pp->skipped_nodes[i]); pp->isamb->skipped_numbers += pp->skipped_numbers; pp->isamb->returned_numbers += pp->returned_numbers; - for (i=pp->maxlevel;i>=0;i--) + for (i = pp->maxlevel; i>=0; i--) { pp->isamb->accessed_nodes[i] += pp->accessed_nodes[i]; pp->isamb->skipped_nodes[i] += pp->skipped_nodes[i]; @@ -1200,9 +1300,9 @@ void isamb_pp_close_x (ISAMB_PP pp, int *size, int *blocks) if (blocks) *blocks = pp->no_blocks; for (i = 0; i <= pp->level; i++) - close_block (pp->isamb, pp->block[i]); - xfree (pp->block); - xfree (pp); + close_block(pp->isamb, pp->block[i]); + xfree(pp->block); + xfree(pp); } int isamb_block_info (ISAMB isamb, int cat) @@ -1214,18 +1314,18 @@ int isamb_block_info (ISAMB isamb, int cat) void isamb_pp_close (ISAMB_PP pp) { - isamb_pp_close_x (pp, 0, 0); + isamb_pp_close_x(pp, 0, 0); } /* simple recursive dumper .. 
*/ -static void isamb_dump_r (ISAMB b, ISAMB_P pos, void (*pr)(const char *str), +static void isamb_dump_r (ISAMB b, ISAM_P pos, void (*pr)(const char *str), int level) { char buf[1024]; char prefix_str[1024]; if (pos) { - struct ISAMB_block *p = open_block (b, pos); + struct ISAMB_block *p = open_block(b, pos); sprintf(prefix_str, "%*s " ZINT_FORMAT " cat=%d size=%d max=%d items=" ZINT_FORMAT, level*2, "", pos, p->cat, p->size, b->file[p->cat].head.block_max, @@ -1247,36 +1347,45 @@ static void isamb_dump_r (ISAMB b, ISAMB_P pos, void (*pr)(const char *str), else { const char *src = p->bytes + p->offset; - ISAMB_P sub; - zint item_len; + ISAM_P sub; - decode_ptr (&src, &sub); + decode_ptr(&src, &sub); p->offset = src - (char*) p->bytes; isamb_dump_r(b, sub, pr, level+1); while (p->offset < p->size) { - decode_ptr (&src, &item_len); +#if INT_ENCODE + char file_item_buf[DST_ITEM_MAX]; + char *file_item = file_item_buf; + void *c1 = (*b->method->codec.start)(); + (*b->method->codec.decode)(c1, &file_item, &src); + (*b->method->codec.stop)(c1); + (*b->method->log_item)(YLOG_DEBUG, file_item_buf, prefix_str); +#else + zint item_len; + decode_item_len(&src, &item_len); (*b->method->log_item)(YLOG_DEBUG, src, prefix_str); src += item_len; - decode_ptr (&src, &sub); +#endif + decode_ptr(&src, &sub); p->offset = src - (char*) p->bytes; isamb_dump_r(b, sub, pr, level+1); } } - close_block(b,p); + close_block(b, p); } } -void isamb_dump (ISAMB b, ISAMB_P pos, void (*pr)(const char *str)) +void isamb_dump(ISAMB b, ISAM_P pos, void (*pr)(const char *str)) { isamb_dump_r(b, pos, pr, 0); } -int isamb_pp_read (ISAMB_PP pp, void *buf) +int isamb_pp_read(ISAMB_PP pp, void *buf) { return isamb_pp_forward(pp, buf, 0); } @@ -1289,70 +1398,90 @@ static int isamb_pp_on_right_node(ISAMB_PP pp, int level, const void *untilbuf) struct ISAMB_block *p; int cmp; const char *src; - zint item_len; - assert(level>=0); - if ( level == 0) { + ISAMB b = pp->isamb; + + assert(level >= 0); + if 
(level == 0) + { #if ISAMB_DEBUG - yaz_log(YLOG_DEBUG,"isamb_pp_on_right returning true for root"); + yaz_log(YLOG_DEBUG, "isamb_pp_on_right returning true for root"); #endif return 1; /* we can never skip the root node */ } level--; - p=pp->block[level]; + p = pp->block[level]; assert(p->offset <= p->size); - if (p->offset < p->size ) + if (p->offset < p->size) { - assert(p->offset>0); - src=p->bytes + p->offset; - decode_ptr(&src, &item_len); +#if INT_ENCODE + char file_item_buf[DST_ITEM_MAX]; + char *file_item = file_item_buf; + void *c1 = (*b->method->codec.start)(); + assert(p->offset > 0); + src = p->bytes + p->offset; + (*b->method->codec.decode)(c1, &file_item, &src); + (*b->method->codec.stop)(c1); + cmp = (*b->method->compare_item)(untilbuf, file_item_buf); +#else + zint item_len; + assert(p->offset > 0); + src = p->bytes + p->offset; + decode_item_len(&src, &item_len); #if ISAMB_DEBUG - (*pp->isamb->method->codec.log_item)(YLOG_DEBUG,untilbuf,"on_leaf: until"); - (*pp->isamb->method->codec.log_item)(YLOG_DEBUG,src,"on_leaf: value"); + (*b->method->codec.log_item)(YLOG_DEBUG, untilbuf, "on_leaf: until"); + (*b->method->codec.log_item)(YLOG_DEBUG, src, "on_leaf: value"); +#endif + cmp = (*b->method->compare_item)(untilbuf, src); #endif - cmp=(*pp->isamb->method->compare_item)(untilbuf,src); - if (cmpscope) { /* cmp<2 */ + if (cmp < pp->scope) + { /* cmp<2 */ #if ISAMB_DEBUG - yaz_log(YLOG_DEBUG,"isamb_pp_on_right returning true " - "cmp=%d lev=%d ofs=%d",cmp,level,p->offset); + yaz_log(YLOG_DEBUG, "isamb_pp_on_right returning true " + "cmp=%d lev=%d ofs=%d", cmp, level, p->offset); #endif return 1; } - else { + else + { #if ISAMB_DEBUG - yaz_log(YLOG_DEBUG,"isamb_pp_on_right returning false " - "cmp=%d lev=%d ofs=%d",cmp,level,p->offset); + yaz_log(YLOG_DEBUG, "isamb_pp_on_right returning false " + "cmp=%d lev=%d ofs=%d", cmp, level, p->offset); #endif return 0; } } else { #if ISAMB_DEBUG - yaz_log(YLOG_DEBUG,"isamb_pp_on_right at tail, looking higher " - 
"lev=%d",level); + yaz_log(YLOG_DEBUG, "isamb_pp_on_right at tail, looking higher " + "lev=%d", level); #endif return isamb_pp_on_right_node(pp, level, untilbuf); } } /* isamb_pp_on_right_node */ static int isamb_pp_read_on_leaf(ISAMB_PP pp, void *buf) -{ /* reads the next item on the current leaf, returns 0 if end of leaf*/ +{ + /* reads the next item on the current leaf, returns 0 if end of leaf*/ struct ISAMB_block *p = pp->block[pp->level]; char *dst; const char *src; assert(pp); assert(buf); - if (p->offset == p->size) { + if (p->offset == p->size) + { #if ISAMB_DEBUG - yaz_log(YLOG_DEBUG,"isamb_pp_read_on_leaf returning 0 on node %d",p->pos); + yaz_log(YLOG_DEBUG, "isamb_pp_read_on_leaf returning 0 on " + "node %d", p->pos); #endif return 0; /* at end of leaf */ } - src=p->bytes + p->offset; - dst=buf; - (*pp->isamb->method->codec.decode)(p->decodeClientData,&dst, &src); + src = p->bytes + p->offset; + dst = buf; + (*pp->isamb->method->codec.decode)(p->decodeClientData, &dst, &src); p->offset = src - (char*) p->bytes; #if ISAMB_DEBUG - (*pp->isamb->method->codec.log_item)(YLOG_DEBUG, buf, "read_on_leaf returning 1"); + (*pp->isamb->method->codec.log_item)(YLOG_DEBUG, buf, + "read_on_leaf returning 1"); #endif pp->returned_numbers++; return 1; @@ -1361,16 +1490,18 @@ static int isamb_pp_read_on_leaf(ISAMB_PP pp, void *buf) static int isamb_pp_forward_on_leaf(ISAMB_PP pp, void *buf, const void *untilbuf) { /* forwards on the current leaf, returns 0 if not found */ int cmp; - int skips=0; - while (1){ - if (!isamb_pp_read_on_leaf(pp,buf)) + int skips = 0; + while (1) + { + if (!isamb_pp_read_on_leaf(pp, buf)) return 0; /* FIXME - this is an extra function call, inline the read? 
*/ - cmp=(*pp->isamb->method->compare_item)(untilbuf,buf); - if (cmp <pp->scope){ /* cmp<2 found a good one */ + cmp=(*pp->isamb->method->compare_item)(untilbuf, buf); + if (cmp <pp->scope) + { /* cmp<2 found a good one */ #if ISAMB_DEBUG if (skips) - yaz_log(YLOG_DEBUG, "isam_pp_fwd_on_leaf skipped %d items",skips); + yaz_log(YLOG_DEBUG, "isam_pp_fwd_on_leaf skipped %d items", skips); #endif pp->returned_numbers++; return 1; @@ -1383,14 +1514,13 @@ static int isamb_pp_forward_on_leaf(ISAMB_PP pp, void *buf, const void *untilbuf } } /* forward_on_leaf */ -static int isamb_pp_climb_level(ISAMB_PP pp, ISAMB_P *pos) +static int isamb_pp_climb_level(ISAMB_PP pp, ISAM_P *pos) { /* climbs higher in the tree, until finds a level with data left */ /* returns the node to (consider to) descend to in *pos) */ struct ISAMB_block *p = pp->block[pp->level]; const char *src; - zint item_len; #if ISAMB_DEBUG - yaz_log(YLOG_DEBUG,"isamb_pp_climb_level starting " + yaz_log(YLOG_DEBUG, "isamb_pp_climb_level starting " "at level %d node %d ofs=%d sz=%d", pp->level, p->pos, p->offset, p->size); #endif @@ -1399,40 +1529,54 @@ static int isamb_pp_climb_level(ISAMB_PP pp, ISAMB_P *pos) if (pp->level==0) { #if ISAMB_DEBUG - yaz_log(YLOG_DEBUG,"isamb_pp_climb_level returning 0 at root"); + yaz_log(YLOG_DEBUG, "isamb_pp_climb_level returning 0 at root"); #endif return 0; } assert(pp->level>0); close_block(pp->isamb, pp->block[pp->level]); - pp->block[pp->level]=0; + pp->block[pp->level] = 0; (pp->level)--; - p=pp->block[pp->level]; + p = pp->block[pp->level]; #if ISAMB_DEBUG - yaz_log(YLOG_DEBUG,"isamb_pp_climb_level climbed to level %d node %d ofs=%d", + yaz_log(YLOG_DEBUG, "isamb_pp_climb_level climbed to level %d node %d ofs=%d", pp->level, p->pos, p->offset); #endif assert(!p->leaf); assert(p->offset <= p->size); - if (p->offset == p->size ) { + if (p->offset == p->size) + { /* we came from the last pointer, climb on */ - if (!isamb_pp_climb_level(pp,pos)) + if (!isamb_pp_climb_level(pp, pos))
return 0; - p=pp->block[pp->level]; + p = pp->block[pp->level]; } else { +#if INT_ENCODE + char file_item_buf[DST_ITEM_MAX]; + char *file_item = file_item_buf; + ISAMB b = pp->isamb; + void *c1 = (*b->method->codec.start)(); +#else + zint item_len; +#endif /* skip the child we just came from */ #if ISAMB_DEBUG - yaz_log(YLOG_DEBUG,"isam_pp_climb_level: skipping lev=%d ofs=%d sz=%d", + yaz_log(YLOG_DEBUG, "isam_pp_climb_level: skipping lev=%d ofs=%d sz=%d", pp->level, p->offset, p->size); #endif - assert (p->offset < p->size ); - src=p->bytes + p->offset; - decode_ptr(&src, &item_len); + assert (p->offset < p->size); + src = p->bytes + p->offset; +#if INT_ENCODE + (*b->method->codec.decode)(c1, &file_item, &src); + (*b->method->codec.stop)(c1); +#else + decode_item_len(&src, &item_len); src += item_len; +#endif decode_ptr(&src, pos); - p->offset=src - (char *)p->bytes; + p->offset = src - (char *)p->bytes; } return 1; @@ -1447,50 +1591,62 @@ static zint isamb_pp_forward_unode(ISAMB_PP pp, zint pos, const void *untilbuf) /* FIXME - this can be detected, and avoided by looking at the */ /* parent node, but that gets messy. 
Presumably the cost is */ /* pretty low anyway */ + ISAMB b = pp->isamb; struct ISAMB_block *p = pp->block[pp->level]; - const char *src=p->bytes + p->offset; - zint item_len; + const char *src = p->bytes + p->offset; int cmp; zint nxtpos; #if ISAMB_DEBUG - int skips=0; - yaz_log(YLOG_DEBUG,"isamb_pp_forward_unode starting " + int skips = 0; + yaz_log(YLOG_DEBUG, "isamb_pp_forward_unode starting " "at level %d node %d ofs=%di sz=%d", pp->level, p->pos, p->offset, p->size); #endif assert(!p->leaf); assert(p->offset <= p->size); - if (p->offset == p->size) { + if (p->offset == p->size) + { #if ISAMB_DEBUG - yaz_log(YLOG_DEBUG,"isamb_pp_forward_unode returning at end " + yaz_log(YLOG_DEBUG, "isamb_pp_forward_unode returning at end " "at level %d node %d ofs=%di sz=%d", pp->level, p->pos, p->offset, p->size); #endif return pos; /* already at the end of it */ } - while(p->offset < p->size) { - decode_ptr(&src,&item_len); - cmp=(*pp->isamb->method->compare_item)(untilbuf,src); - src+=item_len; - decode_ptr(&src,&nxtpos); + while(p->offset < p->size) + { +#if INT_ENCODE + char file_item_buf[DST_ITEM_MAX]; + char *file_item = file_item_buf; + void *c1 = (*b->method->codec.start)(); + (*b->method->codec.decode)(c1, &file_item, &src); + (*b->method->codec.stop)(c1); + cmp = (*b->method->compare_item)(untilbuf, file_item_buf); +#else + zint item_len; + decode_item_len(&src, &item_len); + cmp = (*b->method->compare_item)(untilbuf, src); + src += item_len; +#endif + decode_ptr(&src, &nxtpos); if (cmp<pp->scope) /* cmp<2 */ { #if ISAMB_DEBUG - yaz_log(YLOG_DEBUG,"isamb_pp_forward_unode returning a hit " + yaz_log(YLOG_DEBUG, "isamb_pp_forward_unode returning a hit " "at level %d node %d ofs=%d sz=%d", pp->level, p->pos, p->offset, p->size); #endif return pos; } /* found one */ - pos=nxtpos; - p->offset=src-(char*)p->bytes; + pos = nxtpos; + p->offset = src-(char*)p->bytes; (pp->skipped_nodes[pp->maxlevel - pp->level -1])++; #if ISAMB_DEBUG skips++; #endif } #if ISAMB_DEBUG -
yaz_log(YLOG_DEBUG,"isamb_pp_forward_unode returning at tail " + yaz_log(YLOG_DEBUG, "isamb_pp_forward_unode returning at tail " "at level %d node %d ofs=%d sz=%d skips=%d", pp->level, p->pos, p->offset, p->size, skips); #endif @@ -1498,38 +1654,39 @@ static zint isamb_pp_forward_unode(ISAMB_PP pp, zint pos, const void *untilbuf) } /* forward_unode */ -static void isamb_pp_descend_to_leaf(ISAMB_PP pp, ISAMB_P pos, const void *untilbuf) +static void isamb_pp_descend_to_leaf(ISAMB_PP pp, ISAM_P pos, + const void *untilbuf) { /* climbs down the tree, from pos, to the leftmost leaf */ struct ISAMB_block *p = pp->block[pp->level]; const char *src; assert(!p->leaf); #if ISAMB_DEBUG - yaz_log(YLOG_DEBUG,"isamb_pp_descend_to_leaf " + yaz_log(YLOG_DEBUG, "isamb_pp_descend_to_leaf " "starting at lev %d node %d ofs=%d lf=%d u=%p", pp->level, p->pos, p->offset, p->leaf, untilbuf); #endif if (untilbuf) - pos=isamb_pp_forward_unode(pp,pos,untilbuf); + pos = isamb_pp_forward_unode(pp, pos, untilbuf); ++(pp->level); assert(pos); - p=open_block(pp->isamb, pos); - pp->block[pp->level]=p; + p = open_block(pp->isamb, pos); + pp->block[pp->level] = p; ++(pp->accessed_nodes[pp->maxlevel-pp->level]); ++(pp->no_blocks); #if ISAMB_DEBUG - yaz_log(YLOG_DEBUG,"isamb_pp_descend_to_leaf " + yaz_log(YLOG_DEBUG, "isamb_pp_descend_to_leaf " "got lev %d node %d lf=%d", pp->level, p->pos, p->leaf); #endif if (p->leaf) return; - assert (p->offset==0 ); - src=p->bytes + p->offset; + assert (p->offset==0); + src = p->bytes + p->offset; decode_ptr(&src, &pos); - p->offset=src-(char*)p->bytes; - isamb_pp_descend_to_leaf(pp,pos,untilbuf); + p->offset = src-(char*)p->bytes; + isamb_pp_descend_to_leaf(pp, pos, untilbuf); #if ISAMB_DEBUG - yaz_log(YLOG_DEBUG,"isamb_pp_descend_to_leaf " + yaz_log(YLOG_DEBUG, "isamb_pp_descend_to_leaf " "returning at lev %d node %d ofs=%d lf=%d", pp->level, p->pos, p->offset, p->leaf); #endif @@ -1537,32 +1694,32 @@ static void isamb_pp_descend_to_leaf(ISAMB_PP pp, ISAMB_P 
pos, const void *until static int isamb_pp_find_next_leaf(ISAMB_PP pp) { /* finds the next leaf by climbing up and down */ - ISAMB_P pos; - if (!isamb_pp_climb_level(pp,&pos)) + ISAM_P pos; + if (!isamb_pp_climb_level(pp, &pos)) return 0; - isamb_pp_descend_to_leaf(pp, pos,0); + isamb_pp_descend_to_leaf(pp, pos, 0); return 1; } static int isamb_pp_climb_desc(ISAMB_PP pp, const void *untilbuf) { /* climbs up and descends to a leaf where values >= *untilbuf are found */ - ISAMB_P pos; + ISAM_P pos; #if ISAMB_DEBUG struct ISAMB_block *p = pp->block[pp->level]; - yaz_log(YLOG_DEBUG,"isamb_pp_climb_desc starting " + yaz_log(YLOG_DEBUG, "isamb_pp_climb_desc starting " "at level %d node %d ofs=%d sz=%d", pp->level, p->pos, p->offset, p->size); #endif - if (!isamb_pp_climb_level(pp,&pos)) + if (!isamb_pp_climb_level(pp, &pos)) return 0; /* see if it would pay to climb one higher */ if (!isamb_pp_on_right_node(pp, pp->level, untilbuf)) - if (!isamb_pp_climb_level(pp,&pos)) + if (!isamb_pp_climb_level(pp, &pos)) return 0; - isamb_pp_descend_to_leaf(pp, pos,untilbuf); + isamb_pp_descend_to_leaf(pp, pos, untilbuf); #if ISAMB_DEBUG p = pp->block[pp->level]; - yaz_log(YLOG_DEBUG,"isamb_pp_climb_desc done " + yaz_log(YLOG_DEBUG, "isamb_pp_climb_desc done " "at level %d node %d ofs=%d sz=%d", pp->level, p->pos, p->offset, p->size); #endif @@ -1574,37 +1731,41 @@ int isamb_pp_forward (ISAMB_PP pp, void *buf, const void *untilbuf) #if ISAMB_DEBUG struct ISAMB_block *p = pp->block[pp->level]; assert(p->leaf); - yaz_log(YLOG_DEBUG,"isamb_pp_forward starting " + yaz_log(YLOG_DEBUG, "isamb_pp_forward starting " "at level %d node %d ofs=%d sz=%d u=%p sc=%d", - pp->level, p->pos, p->offset, p->size,untilbuf, scope); + pp->level, p->pos, p->offset, p->size, untilbuf, scope); #endif - if (untilbuf) { - if (isamb_pp_forward_on_leaf( pp, buf, untilbuf)) { + if (untilbuf) + { + if (isamb_pp_forward_on_leaf(pp, buf, untilbuf)) + { #if ISAMB_DEBUG - yaz_log(YLOG_DEBUG,"isamb_pp_forward (f) 
returning (A) " + yaz_log(YLOG_DEBUG, "isamb_pp_forward (f) returning (A) " "at level %d node %d ofs=%d sz=%d", pp->level, p->pos, p->offset, p->size); #endif return 1; } - if (! isamb_pp_climb_desc( pp, untilbuf)) { + if (! isamb_pp_climb_desc(pp, untilbuf)) + { #if ISAMB_DEBUG - yaz_log(YLOG_DEBUG,"isamb_pp_forward (f) returning notfound (B) " + yaz_log(YLOG_DEBUG, "isamb_pp_forward (f) returning notfound (B) " "at level %d node %d ofs=%d sz=%d", pp->level, p->pos, p->offset, p->size); #endif return 0; /* could not find a leaf */ } - do{ - if (isamb_pp_forward_on_leaf( pp, buf, untilbuf)) { + do { + if (isamb_pp_forward_on_leaf(pp, buf, untilbuf)) + { #if ISAMB_DEBUG - yaz_log(YLOG_DEBUG,"isamb_pp_forward (f) returning (C) " - "at level %d node %d ofs=%d sz=%d", - pp->level, p->pos, p->offset, p->size); + yaz_log(YLOG_DEBUG, "isamb_pp_forward (f) returning (c) " + "at level %d node %d ofs=%d sz=%d", + pp->level, p->pos, p->offset, p->size); #endif return 1; } - }while ( isamb_pp_find_next_leaf(pp)); + } while (isamb_pp_find_next_leaf(pp)); return 0; /* could not find at all */ } else { /* no untilbuf, a straight read */ @@ -1612,17 +1773,19 @@ int isamb_pp_forward (ISAMB_PP pp, void *buf, const void *untilbuf) * directly into the pp_read */ /* keeping here now, to keep same * interface as the old fwd */ - if (isamb_pp_read_on_leaf( pp, buf)) { + if (isamb_pp_read_on_leaf(pp, buf)) + { #if ISAMB_DEBUG - yaz_log(YLOG_DEBUG,"isamb_pp_forward (read) returning (D) " + yaz_log(YLOG_DEBUG, "isamb_pp_forward (read) returning (D) " "at level %d node %d ofs=%d sz=%d", pp->level, p->pos, p->offset, p->size); #endif return 1; } - if (isamb_pp_find_next_leaf(pp)) { + if (isamb_pp_find_next_leaf(pp)) + { #if ISAMB_DEBUG - yaz_log(YLOG_DEBUG,"isamb_pp_forward (read) returning (E) " + yaz_log(YLOG_DEBUG, "isamb_pp_forward (read) returning (E) " "at level %d node %d ofs=%d sz=%d", pp->level, p->pos, p->offset, p->size); #endif @@ -1633,7 +1796,7 @@ int isamb_pp_forward (ISAMB_PP 
pp, void *buf, const void *untilbuf) } } /* isam_pp_forward (new version) */ -void isamb_pp_pos( ISAMB_PP pp, double *current, double *total ) +void isamb_pp_pos(ISAMB_PP pp, double *current, double *total) { /* return an estimate of the current position and of the total number of */ /* occureences in the isam tree, based on the current leaf */ struct ISAMB_block *p = pp->block[pp->level]; @@ -1641,10 +1804,143 @@ void isamb_pp_pos( ISAMB_PP pp, double *current, double *total ) assert(current); assert(p->leaf); - *total = pp->block[0]->no_items; + *total = (double) (pp->block[0]->no_items); *current = (double) pp->returned_numbers; #if ISAMB_DEBUG yaz_log(YLOG_LOG, "isamb_pp_pos returning: cur= %0.1f tot=%0.1f rn=" ZINT_FORMAT, *current, *total, pp->returned_numbers); #endif } + +int isamb_pp_forward2(ISAMB_PP pp, void *buf, const void *untilb) +{ + char *dst = buf; + const char *src; + struct ISAMB_block *p = pp->block[pp->level]; + ISAMB b = pp->isamb; + if (!p) + return 0; +again: + while (p->offset == p->size) + { + ISAM_P pos; +#if INT_ENCODE + const char *src_0; + void *c1; + char file_item_buf[DST_ITEM_MAX]; + char *file_item = file_item_buf; +#else + zint item_len; +#endif + while (p->offset == p->size) + { + if (pp->level == 0) + return 0; + close_block (pp->isamb, pp->block[pp->level]); + pp->block[pp->level] = 0; + (pp->level)--; + p = pp->block[pp->level]; + assert (!p->leaf); + } + + assert(!p->leaf); + src = p->bytes + p->offset; + +#if INT_ENCODE + c1 = (*b->method->codec.start)(); + (*b->method->codec.decode)(c1, &file_item, &src); +#else + decode_ptr (&src, &item_len); + src += item_len; +#endif + decode_ptr (&src, &pos); + p->offset = src - (char*) p->bytes; + + src = p->bytes + p->offset; + + while(1) + { + if (!untilb || p->offset == p->size) + break; + assert(p->offset < p->size); +#if INT_ENCODE + src_0 = src; + file_item = file_item_buf; + (*b->method->codec.reset)(c1); + (*b->method->codec.decode)(c1, &file_item, &src); + if 
((*b->method->compare_item)(untilb, file_item_buf) <= 1) + { + src = src_0; + break; + } +#else + decode_item_len(&src, &item_len); + if ((*b->method->compare_item)(untilb, src) <= 1) + break; + src += item_len; +#endif + decode_ptr (&src, &pos); + p->offset = src - (char*) p->bytes; + } + + pp->level++; + + while (1) + { + pp->block[pp->level] = p = open_block (pp->isamb, pos); + + pp->total_size += p->size; + pp->no_blocks++; + + if (p->leaf) + { + break; + } + + src = p->bytes + p->offset; + while(1) + { + decode_ptr (&src, &pos); + p->offset = src - (char*) p->bytes; + + if (!untilb || p->offset == p->size) + break; + assert(p->offset < p->size); +#if INT_ENCODE + src_0 = src; + file_item = file_item_buf; + (*b->method->codec.reset)(c1); + (*b->method->codec.decode)(c1, &file_item, &src); + if ((*b->method->compare_item)(untilb, file_item_buf) <= 1) + { + src = src_0; + break; + } +#else + decode_ptr (&src, &item_len); + if ((*b->method->compare_item)(untilb, src) <= 1) + break; + src += item_len; +#endif + } + pp->level++; + } +#if INT_ENCODE + (*b->method->codec.stop)(c1); +#endif + } + assert (p->offset < p->size); + assert (p->leaf); + while(1) + { + char *dst0 = dst; + src = p->bytes + p->offset; + (*pp->isamb->method->codec.decode)(p->decodeClientData, &dst, &src); + p->offset = src - (char*) p->bytes; + if (!untilb || (*pp->isamb->method->compare_item)(untilb, dst0) <= 1) + break; + dst = dst0; + if (p->offset == p->size) goto again; + } + return 1; +}