X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=isamb%2Fisamb.c;h=9d4d4008588ad1cb8fd2c9af92d5cb9459c2b1d9;hb=28045b27be52a2f8ac41900051612f5640cea165;hp=1f57c7313550ed04cdf1dc32442a0ae614c461eb;hpb=49d0ee122a9f86ec2967b577dcc297c501785edd;p=idzebra-moved-to-github.git diff --git a/isamb/isamb.c b/isamb/isamb.c index 1f57c73..9d4d400 100644 --- a/isamb/isamb.c +++ b/isamb/isamb.c @@ -1,5 +1,5 @@ -/* $Id: isamb.c,v 1.77 2005-04-15 10:47:49 adam Exp $ - Copyright (C) 1995-2005 +/* $Id: isamb.c,v 1.93 2007-04-03 16:54:46 adam Exp $ + Copyright (C) 1995-2007 Index Data ApS This file is part of the Zebra server. @@ -15,9 +15,9 @@ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with Zebra; see the file LICENSE.zebra. If not, write to the -Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA -02111-1307, USA. +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ #include @@ -33,7 +33,8 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #define ISAMB_MAJOR_VERSION 3 -#define ISAMB_MINOR_VERSION 0 +#define ISAMB_MINOR_VERSION_NO_ROOT 0 +#define ISAMB_MINOR_VERSION_WITH_ROOT 1 struct ISAMB_head { zint first_block; @@ -53,8 +54,9 @@ struct ISAMB_head { #define ISAMB_MAX_LEVEL 10 /* approx 2*max page + max size of item */ -#define DST_BUF_SIZE 16840 +#define DST_BUF_SIZE (2*4096+300) +/* should be maximum block size of multiple thereof */ #define ISAMB_CACHE_ENTRY_SIZE 4096 /* CAT_MAX: _must_ be power of 2 */ @@ -99,6 +101,12 @@ struct ISAMB_s { zint returned_numbers; zint skipped_nodes[ISAMB_MAX_LEVEL]; /* [0]=skipped leaves, 1 = higher etc */ zint accessed_nodes[ISAMB_MAX_LEVEL]; /* nodes we did not skip */ + zint number_of_int_splits; + zint number_of_leaf_splits; + int enable_int_count; /* whether we count nodes (or not) */ + int cache_size; /* size of blocks to cache (if cache=1) */ + int minor_version; + zint root_ptr; }; struct ISAMB_block { @@ -157,14 +165,13 @@ static void encode_ptr(char **dst, zint pos) #define decode_item_len decode_ptr #if ISAMB_PTR_CODEC -static void decode_ptr(const char **src1, zint *pos) +static void decode_ptr(const char **src, zint *pos) { - const unsigned char **src = (const unsigned char **) src1; zint d = 0; unsigned char c; unsigned r = 0; - while (((c = *(*src)++) & 128)) + while (((c = *(const unsigned char *)((*src)++)) & 128)) { d += ((zint) (c & 127) << r); r += 7; @@ -180,45 +187,91 @@ static void decode_ptr(const char **src, zint *pos) } #endif -ISAMB isamb_open(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, - int cache) + +void isamb_set_int_count(ISAMB b, int v) +{ + b->enable_int_count = v; +} + +void isamb_set_cache_size(ISAMB b, int v) +{ + b->cache_size = v; +} + +ISAMB isamb_open2(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, + int cache, int no_cat, int *sizes, int use_root_ptr) { ISAMB isamb = xmalloc(sizeof(*isamb)); - int i, b_size = ISAMB_MIN_SIZE; + int i; + + assert(no_cat <= CAT_MAX); isamb->bfs = bfs; isamb->method = (ISAMC_M *) xmalloc(sizeof(*method)); memcpy(isamb->method, method, sizeof(*method)); - isamb->no_cat = CAT_NO; + isamb->no_cat = no_cat; isamb->log_io = 0; isamb->log_freelist = 0; isamb->cache = cache; isamb->skipped_numbers = 0; isamb->returned_numbers = 0; + isamb->number_of_int_splits = 0; + isamb->number_of_leaf_splits = 0; + isamb->enable_int_count = 1; + isamb->cache_size = 40; + + if (use_root_ptr) + isamb->minor_version = ISAMB_MINOR_VERSION_WITH_ROOT; + else + isamb->minor_version = ISAMB_MINOR_VERSION_NO_ROOT; + + isamb->root_ptr = 0; + for (i = 0; iskipped_nodes[i] = isamb->accessed_nodes[i] = 0; - assert(cache == 0); + if (cache == -1) + { + yaz_log(YLOG_WARN, "isamb_open %s. Degraded TEST mode", name); + } + else + { + assert(cache == 0 || cache == 1); + } isamb->file = xmalloc(sizeof(*isamb->file) * isamb->no_cat); + + for (i = 0; i < isamb->no_cat; i++) + { + isamb->file[i].bf = 0; + isamb->file[i].head_dirty = 0; + isamb->file[i].cache_entries = 0; + } + for (i = 0; i < isamb->no_cat; i++) { char fname[DST_BUF_SIZE]; char hbuf[DST_BUF_SIZE]; - isamb->file[i].cache_entries = 0; - isamb->file[i].head_dirty = 0; + sprintf(fname, "%s%c", name, i+'A'); if (cache) isamb->file[i].bf = bf_open(bfs, fname, ISAMB_CACHE_ENTRY_SIZE, writeflag); else - isamb->file[i].bf = bf_open(bfs, fname, b_size, writeflag); + isamb->file[i].bf = bf_open(bfs, fname, sizes[i], writeflag); + + if (!isamb->file[i].bf) + { + isamb_close(isamb); + return 0; + } /* fill-in default values (for empty isamb) */ - isamb->file[i].head.first_block = ISAMB_CACHE_ENTRY_SIZE/b_size+1; + isamb->file[i].head.first_block = ISAMB_CACHE_ENTRY_SIZE/sizes[i]+1; isamb->file[i].head.last_block = isamb->file[i].head.first_block; - isamb->file[i].head.block_size = b_size; + isamb->file[i].head.block_size = sizes[i]; + assert(sizes[i] <= ISAMB_CACHE_ENTRY_SIZE); #if ISAMB_PTR_CODEC - if (i == isamb->no_cat-1 || b_size > 128) + if (i == isamb->no_cat-1 || sizes[i] > 128) isamb->file[i].head.block_offset = 8; else isamb->file[i].head.block_offset = 4; @@ -226,7 +279,7 @@ ISAMB isamb_open(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, isamb->file[i].head.block_offset = 11; #endif isamb->file[i].head.block_max = - b_size - isamb->file[i].head.block_offset; + sizes[i] - isamb->file[i].head.block_offset; isamb->file[i].head.free_list = 0; if (bf_read(isamb->file[i].bf, 0, 0, 0, hbuf)) { @@ -238,27 +291,31 @@ ISAMB isamb_open(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, if (memcmp(hbuf, "isamb", 5)) { yaz_log(YLOG_WARN, "bad isamb header for file %s", fname); + isamb_close(isamb); return 0; } if (sscanf(hbuf+5, "%d %d %d", &major, &minor, &len) != 3) { yaz_log(YLOG_WARN, "bad isamb header for file %s", fname); + isamb_close(isamb); return 0; } if (major != ISAMB_MAJOR_VERSION) { yaz_log(YLOG_WARN, "bad major version for file %s %d, must be %d", fname, major, ISAMB_MAJOR_VERSION); + isamb_close(isamb); return 0; } - for (left = len - b_size; left > 0; left = left - b_size) + for (left = len - sizes[i]; left > 0; left = left - sizes[i]) { pos++; - if (!bf_read(isamb->file[i].bf, pos, 0, 0, hbuf + pos*b_size)) + if (!bf_read(isamb->file[i].bf, pos, 0, 0, hbuf + pos*sizes[i])) { yaz_log(YLOG_WARN, "truncated isamb header for " "file=%s len=%d pos=%d", fname, len, pos); + isamb_close(isamb); return 0; } } @@ -270,11 +327,16 @@ ISAMB isamb_open(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, decode_ptr(&src, &zint_tmp); isamb->file[i].head.block_max = (int) zint_tmp; decode_ptr(&src, &isamb->file[i].head.free_list); + if (isamb->minor_version >= ISAMB_MINOR_VERSION_WITH_ROOT) + decode_ptr(&src, &isamb->root_ptr); } assert (isamb->file[i].head.block_size >= isamb->file[i].head.block_offset); - isamb->file[i].head_dirty = 0; - assert(isamb->file[i].head.block_size == b_size); - b_size = b_size * ISAMB_FAC_SIZE; + /* must rewrite the header if root ptr is in use (bug #1017) */ + if (use_root_ptr && writeflag) + isamb->file[i].head_dirty = 1; + else + isamb->file[i].head_dirty = 0; + assert(isamb->file[i].head.block_size == sizes[i]); } #if ISAMB_DEBUG yaz_log(YLOG_WARN, "isamb debug enabled. Things will be slower than usual"); @@ -282,6 +344,21 @@ ISAMB isamb_open(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, return isamb; } +ISAMB isamb_open(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, + int cache) +{ + int sizes[CAT_NO]; + int i, b_size = ISAMB_MIN_SIZE; + + for (i = 0; ifile[cat].cache_entries) @@ -299,7 +376,7 @@ static void flush_blocks (ISAMB b, int cat) } } -static int cache_block (ISAMB b, ISAM_P pos, char *userbuf, int wr) +static int cache_block (ISAMB b, ISAM_P pos, unsigned char *userbuf, int wr) { int cat = (int) (pos&CAT_MASK); int off = (int) (((pos/CAT_MAX) & @@ -336,9 +413,8 @@ static int cache_block (ISAMB b, ISAM_P pos, char *userbuf, int wr) return 1; } } - if (no >= 40) + if (no >= b->cache_size) { - assert (no == 40); assert (ce_last && *ce_last); ce_this = *ce_last; *ce_last = 0; /* remove the last entry from list */ @@ -382,6 +458,7 @@ void isamb_close (ISAMB isamb) yaz_log(YLOG_DEBUG, "isamb_close returned "ZINT_FORMAT" values, " "skipped "ZINT_FORMAT, isamb->skipped_numbers, isamb->returned_numbers); + for (i = 0; ino_cat; i++) { flush_blocks (isamb, i); @@ -389,7 +466,6 @@ void isamb_close (ISAMB isamb) { char hbuf[DST_BUF_SIZE]; int major = ISAMB_MAJOR_VERSION; - int minor = ISAMB_MINOR_VERSION; int len = 16; char *dst = hbuf + 16; int pos = 0, left; @@ -400,12 +476,17 @@ void isamb_close (ISAMB isamb) encode_ptr(&dst, isamb->file[i].head.block_size); encode_ptr(&dst, isamb->file[i].head.block_max); encode_ptr(&dst, isamb->file[i].head.free_list); + + if (isamb->minor_version >= ISAMB_MINOR_VERSION_WITH_ROOT) + encode_ptr(&dst, isamb->root_ptr); + memset(dst, '\0', b_size); /* ensure no random bytes are written */ len = dst - hbuf; /* print exactly 16 bytes (including trailing 0) */ - sprintf(hbuf, "isamb%02d %02d %02d\r\n", major, minor, len); + sprintf(hbuf, "isamb%02d %02d %02d\r\n", major, + isamb->minor_version, len); bf_write(isamb->file[i].bf, pos, 0, 0, hbuf); @@ -415,7 +496,8 @@ void isamb_close (ISAMB isamb) bf_write(isamb->file[i].bf, pos, 0, 0, hbuf + pos*b_size); } } - bf_close (isamb->file[i].bf); + if (isamb->file[i].bf) + bf_close (isamb->file[i].bf); } xfree(isamb->file); xfree(isamb->method); @@ -449,14 +531,14 @@ static struct ISAMB_block *open_block(ISAMB b, ISAM_P pos) if (!cache_block (b, pos, p->buf, 0)) { yaz_log(b->log_io, "bf_read: open_block"); - if (!bf_read(b->file[cat].bf, pos/CAT_MAX, 0, 0, p->buf)) + if (bf_read(b->file[cat].bf, pos/CAT_MAX, 0, 0, p->buf) != 1) { yaz_log(YLOG_FATAL, "isamb: read fail for pos=%ld block=%ld", (long) pos, (long) pos/CAT_MAX); - abort(); + zebra_exit("isamb:open_block"); } } - p->bytes = p->buf + offset; + p->bytes = (char *)p->buf + offset; p->leaf = p->buf[0]; p->size = (p->buf[1] + 256 * p->buf[2]) - offset; if (p->size < 0) @@ -465,7 +547,7 @@ static struct ISAMB_block *open_block(ISAMB b, ISAM_P pos) p->size, pos); } assert (p->size >= 0); - src = p->buf + 3; + src = (char*) p->buf + 3; decode_ptr(&src, &p->no_items); p->offset = 0; @@ -499,7 +581,7 @@ struct ISAMB_block *new_block (ISAMB b, int leaf, int cat) { yaz_log(YLOG_FATAL, "isamb: read fail for pos=%ld block=%ld", (long) p->pos/CAT_MAX, (long) p->pos/CAT_MAX); - abort (); + zebra_exit("isamb:new_block"); } } yaz_log(b->log_freelist, "got block " ZINT_FORMAT " from freelist %d:" ZINT_FORMAT, p->pos, @@ -509,7 +591,7 @@ struct ISAMB_block *new_block (ISAMB b, int leaf, int cat) p->cat = cat; b->file[cat].head_dirty = 1; memset (p->buf, 0, b->file[cat].head.block_size); - p->bytes = p->buf + b->file[cat].head.block_offset; + p->bytes = (char*)p->buf + b->file[cat].head.block_offset; p->leaf = leaf; p->size = 0; p->dirty = 1; @@ -559,7 +641,7 @@ static void check_block (ISAMB b, struct ISAMB_block *p) #else zint item_len; decode_item_len(&src, &item_len); - assert (item_len > 0 && item_len < 80); + assert (item_len > 0 && item_len < 128); src += item_len; #endif decode_ptr(&src, &pos); @@ -592,7 +674,7 @@ void close_block(ISAMB b, struct ISAMB_block *p) { int offset = b->file[p->cat].head.block_offset; int size = p->size + offset; - char *dst = p->buf + 3; + char *dst = (char*)p->buf + 3; assert (p->size >= 0); /* memset becuase encode_ptr usually does not write all bytes */ @@ -707,7 +789,7 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, #if INT_ENCODE const char *sub_item_ptr = sub_item; #endif - assert (sub_size < 80 && sub_size > 1); + assert (sub_size < 128 && sub_size > 1); memcpy (dst, startp, src - startp); @@ -732,11 +814,12 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, p->size = dst - dst_buf; assert (p->size >= 0); - if (p->size <= b->file[p->cat].head.block_max) { /* it fits OK in this block */ memcpy (startp, dst_buf, dst - dst_buf); + + close_block(b, sub_p2); } else { @@ -753,14 +836,22 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, const char *half; src = dst_buf; endp = dst; + + b->number_of_int_splits++; + + p->dirty = 1; + close_block(b, sub_p2); half = src + b->file[p->cat].head.block_size/2; decode_ptr(&src, &pos); - /* read sub block so we can get no_items for it */ - sub_p3 = open_block(b, pos); - no_items_first_half += sub_p3->no_items; - close_block(b, sub_p3); + if (b->enable_int_count) + { + /* read sub block so we can get no_items for it */ + sub_p3 = open_block(b, pos); + no_items_first_half += sub_p3->no_items; + close_block(b, sub_p3); + } while (src <= half) { @@ -775,10 +866,13 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, #endif decode_ptr(&src, &pos); - /* read sub block so we can get no_items for it */ - sub_p3 = open_block(b, pos); - no_items_first_half += sub_p3->no_items; - close_block(b, sub_p3); + if (b->enable_int_count) + { + /* read sub block so we can get no_items for it */ + sub_p3 = open_block(b, pos); + no_items_first_half += sub_p3->no_items; + close_block(b, sub_p3); + } } /* p is first half */ p_new_size = src - dst_buf; @@ -807,8 +901,7 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, (*sp)->no_items = p->no_items - no_items_first_half; p->no_items = no_items_first_half; } - p->dirty = 1; - close_block(b, sub_p2); + p->dirty = 1; } close_block(b, sub_p1); (*b->method->codec.stop)(c1); @@ -873,7 +966,7 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, if (!*lookahead_mode) { yaz_log(YLOG_WARN, "isamb: Inconsistent register (1)"); - assert (*lookahead_mode); + assert(*lookahead_mode); } } else @@ -991,7 +1084,7 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, { /* this is append. So a delete is bad */ yaz_log(YLOG_WARN, "isamb: Inconsistent register (2)"); - abort(); + assert(*lookahead_mode); } else if (!half1 && dst > tail_cut) { @@ -1067,6 +1160,8 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, (*b->method->codec.reset)(c2); + b->number_of_leaf_splits++; + first_dst = (*sp2)->bytes; (*b->method->codec.encode)(c2, &first_dst, &cut_item); @@ -1191,7 +1286,7 @@ void isamb_merge(ISAMB b, ISAM_P *pos, ISAMC_I *stream) #endif encode_ptr(&dst, p->pos); - assert (sub_size < 80 && sub_size > 1); + assert (sub_size < 128 && sub_size > 1); #if INT_ENCODE (*b->method->codec.reset)(c1); (*b->method->codec.encode)(c1, &dst, &sub_item_ptr); @@ -1799,10 +1894,10 @@ int isamb_pp_forward (ISAMB_PP pp, void *buf, const void *untilbuf) void isamb_pp_pos(ISAMB_PP pp, double *current, double *total) { /* return an estimate of the current position and of the total number of */ /* occureences in the isam tree, based on the current leaf */ - struct ISAMB_block *p = pp->block[pp->level]; assert(total); assert(current); - assert(p->leaf); + + /* if end-of-stream PP may not be leaf */ *total = (double) (pp->block[0]->no_items); *current = (double) pp->returned_numbers; @@ -1944,3 +2039,33 @@ again: } return 1; } + +zint isamb_get_int_splits(ISAMB b) +{ + return b->number_of_int_splits; +} + +zint isamb_get_leaf_splits(ISAMB b) +{ + return b->number_of_leaf_splits; +} + +zint isamb_get_root_ptr(ISAMB b) +{ + return b->root_ptr; +} + +void isamb_set_root_ptr(ISAMB b, zint root_ptr) +{ + b->root_ptr = root_ptr; +} + + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ +