X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=isamb%2Fisamb.c;h=bc38a557bfb0343dd921d7725da5c24c6a292558;hb=842cbb4cd47fd56905c92429a6ede3a550b618cf;hp=be62d0981470994c3f09d646a2eaeb6133fe6f21;hpb=896c0427df9d8eff5de6a1735dcd992e067df844;p=idzebra-moved-to-github.git diff --git a/isamb/isamb.c b/isamb/isamb.c index be62d09..bc38a55 100644 --- a/isamb/isamb.c +++ b/isamb/isamb.c @@ -1,5 +1,5 @@ -/* $Id: isamb.c,v 1.19 2002-08-02 19:26:56 adam Exp $ - Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 +/* $Id: isamb.c,v 1.28 2004-05-30 18:04:49 adam Exp $ + Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps This file is part of the Zebra server. @@ -20,7 +20,7 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - +#include #include #include #include @@ -31,26 +31,36 @@ struct ISAMB_head { int last_block; int block_size; int block_max; + int free_list; }; #define ISAMB_DATA_OFFSET 3 +/* maximum size of encoded buffer */ #define DST_ITEM_MAX 256 -/* approx 2*4 K + max size of item */ -#define DST_BUF_SIZE 8448 +/* approx 2*max page + max size of item */ +#define DST_BUF_SIZE 16840 #define ISAMB_CACHE_ENTRY_SIZE 4096 +/* CAT_MAX: _must_ be power of 2 */ +#define CAT_MAX 4 +#define CAT_MASK (CAT_MAX-1) +/* CAT_NO: <= CAT_MAX */ +#define CAT_NO 4 + +/* ISAMB_PTR_CODEC=1 var, =0 fixed */ +#define ISAMB_PTR_CODEC 1 + struct ISAMB_cache_entry { ISAMB_P pos; - char *buf; + unsigned char *buf; int dirty; int hits; struct ISAMB_cache_entry *next; }; - struct ISAMB_file { BFile bf; int head_dirty; @@ -60,11 +70,13 @@ struct ISAMB_file { struct ISAMB_s { BFiles bfs; - ISAMC_M method; + ISAMC_M *method; struct ISAMB_file *file; int no_cat; int cache; /* 0=no cache, 1=use cache, -1=dummy isam (for testing only) */ + int log_io; /* log level for bf_read/bf_write calls */ + int log_freelist; /* log level for freelist handling */ }; struct ISAMB_block { @@ -73,10 +85,12 @@ struct ISAMB_block { int size; int leaf; int dirty; + int deleted; int offset; char *bytes; unsigned char *buf; void *decodeClientData; + int log_rw; }; struct ISAMB_PP_s { @@ -88,30 +102,66 @@ struct ISAMB_PP_s { struct ISAMB_block **block; }; -void encode_ptr (char **dst, int pos) +#if ISAMB_PTR_CODEC +static void encode_ptr (char **dst, unsigned pos) { - memcpy (*dst, &pos, sizeof(pos)); + unsigned char *bp = (unsigned char*) *dst; + + while (pos > 127) + { + *bp++ = 128 | (pos & 127); + pos = pos >> 7; + } + *bp++ = pos; + *dst = (char *) bp; +} +#else +static void encode_ptr (char **dst, unsigned pos) +{ + memcpy(*dst, &pos, sizeof(pos)); (*dst) += sizeof(pos); } +#endif + +#if ISAMB_PTR_CODEC +static void decode_ptr (char **src1, int *pos) +{ + unsigned char **src = (unsigned char **) src1; + unsigned d = 0; + unsigned char c; + unsigned r = 0; -void decode_ptr (char **src, int *pos) + while (((c = *(*src)++) & 128)) + { + d += ((c & 127) << r); + r += 7; + } + d += (c << r); + *pos = d; +} +#else +static void decode_ptr (char **src, int *pos) { - memcpy (pos, *src, sizeof(*pos)); - (*src) += sizeof(*pos); + memcpy (pos, *src, sizeof(*pos)); + (*src) += sizeof(*pos); } +#endif -ISAMB isamb_open (BFiles bfs, const char *name, int writeflag, ISAMC_M method, +ISAMB isamb_open (BFiles bfs, const char *name, int writeflag, ISAMC_M *method, int cache) { ISAMB isamb = xmalloc (sizeof(*isamb)); int i, b_size = 32; isamb->bfs = bfs; - isamb->method = (ISAMC_M) xmalloc (sizeof(*method)); + isamb->method = (ISAMC_M *) xmalloc (sizeof(*method)); memcpy (isamb->method, method, sizeof(*method)); - isamb->no_cat = 4; + isamb->no_cat = CAT_NO; + isamb->log_io = 0; + isamb->log_freelist = 0; isamb->cache = cache; + assert (cache == 0); isamb->file = xmalloc (sizeof(*isamb->file) * isamb->no_cat); for (i = 0; ino_cat; i++) { @@ -133,6 +183,7 @@ ISAMB isamb_open (BFiles bfs, const char *name, int writeflag, ISAMC_M method, isamb->file[i].head.last_block = isamb->file[i].head.first_block; isamb->file[i].head.block_size = b_size; isamb->file[i].head.block_max = b_size - ISAMB_DATA_OFFSET; + isamb->file[i].head.free_list = 0; } assert (isamb->file[i].head.block_size >= ISAMB_DATA_OFFSET); isamb->file[i].head_dirty = 0; @@ -150,7 +201,10 @@ static void flush_blocks (ISAMB b, int cat) b->file[cat].cache_entries = ce_this->next; if (ce_this->dirty) + { + yaz_log (b->log_io, "bf_write: flush_blocks"); bf_write (b->file[cat].bf, ce_this->pos, 0, 0, ce_this->buf); + } xfree (ce_this->buf); xfree (ce_this); } @@ -158,11 +212,11 @@ static void flush_blocks (ISAMB b, int cat) static int get_block (ISAMB b, ISAMC_P pos, char *userbuf, int wr) { - int cat = pos&3; - int off = ((pos/4) & + int cat = pos&CAT_MASK; + int off = ((pos/CAT_MAX) & (ISAMB_CACHE_ENTRY_SIZE / b->file[cat].head.block_size - 1)) * b->file[cat].head.block_size; - int norm = pos / (4*ISAMB_CACHE_ENTRY_SIZE / b->file[cat].head.block_size); + int norm = pos / (CAT_MASK*ISAMB_CACHE_ENTRY_SIZE / b->file[cat].head.block_size); int no = 0; struct ISAMB_cache_entry **ce, *ce_this = 0, **ce_last = 0; @@ -200,7 +254,10 @@ static int get_block (ISAMB b, ISAMC_P pos, char *userbuf, int wr) ce_this = *ce_last; *ce_last = 0; /* remove the last entry from list */ if (ce_this->dirty) + { + yaz_log (b->log_io, "bf_write: get_block"); bf_write (b->file[cat].bf, ce_this->pos, 0, 0, ce_this->buf); + } xfree (ce_this->buf); xfree (ce_this); } @@ -209,6 +266,7 @@ static int get_block (ISAMB b, ISAMC_P pos, char *userbuf, int wr) b->file[cat].cache_entries = ce_this; ce_this->buf = xmalloc (ISAMB_CACHE_ENTRY_SIZE); ce_this->pos = norm; + yaz_log (b->log_io, "bf_read: get_block"); if (!bf_read (b->file[cat].bf, norm, 0, 0, ce_this->buf)) memset (ce_this->buf, 0, ISAMB_CACHE_ENTRY_SIZE); if (wr) @@ -235,6 +293,7 @@ void isamb_close (ISAMB isamb) bf_write (isamb->file[i].bf, 0, 0, sizeof(struct ISAMB_head), &isamb->file[i].head); + bf_close (isamb->file[i].bf); } xfree (isamb->file); xfree (isamb->method); @@ -244,29 +303,36 @@ void isamb_close (ISAMB isamb) struct ISAMB_block *open_block (ISAMB b, ISAMC_P pos) { - int cat = pos&3; + int cat = pos&CAT_MASK; struct ISAMB_block *p; if (!pos) return 0; p = xmalloc (sizeof(*p)); p->pos = pos; - p->cat = pos & 3; + p->cat = pos & CAT_MASK; p->buf = xmalloc (b->file[cat].head.block_size); if (!get_block (b, pos, p->buf, 0)) { - if (!bf_read (b->file[cat].bf, pos/4, 0, 0, p->buf)) + yaz_log (b->log_io, "bf_read: open_block"); + if (!bf_read (b->file[cat].bf, pos/CAT_MAX, 0, 0, p->buf)) { - yaz_log (LOG_FATAL, "read failure for pos=%ld block=%ld", - (long) pos, (long) pos/4); + yaz_log (LOG_FATAL, "isamb: read fail for pos=%ld block=%ld", + (long) pos, (long) pos/CAT_MAX); abort(); } } p->bytes = p->buf + ISAMB_DATA_OFFSET; p->leaf = p->buf[0]; - p->size = p->buf[1] + 256 * p->buf[2] - ISAMB_DATA_OFFSET; + p->size = (p->buf[1] + 256 * p->buf[2]) - ISAMB_DATA_OFFSET; + if (p->size < 0) + { + fprintf (stderr, "pos=%d\n", pos); + } + assert (p->size >= 0); p->offset = 0; p->dirty = 0; + p->deleted = 0; p->decodeClientData = (*b->method->code_start)(ISAMC_DECODE); return p; } @@ -274,20 +340,42 @@ struct ISAMB_block *open_block (ISAMB b, ISAMC_P pos) struct ISAMB_block *new_block (ISAMB b, int leaf, int cat) { struct ISAMB_block *p; - int block_no; - + p = xmalloc (sizeof(*p)); - block_no = b->file[cat].head.last_block++; - p->cat = cat; - p->pos = block_no * 4 + cat; + p->buf = xmalloc (b->file[cat].head.block_size); + + if (!b->file[cat].head.free_list) + { + int block_no; + block_no = b->file[cat].head.last_block++; + p->pos = block_no * CAT_MAX + cat; + } + else + { + p->pos = b->file[cat].head.free_list; + assert((p->pos & CAT_MASK) == cat); + if (!get_block (b, p->pos, p->buf, 0)) + { + yaz_log (b->log_io, "bf_read: new_block"); + if (!bf_read (b->file[cat].bf, p->pos/CAT_MAX, 0, 0, p->buf)) + { + yaz_log (LOG_FATAL, "isamb: read fail for pos=%ld block=%ld", + (long) p->pos/CAT_MAX, (long) p->pos/CAT_MAX); + abort (); + } + } + yaz_log (b->log_freelist, "got block %d from freelist %d:%d", p->pos, + cat, p->pos/CAT_MAX); + memcpy (&b->file[cat].head.free_list, p->buf, sizeof(int)); + } p->cat = cat; b->file[cat].head_dirty = 1; - p->buf = xmalloc (b->file[cat].head.block_size); memset (p->buf, 0, b->file[cat].head.block_size); p->bytes = p->buf + ISAMB_DATA_OFFSET; p->leaf = leaf; p->size = 0; p->dirty = 1; + p->deleted = 0; p->offset = 0; p->decodeClientData = (*b->method->code_start)(ISAMC_DECODE); return p; @@ -304,18 +392,63 @@ struct ISAMB_block *new_int (ISAMB b, int cat) return new_block (b, 0, cat); } +static void check_block (ISAMB b, struct ISAMB_block *p) +{ + if (p->leaf) + { + ; + } + else + { + /* sanity check */ + char *startp = p->bytes; + char *src = startp; + char *endp = p->bytes + p->size; + int pos; + + decode_ptr (&src, &pos); + assert ((pos&CAT_MASK) == p->cat); + while (src != endp) + { + int item_len; + decode_ptr (&src, &item_len); + assert (item_len > 0 && item_len < 30); + src += item_len; + decode_ptr (&src, &pos); + assert ((pos&CAT_MASK) == p->cat); + } + } +} + void close_block (ISAMB b, struct ISAMB_block *p) { if (!p) return; - if (p->dirty) + if (p->deleted) + { + yaz_log (b->log_freelist, "release block %d from freelist %d:%d", + p->pos, p->cat, p->pos/CAT_MAX); + memcpy (p->buf, &b->file[p->cat].head.free_list, sizeof(int)); + b->file[p->cat].head.free_list = p->pos; + if (!get_block (b, p->pos, p->buf, 1)) + { + yaz_log (b->log_io, "bf_write: close_block (deleted)"); + bf_write (b->file[p->cat].bf, p->pos/CAT_MAX, 0, 0, p->buf); + } + } + else if (p->dirty) { int size = p->size + ISAMB_DATA_OFFSET; + assert (p->size >= 0); p->buf[0] = p->leaf; p->buf[1] = size & 255; p->buf[2] = size >> 8; + check_block(b, p); if (!get_block (b, p->pos, p->buf, 1)) - bf_write (b->file[p->cat].bf, p->pos/4, 0, 0, p->buf); + { + yaz_log (b->log_io, "bf_write: close_block"); + bf_write (b->file[p->cat].bf, p->pos/CAT_MAX, 0, 0, p->buf); + } } (*b->method->code_stop)(ISAMC_DECODE, p->decodeClientData); xfree (p->buf); @@ -324,14 +457,14 @@ void close_block (ISAMB b, struct ISAMB_block *p) int insert_sub (ISAMB b, struct ISAMB_block **p, void *new_item, int *mode, - ISAMC_I stream, + ISAMC_I *stream, struct ISAMB_block **sp, void *sub_item, int *sub_size, void *max_item); int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, int *mode, - ISAMC_I stream, struct ISAMB_block **sp, + ISAMC_I *stream, struct ISAMB_block **sp, void *split_item, int *split_size, void *last_max_item) { char *startp = p->bytes; @@ -345,11 +478,13 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, *sp = 0; + assert(p->size >= 0); decode_ptr (&src, &pos); while (src != endp) { int item_len; int d; + char *src0 = src; decode_ptr (&src, &item_len); d = (*b->method->compare_item)(src, lookahead_item); if (d > 0) @@ -359,6 +494,7 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, more = insert_sub (b, &sub_p1, lookahead_item, mode, stream, &sub_p2, sub_item, &sub_size, src); + src = src0; break; } src += item_len; @@ -395,6 +531,7 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, dst += endp - src; } p->size = dst - dst_buf; + assert (p->size >= 0); if (p->size <= b->file[p->cat].head.block_max) { memcpy (startp, dst_buf, dst - dst_buf); @@ -436,7 +573,8 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, - int *lookahead_mode, ISAMC_I stream, struct ISAMB_block **sp2, + int *lookahead_mode, ISAMC_I *stream, + struct ISAMB_block **sp2, void *sub_item, int *sub_size, void *max_item) { @@ -447,7 +585,7 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, void *c1 = (*b->method->code_start)(ISAMC_DECODE); void *c2 = (*b->method->code_start)(ISAMC_ENCODE); int more = 1; - int quater = b->file[b->no_cat-1].head.block_max / 4; + int quater = b->file[b->no_cat-1].head.block_max / CAT_MAX; char *cut = dst_buf + quater * 2; char *maxp = dst_buf + b->file[b->no_cat-1].head.block_max; char *half1 = 0; @@ -476,7 +614,11 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, if (d > 0) { dst_item = lookahead_item; - assert (*lookahead_mode); + if (!*lookahead_mode) + { + yaz_log (LOG_WARN, "isamb: Inconsistent register (1)"); + assert (*lookahead_mode); + } } else dst_item = file_item_buf; @@ -562,7 +704,7 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, } if (!*lookahead_mode) { - yaz_log (LOG_WARN, "Inconsistent register (2)"); + yaz_log (LOG_WARN, "isamb: Inconsistent register (2)"); abort(); } else if (!half1 && dst > cut) @@ -600,6 +742,7 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, new_size > b->file[p->cat].head.block_max) { /* non-btree block will be removed */ + p->deleted = 1; close_block (b, p); /* delete it too!! */ p = 0; /* make a new one anyway */ @@ -656,7 +799,7 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, int insert_sub (ISAMB b, struct ISAMB_block **p, void *new_item, int *mode, - ISAMC_I stream, + ISAMC_I *stream, struct ISAMB_block **sp, void *sub_item, int *sub_size, void *max_item) @@ -669,7 +812,36 @@ int insert_sub (ISAMB b, struct ISAMB_block **p, void *new_item, sub_size, max_item); } -int isamb_merge (ISAMB b, ISAMC_P pos, ISAMC_I stream) +int isamb_unlink (ISAMB b, ISAMC_P pos) +{ + struct ISAMB_block *p1; + + if (!pos) + return 0; + p1 = open_block(b, pos); + p1->deleted = 1; + if (!p1->leaf) + { + int sub_p; + int item_len; + char *src = p1->bytes + p1->offset; + + decode_ptr(&src, &sub_p); + isamb_unlink(b, sub_p); + + while (src != p1->bytes + p1->size) + { + decode_ptr(&src, &item_len); + src += item_len; + decode_ptr(&src, &sub_p); + isamb_unlink(b, sub_p); + } + } + close_block(b, p1); + return 0; +} + +int isamb_merge (ISAMB b, ISAMC_P pos, ISAMC_I *stream) { char item_buf[DST_ITEM_MAX]; char *item_ptr; @@ -785,7 +957,7 @@ int isamb_block_info (ISAMB isamb, int cat) void isamb_pp_close (ISAMB_PP pp) { - return isamb_pp_close_x (pp, 0, 0); + isamb_pp_close_x (pp, 0, 0); } int isamb_pp_read (ISAMB_PP pp, void *buf)