X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=isamb%2Fisamb.c;h=2ac59fb7c94de153cdedc72794d92b7fb3057ed9;hb=0d4ce8fe167ca3df0c216179de0a2e9d22ce3a6a;hp=4302a53113ebaa0c3e909b6cbf701c528ac9a4d0;hpb=2e6e4817f094605bbd3455b95ac5fc69920dcc34;p=idzebra-moved-to-github.git diff --git a/isamb/isamb.c b/isamb/isamb.c index 4302a53..2ac59fb 100644 --- a/isamb/isamb.c +++ b/isamb/isamb.c @@ -1,9 +1,26 @@ -/* - * Copyright (c) 1995-1998, Index Data. - * See the file LICENSE for details. - * - * $Id: isamb.c,v 1.9 2002-04-23 17:56:54 adam Exp $ - */ +/* $Id: isamb.c,v 1.23 2003-03-02 23:12:50 adam Exp $ + Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003 + Index Data Aps + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Zebra; see the file LICENSE.zebra. If not, write to the +Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. +*/ + +#include #include #include #include @@ -14,6 +31,7 @@ struct ISAMB_head { int last_block; int block_size; int block_max; + int free_list; }; #define ISAMB_DATA_OFFSET 3 @@ -23,11 +41,22 @@ struct ISAMB_head { /* approx 2*4 K + max size of item */ #define DST_BUF_SIZE 8448 +#define ISAMB_CACHE_ENTRY_SIZE 4096 + +struct ISAMB_cache_entry { + ISAMB_P pos; + char *buf; + int dirty; + int hits; + struct ISAMB_cache_entry *next; +}; + struct ISAMB_file { BFile bf; int head_dirty; struct ISAMB_head head; + struct ISAMB_cache_entry *cache_entries; }; struct ISAMB_s { @@ -36,23 +65,31 @@ struct ISAMB_s { struct ISAMB_file *file; int no_cat; + int cache; /* 0=no cache, 1=use cache, -1=dummy isam (for testing only) */ + int log_io; /* log level for bf_read/bf_write calls */ + int log_freelist; /* log level for freelist handling */ }; struct ISAMB_block { - int pos; + ISAMB_P pos; int cat; int size; int leaf; int dirty; + int deleted; int offset; char *bytes; unsigned char *buf; void *decodeClientData; + int log_rw; }; struct ISAMB_PP_s { ISAMB isamb; + ISAMB_P pos; int level; + int total_size; + int no_blocks; struct ISAMB_block **block; }; @@ -68,50 +105,158 @@ void decode_ptr (char **src, int *pos) (*src) += sizeof(*pos); } -ISAMB isamb_open (BFiles bfs, const char *name, int writeflag, ISAMC_M method) +ISAMB isamb_open (BFiles bfs, const char *name, int writeflag, ISAMC_M method, + int cache) { ISAMB isamb = xmalloc (sizeof(*isamb)); - int i, b_size = 64; + int i, b_size = 32; isamb->bfs = bfs; isamb->method = (ISAMC_M) xmalloc (sizeof(*method)); memcpy (isamb->method, method, sizeof(*method)); isamb->no_cat = 4; + isamb->log_io = 0; + isamb->cache = cache; isamb->file = xmalloc (sizeof(*isamb->file) * isamb->no_cat); for (i = 0; ino_cat; i++) { char fname[DST_BUF_SIZE]; - isamb->file[i].head.first_block = 1; - isamb->file[i].head.last_block = 1; - isamb->file[i].head.block_size = b_size; - isamb->file[i].head.block_max = b_size - ISAMB_DATA_OFFSET; - b_size = b_size * 4; + isamb->file[i].cache_entries = 0; + isamb->file[i].head_dirty = 0; + sprintf (fname, "%s%c", name, i+'A'); + if (cache) + isamb->file[i].bf = bf_open (bfs, fname, ISAMB_CACHE_ENTRY_SIZE, + writeflag); + else + isamb->file[i].bf = bf_open (bfs, fname, b_size, writeflag); + + + if (!bf_read (isamb->file[i].bf, 0, 0, sizeof(struct ISAMB_head), + &isamb->file[i].head)) + { + isamb->file[i].head.first_block = ISAMB_CACHE_ENTRY_SIZE/b_size+1; + isamb->file[i].head.last_block = isamb->file[i].head.first_block; + isamb->file[i].head.block_size = b_size; + isamb->file[i].head.block_max = b_size - ISAMB_DATA_OFFSET; + isamb->file[i].head.free_list = 0; + } + assert (isamb->file[i].head.block_size >= ISAMB_DATA_OFFSET); isamb->file[i].head_dirty = 0; - sprintf (fname, "%s-%d", name, i); - isamb->file[i].bf = - bf_open (bfs, fname, isamb->file[i].head.block_size, writeflag); - - bf_read (isamb->file[i].bf, 0, 0, sizeof(struct ISAMB_head), - &isamb->file[i].head); + assert(isamb->file[i].head.block_size == b_size); + b_size = b_size * 4; } return isamb; } +static void flush_blocks (ISAMB b, int cat) +{ + while (b->file[cat].cache_entries) + { + struct ISAMB_cache_entry *ce_this = b->file[cat].cache_entries; + b->file[cat].cache_entries = ce_this->next; + + if (ce_this->dirty) + { + yaz_log (b->log_io, "bf_write: flush_blocks"); + bf_write (b->file[cat].bf, ce_this->pos, 0, 0, ce_this->buf); + } + xfree (ce_this->buf); + xfree (ce_this); + } +} + +static int get_block (ISAMB b, ISAMC_P pos, char *userbuf, int wr) +{ + int cat = pos&3; + int off = ((pos/4) & + (ISAMB_CACHE_ENTRY_SIZE / b->file[cat].head.block_size - 1)) + * b->file[cat].head.block_size; + int norm = pos / (4*ISAMB_CACHE_ENTRY_SIZE / b->file[cat].head.block_size); + int no = 0; + struct ISAMB_cache_entry **ce, *ce_this = 0, **ce_last = 0; + + if (!b->cache) + return 0; + + assert (ISAMB_CACHE_ENTRY_SIZE >= b->file[cat].head.block_size); + for (ce = &b->file[cat].cache_entries; *ce; ce = &(*ce)->next, no++) + { + ce_last = ce; + if ((*ce)->pos == norm) + { + ce_this = *ce; + *ce = (*ce)->next; /* remove from list */ + + ce_this->next = b->file[cat].cache_entries; /* move to front */ + b->file[cat].cache_entries = ce_this; + + if (wr) + { + memcpy (ce_this->buf + off, userbuf, + b->file[cat].head.block_size); + ce_this->dirty = 1; + } + else + memcpy (userbuf, ce_this->buf + off, + b->file[cat].head.block_size); + return 1; + } + } + if (no >= 40) + { + assert (no == 40); + assert (ce_last && *ce_last); + ce_this = *ce_last; + *ce_last = 0; /* remove the last entry from list */ + if (ce_this->dirty) + { + yaz_log (b->log_io, "bf_write: get_block"); + bf_write (b->file[cat].bf, ce_this->pos, 0, 0, ce_this->buf); + } + xfree (ce_this->buf); + xfree (ce_this); + } + ce_this = xmalloc (sizeof(*ce_this)); + ce_this->next = b->file[cat].cache_entries; + b->file[cat].cache_entries = ce_this; + ce_this->buf = xmalloc (ISAMB_CACHE_ENTRY_SIZE); + ce_this->pos = norm; + yaz_log (b->log_io, "bf_read: get_block"); + if (!bf_read (b->file[cat].bf, norm, 0, 0, ce_this->buf)) + memset (ce_this->buf, 0, ISAMB_CACHE_ENTRY_SIZE); + if (wr) + { + memcpy (ce_this->buf + off, userbuf, b->file[cat].head.block_size); + ce_this->dirty = 1; + } + else + { + ce_this->dirty = 0; + memcpy (userbuf, ce_this->buf + off, b->file[cat].head.block_size); + } + return 1; +} + + void isamb_close (ISAMB isamb) { int i; for (i = 0; ino_cat; i++) { + flush_blocks (isamb, i); if (isamb->file[i].head_dirty) bf_write (isamb->file[i].bf, 0, 0, sizeof(struct ISAMB_head), &isamb->file[i].head); + + bf_close (isamb->file[i].bf); } xfree (isamb->file); xfree (isamb->method); xfree (isamb); } + struct ISAMB_block *open_block (ISAMB b, ISAMC_P pos) { int cat = pos&3; @@ -122,12 +267,23 @@ struct ISAMB_block *open_block (ISAMB b, ISAMC_P pos) p->pos = pos; p->cat = pos & 3; p->buf = xmalloc (b->file[cat].head.block_size); - bf_read (b->file[cat].bf, pos/4, 0, 0, p->buf); + + if (!get_block (b, pos, p->buf, 0)) + { + yaz_log (b->log_io, "bf_read: open_block"); + if (!bf_read (b->file[cat].bf, pos/4, 0, 0, p->buf)) + { + yaz_log (LOG_FATAL, "read failure for pos=%ld block=%ld", + (long) pos, (long) pos/4); + abort(); + } + } p->bytes = p->buf + ISAMB_DATA_OFFSET; p->leaf = p->buf[0]; p->size = p->buf[1] + 256 * p->buf[2] - ISAMB_DATA_OFFSET; p->offset = 0; p->dirty = 0; + p->deleted = 0; p->decodeClientData = (*b->method->code_start)(ISAMC_DECODE); return p; } @@ -135,20 +291,41 @@ struct ISAMB_block *open_block (ISAMB b, ISAMC_P pos) struct ISAMB_block *new_block (ISAMB b, int leaf, int cat) { struct ISAMB_block *p; - int block_no; - + p = xmalloc (sizeof(*p)); - block_no = b->file[cat].head.last_block++; - p->cat = cat; - p->pos = block_no * 4 + cat; + p->buf = xmalloc (b->file[cat].head.block_size); + + if (!b->file[cat].head.free_list) + { + int block_no; + block_no = b->file[cat].head.last_block++; + p->pos = block_no * 4 + cat; + } + else + { + p->pos = b->file[cat].head.free_list; + if (!get_block (b, p->pos, p->buf, 0)) + { + yaz_log (b->log_io, "bf_read: new_block"); + if (!bf_read (b->file[cat].bf, p->pos/4, 0, 0, p->buf)) + { + yaz_log (LOG_FATAL, "read failure for pos=%ld block=%ld", + (long) p->pos/4, (long) p->pos/4); + abort (); + } + } + yaz_log (b->log_freelist, "got block %d from freelist %d:%d", p->pos, + cat, p->pos/4); + memcpy (&b->file[cat].head.free_list, p->buf, sizeof(int)); + } p->cat = cat; b->file[cat].head_dirty = 1; - p->buf = xmalloc (b->file[cat].head.block_size); memset (p->buf, 0, b->file[cat].head.block_size); p->bytes = p->buf + ISAMB_DATA_OFFSET; p->leaf = leaf; p->size = 0; p->dirty = 1; + p->deleted = 0; p->offset = 0; p->decodeClientData = (*b->method->code_start)(ISAMC_DECODE); return p; @@ -169,13 +346,29 @@ void close_block (ISAMB b, struct ISAMB_block *p) { if (!p) return; - if (p->dirty) + if (p->deleted) + { + yaz_log (b->log_freelist, "release block %d from freelist %d:%d", + p->pos, p->cat, p->pos/4); + memcpy (p->buf, &b->file[p->cat].head.free_list, sizeof(int)); + b->file[p->cat].head.free_list = p->pos; + if (!get_block (b, p->pos, p->buf, 1)) + { + yaz_log (b->log_io, "bf_write: close_block"); + bf_write (b->file[p->cat].bf, p->pos/4, 0, 0, p->buf); + } + } + else if (p->dirty) { int size = p->size + ISAMB_DATA_OFFSET; p->buf[0] = p->leaf; p->buf[1] = size & 255; p->buf[2] = size >> 8; - bf_write (b->file[p->cat].bf, p->pos/4, 0, 0, p->buf); + if (!get_block (b, p->pos, p->buf, 1)) + { + yaz_log (b->log_io, "bf_write: close_block"); + bf_write (b->file[p->cat].bf, p->pos/4, 0, 0, p->buf); + } } (*b->method->code_stop)(ISAMC_DECODE, p->decodeClientData); xfree (p->buf); @@ -183,15 +376,16 @@ void close_block (ISAMB b, struct ISAMB_block *p) } int insert_sub (ISAMB b, struct ISAMB_block **p, - void *new_item, + void *new_item, int *mode, ISAMC_I stream, struct ISAMB_block **sp, void *sub_item, int *sub_size, void *max_item); int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, + int *mode, ISAMC_I stream, struct ISAMB_block **sp, - void *split_item, int *split_size) + void *split_item, int *split_size, void *last_max_item) { char *startp = p->bytes; char *src = startp; @@ -215,7 +409,8 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, { sub_p1 = open_block (b, pos); assert (sub_p1); - more = insert_sub (b, &sub_p1, lookahead_item, stream, &sub_p2, + more = insert_sub (b, &sub_p1, lookahead_item, mode, + stream, &sub_p2, sub_item, &sub_size, src); break; } @@ -226,8 +421,8 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, { sub_p1 = open_block (b, pos); assert (sub_p1); - more = insert_sub (b, &sub_p1, lookahead_item, stream, &sub_p2, - sub_item, &sub_size, 0); + more = insert_sub (b, &sub_p1, lookahead_item, mode, stream, &sub_p2, + sub_item, &sub_size, last_max_item); } if (sub_p2) { @@ -294,7 +489,7 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, - ISAMC_I stream, struct ISAMB_block **sp2, + int *lookahead_mode, ISAMC_I stream, struct ISAMB_block **sp2, void *sub_item, int *sub_size, void *max_item) { @@ -326,17 +521,23 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, char *dst_item = 0; char *dst_0 = dst; char *lookahead_next; - int lookahead_mode; int d = -1; if (lookahead_item) d = (*b->method->compare_item)(file_item_buf, lookahead_item); - if (d > 0) + if (d > 0) + { dst_item = lookahead_item; + assert (*lookahead_mode); + } else dst_item = file_item_buf; - if (!half1 && dst > cut) + if (!*lookahead_mode && d == 0) + { + p->dirty = 1; + } + else if (!half1 && dst > cut) { char *dst_item_0 = dst_item; half1 = dst; /* candidate for splitting */ @@ -362,15 +563,15 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, lookahead_next = lookahead_item; if (!(*stream->read_item)(stream->clientData, &lookahead_next, - &lookahead_mode)) + lookahead_mode)) { lookahead_item = 0; more = 0; } - if (max_item && + if (lookahead_item && max_item && (*b->method->compare_item)(max_item, lookahead_item) <= 0) { - yaz_log (LOG_LOG, "max_item 1"); + /* max_item 1 */ lookahead_item = 0; } @@ -381,7 +582,7 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, { lookahead_next = lookahead_item; if (!(*stream->read_item)(stream->clientData, - &lookahead_next, &lookahead_mode)) + &lookahead_next, lookahead_mode)) { lookahead_item = 0; more = 0; @@ -403,17 +604,21 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, maxp = dst_buf + b->file[b->no_cat-1].head.block_max + quater; while (lookahead_item) { - int lookahead_mode; char *dst_item = lookahead_item; char *dst_0 = dst; if (max_item && (*b->method->compare_item)(max_item, lookahead_item) <= 0) { - yaz_log (LOG_LOG, "max_item 2"); + /* max_item 2 */ break; } - if (!half1 && dst > cut) + if (!*lookahead_mode) + { + yaz_log (LOG_WARN, "Inconsistent register (2)"); + abort(); + } + else if (!half1 && dst > cut) { char *dst_item_0 = dst_item; half1 = dst; /* candidate for splitting */ @@ -437,7 +642,7 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, p->dirty = 1; dst_item = lookahead_item; if (!(*stream->read_item)(stream->clientData, &dst_item, - &lookahead_mode)) + lookahead_mode)) { lookahead_item = 0; more = 0; @@ -448,6 +653,7 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, new_size > b->file[p->cat].head.block_max) { /* non-btree block will be removed */ + p->deleted = 1; close_block (b, p); /* delete it too!! */ p = 0; /* make a new one anyway */ @@ -503,16 +709,18 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, } int insert_sub (ISAMB b, struct ISAMB_block **p, void *new_item, + int *mode, ISAMC_I stream, struct ISAMB_block **sp, void *sub_item, int *sub_size, void *max_item) { if (!*p || (*p)->leaf) - return insert_leaf (b, p, new_item, stream, sp, sub_item, sub_size, - max_item); + return insert_leaf (b, p, new_item, mode, stream, sp, sub_item, + sub_size, max_item); else - return insert_int (b, *p, new_item, stream, sp, sub_item, sub_size); + return insert_int (b, *p, new_item, mode, stream, sp, sub_item, + sub_size, max_item); } int isamb_merge (ISAMB b, ISAMC_P pos, ISAMC_I stream) @@ -522,6 +730,17 @@ int isamb_merge (ISAMB b, ISAMC_P pos, ISAMC_I stream) int i_mode; int more; + if (b->cache < 0) + { + int more = 1; + while (more) + { + item_ptr = item_buf; + more = + (*stream->read_item)(stream->clientData, &item_ptr, &i_mode); + } + return 1; + } item_ptr = item_buf; more = (*stream->read_item)(stream->clientData, &item_ptr, &i_mode); while (more) @@ -532,7 +751,7 @@ int isamb_merge (ISAMB b, ISAMC_P pos, ISAMC_I stream) if (pos) p = open_block (b, pos); - more = insert_sub (b, &p, item_buf, stream, &sp, + more = insert_sub (b, &p, item_buf, &i_mode, stream, &sp, sub_item, &sub_size, 0); if (sp) { /* increase level of tree by one */ @@ -558,21 +777,27 @@ int isamb_merge (ISAMB b, ISAMC_P pos, ISAMC_I stream) return pos; } -ISAMB_PP isamb_pp_open (ISAMB isamb, ISAMB_P pos) +ISAMB_PP isamb_pp_open_x (ISAMB isamb, ISAMB_P pos, int *level) { ISAMB_PP pp = xmalloc (sizeof(*pp)); pp->isamb = isamb; pp->block = xmalloc (10 * sizeof(*pp->block)); + pp->pos = pos; pp->level = 0; + pp->total_size = 0; + pp->no_blocks = 0; while (1) { struct ISAMB_block *p = open_block (isamb, pos); char *src = p->bytes + p->offset; pp->block[pp->level] = p; - if (p->bytes[0]) /* leaf */ + pp->total_size += p->size; + pp->no_blocks++; + + if (p->leaf) break; decode_ptr (&src, &pos); @@ -580,20 +805,43 @@ ISAMB_PP isamb_pp_open (ISAMB isamb, ISAMB_P pos) pp->level++; } pp->block[pp->level+1] = 0; + if (level) + *level = pp->level; return pp; } -void isamb_pp_close (ISAMB_PP pp) +ISAMB_PP isamb_pp_open (ISAMB isamb, ISAMB_P pos) +{ + return isamb_pp_open_x (isamb, pos, 0); +} + +void isamb_pp_close_x (ISAMB_PP pp, int *size, int *blocks) { int i; if (!pp) return; + if (size) + *size = pp->total_size; + if (blocks) + *blocks = pp->no_blocks; for (i = 0; i <= pp->level; i++) close_block (pp->isamb, pp->block[i]); xfree (pp->block); xfree (pp); } +int isamb_block_info (ISAMB isamb, int cat) +{ + if (cat >= 0 && cat < isamb->no_cat) + return isamb->file[cat].head.block_size; + return -1; +} + +void isamb_pp_close (ISAMB_PP pp) +{ + isamb_pp_close_x (pp, 0, 0); +} + int isamb_pp_read (ISAMB_PP pp, void *buf) { char *dst = buf; @@ -613,7 +861,7 @@ int isamb_pp_read (ISAMB_PP pp, void *buf) pp->block[pp->level] = 0; (pp->level)--; p = pp->block[pp->level]; - assert (p->bytes[0] == 0); /* must be int */ + assert (!p->leaf); /* must be int */ } src = p->bytes + p->offset; @@ -628,6 +876,9 @@ int isamb_pp_read (ISAMB_PP pp, void *buf) while (1) { pp->block[pp->level] = p = open_block (pp->isamb, pos); + + pp->total_size += p->size; + pp->no_blocks++; if (p->leaf) /* leaf */ { @@ -640,7 +891,7 @@ int isamb_pp_read (ISAMB_PP pp, void *buf) } } assert (p->offset < p->size); - assert (p->bytes[0]); + assert (p->leaf); src = p->bytes + p->offset; (*pp->isamb->method->code_item)(ISAMC_DECODE, p->decodeClientData, &dst, &src);