X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Frecindex.c;h=e41c56e8396fc2bb0f01e1526c00bf43ab1bc3c2;hp=b9cd4076afeb7184f94bf66bca4020e95abe1b61;hb=27bdd6aa26843aeac89f635ed495996088d8e8aa;hpb=ecb3935e78cd9bcfdebafdee0834cfb1060d7b5e diff --git a/index/recindex.c b/index/recindex.c index b9cd407..e41c56e 100644 --- a/index/recindex.c +++ b/index/recindex.c @@ -1,8 +1,5 @@ -/* $Id: recindex.c,v 1.48 2006-05-10 08:13:22 adam Exp $ - Copyright (C) 1995-2005 - Index Data ApS - -This file is part of the Zebra server. +/* This file is part of the Zebra server. + Copyright (C) 2004-2013 Index Data Zebra is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -15,971 +12,345 @@ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with Zebra; see the file LICENSE.zebra. If not, write to the -Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA -02111-1307, USA. -*/ +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -#define RIDX_CHUNK 128 +*/ -/* - * Format of first block - * next (8 bytes) - * ref_count (2 bytes) - * block (500 bytes) - * - * Format of subsequent blocks - * next (8 bytes) - * block (502 bytes) - * - * Format of each record - * sysno - * (length, data) - pairs - * length = 0 if same as previous - */ +#if HAVE_CONFIG_H +#include +#endif #include #include #include #include +#include #include -#include "recindxp.h" +#include "recindex.h" -#if HAVE_BZLIB_H -#include -#endif +#define RIDX_CHUNK 128 -/* Modify argument to if below: 1=normal, 0=sysno testing */ -#if 1 -/* If this is used sysno are not converted (no testing) */ -#define FAKE_OFFSET 0 -#define USUAL_RANGE 6000000000LL -#else -/* Use a fake > 2^32 offset so we can test for proper 64-bit handling */ -#define FAKE_OFFSET 6000000000LL -#define USUAL_RANGE 2000000000LL -#endif +struct recindex { + char *index_fname; + BFile index_BFile; + ISAMB isamb; + ISAM_P isam_p; +}; -static SYSNO rec_sysno_to_ext(SYSNO sysno) -{ - assert(sysno >= 0 && sysno <= USUAL_RANGE); - return sysno + FAKE_OFFSET; -} +struct record_index_entry { + zint next; /* first block of record info / next free entry */ + int size; /* size of record or 0 if free entry */ +} ent; -SYSNO rec_sysno_to_int(SYSNO sysno) -{ - assert(sysno >= FAKE_OFFSET && sysno <= FAKE_OFFSET + USUAL_RANGE); - return sysno - FAKE_OFFSET; -} -static ZEBRA_RES rec_write_head(Records p) +static void rect_log_item(int level, const void *b, const char *txt) { - int r; + zint sys; + int len; - assert(p); - assert(p->index_BFile); - r = bf_write(p->index_BFile, 0, 0, sizeof(p->head), &p->head); - if (r) - { - yaz_log(YLOG_FATAL|YLOG_ERRNO, "write head of %s", p->index_fname); - return ZEBRA_FAIL; - } - return ZEBRA_OK; -} + memcpy(&sys, b, sizeof(sys)); + len = ((const char *) b)[sizeof(sys)]; -static void rec_tmp_expand(Records p, int size) -{ - if (p->tmp_size < size + 2048 || - p->tmp_size < p->head.block_size[REC_BLOCK_TYPES-1]*2) + if (len == sizeof(struct record_index_entry)) { - xfree(p->tmp_buf); - p->tmp_size = size + (int) - (p->head.block_size[REC_BLOCK_TYPES-1])*2 + 2048; - p->tmp_buf = (char *) xmalloc(p->tmp_size); + memcpy(&ent, (const char *)b + sizeof(sys) + 1, len); + yaz_log(YLOG_LOG, "%s " ZINT_FORMAT " next=" ZINT_FORMAT " sz=%d", txt, sys, + ent.next, ent.size); + } + else + yaz_log(YLOG_LOG, "%s " ZINT_FORMAT, txt, sys); } -static int read_indx(Records p, SYSNO sysno, void *buf, int itemsize, - int ignoreError) +int rect_compare(const void *a, const void *b) { - int r; - zint pos = (sysno-1)*itemsize; - int off = (int) (pos%RIDX_CHUNK); - int sz1 = RIDX_CHUNK - off; /* sz1 is size of buffer to read.. */ + zint s_a, s_b; - if (sz1 > itemsize) - sz1 = itemsize; /* no more than itemsize bytes */ + memcpy(&s_a, a, sizeof(s_a)); + memcpy(&s_b, b, sizeof(s_b)); - r = bf_read(p->index_BFile, 1+pos/RIDX_CHUNK, off, sz1, buf); - if (r == 1 && sz1 < itemsize) /* boundary? - must read second part */ - r = bf_read(p->index_BFile, 2+pos/RIDX_CHUNK, 0, itemsize - sz1, - (char*) buf + sz1); - if (r != 1 && !ignoreError) - { - yaz_log(YLOG_FATAL|YLOG_ERRNO, "read in %s at pos %ld", - p->index_fname, (long) pos); - } - return r; + if (s_a > s_b) + return 1; + else if (s_a < s_b) + return -1; + return 0; } -static void write_indx(Records p, SYSNO sysno, void *buf, int itemsize) +void *rect_code_start(void) { - zint pos = (sysno-1)*itemsize; - int off = (int) (pos%RIDX_CHUNK); - int sz1 = RIDX_CHUNK - off; /* sz1 is size of buffer to read.. */ - - if (sz1 > itemsize) - sz1 = itemsize; /* no more than itemsize bytes */ - - bf_write(p->index_BFile, 1+pos/RIDX_CHUNK, off, sz1, buf); - if (sz1 < itemsize) /* boundary? must write second part */ - bf_write(p->index_BFile, 2+pos/RIDX_CHUNK, 0, itemsize - sz1, - (char*) buf + sz1); + return 0; } -static ZEBRA_RES rec_release_blocks(Records p, SYSNO sysno) +void rect_encode(void *p, char **dst, const char **src) { - struct record_index_entry entry; - zint freeblock; - char block_and_ref[sizeof(zint) + sizeof(short)]; - int dst_type; - int first = 1; + zint sys; + int len; - if (read_indx(p, sysno, &entry, sizeof(entry), 1) != 1) - return ZEBRA_FAIL; + memcpy(&sys, *src, sizeof(sys)); + zebra_zint_encode(dst, sys); + (*src) += sizeof(sys); - freeblock = entry.next; - assert(freeblock > 0); - dst_type = (int) (freeblock & 7); - assert(dst_type < REC_BLOCK_TYPES); - freeblock = freeblock / 8; - while (freeblock) - { - if (bf_read(p->data_BFile[dst_type], freeblock, 0, - first ? sizeof(block_and_ref) : sizeof(zint), - block_and_ref) != 1) - { - yaz_log(YLOG_FATAL|YLOG_ERRNO, "read in rec_del_single"); - return ZEBRA_FAIL; - } - if (first) - { - short ref; - memcpy(&ref, block_and_ref + sizeof(freeblock), sizeof(ref)); - --ref; - memcpy(block_and_ref + sizeof(freeblock), &ref, sizeof(ref)); - if (ref) - { - if (bf_write(p->data_BFile[dst_type], freeblock, 0, - sizeof(block_and_ref), block_and_ref)) - { - yaz_log(YLOG_FATAL|YLOG_ERRNO, "write in rec_del_single"); - return ZEBRA_FAIL; - } - return ZEBRA_OK; - } - first = 0; - } - - if (bf_write(p->data_BFile[dst_type], freeblock, 0, sizeof(freeblock), - &p->head.block_free[dst_type])) - { - yaz_log(YLOG_FATAL|YLOG_ERRNO, "write in rec_del_single"); - return ZEBRA_FAIL; - } - p->head.block_free[dst_type] = freeblock; - memcpy(&freeblock, block_and_ref, sizeof(freeblock)); + len = **src; + **dst = len; + (*src)++; + (*dst)++; - p->head.block_used[dst_type]--; - } - p->head.total_bytes -= entry.size; - return ZEBRA_OK; + memcpy(*dst, *src, len); + *dst += len; + *src += len; } -static ZEBRA_RES rec_delete_single(Records p, Record rec) +void rect_decode(void *p, char **dst, const char **src) { - struct record_index_entry entry; + zint sys; + int len; - if (rec_release_blocks(p, rec_sysno_to_int(rec->sysno)) != ZEBRA_OK) - return ZEBRA_FAIL; + zebra_zint_decode(src, &sys); + memcpy(*dst, &sys, sizeof(sys)); + *dst += sizeof(sys); - entry.next = p->head.index_free; - entry.size = 0; - p->head.index_free = rec_sysno_to_int(rec->sysno); - write_indx(p, rec_sysno_to_int(rec->sysno), &entry, sizeof(entry)); - return ZEBRA_OK; -} + len = **src; + **dst = len; + (*src)++; + (*dst)++; -static ZEBRA_RES rec_write_tmp_buf(Records p, int size, SYSNO *sysnos) -{ - struct record_index_entry entry; - int no_written = 0; - char *cptr = p->tmp_buf; - zint block_prev = -1, block_free; - int dst_type = 0; - int i; - - for (i = 1; i= p->head.block_move[i]) - dst_type = i; - while (no_written < size) - { - block_free = p->head.block_free[dst_type]; - if (block_free) - { - if (bf_read(p->data_BFile[dst_type], - block_free, 0, sizeof(*p->head.block_free), - &p->head.block_free[dst_type]) != 1) - { - yaz_log(YLOG_FATAL|YLOG_ERRNO, "read in %s at free block " - ZINT_FORMAT, - p->data_fname[dst_type], block_free); - return ZEBRA_FAIL; - } - } - else - block_free = p->head.block_last[dst_type]++; - if (block_prev == -1) - { - entry.next = block_free*8 + dst_type; - entry.size = size; - p->head.total_bytes += size; - while (*sysnos > 0) - { - write_indx(p, *sysnos, &entry, sizeof(entry)); - sysnos++; - } - } - else - { - memcpy(cptr, &block_free, sizeof(block_free)); - bf_write(p->data_BFile[dst_type], block_prev, 0, 0, cptr); - cptr = p->tmp_buf + no_written; - } - block_prev = block_free; - no_written += (int)(p->head.block_size[dst_type]) - sizeof(zint); - p->head.block_used[dst_type]++; - } - assert(block_prev != -1); - block_free = 0; - memcpy(cptr, &block_free, sizeof(block_free)); - bf_write(p->data_BFile[dst_type], block_prev, 0, - sizeof(block_free) + (p->tmp_buf+size) - cptr, cptr); - return ZEBRA_OK; + memcpy(*dst, *src, len); + *dst += len; + *src += len; } -Records rec_open(BFiles bfs, int rw, int compression_method) +void rect_code_reset(void *p) { - Records p; - int i, r; - int version; - ZEBRA_RES ret = ZEBRA_OK; - - p = (Records) xmalloc(sizeof(*p)); - p->compression_method = compression_method; - p->rw = rw; - p->tmp_size = 1024; - p->index_fname = "reci"; - p->index_BFile = bf_open(bfs, p->index_fname, RIDX_CHUNK, rw); - if (p->index_BFile == NULL) - { - yaz_log(YLOG_FATAL|YLOG_ERRNO, "open %s", p->index_fname); - xfree(p); - return 0; - } - p->tmp_buf = (char *) xmalloc(p->tmp_size); - r = bf_read(p->index_BFile, 0, 0, 0, p->tmp_buf); - switch (r) - { - case 0: - memcpy(p->head.magic, REC_HEAD_MAGIC, sizeof(p->head.magic)); - sprintf(p->head.version, "%3d", REC_VERSION); - p->head.index_free = 0; - p->head.index_last = 1; - p->head.no_records = 0; - p->head.total_bytes = 0; - for (i = 0; ihead.block_free[i] = 0; - p->head.block_last[i] = 1; - p->head.block_used[i] = 0; - } - p->head.block_size[0] = 128; - p->head.block_move[0] = 0; - for (i = 1; ihead.block_size[i] = p->head.block_size[i-1] * 4; - p->head.block_move[i] = p->head.block_size[i] * 24; - } - if (rw) - { - if (rec_write_head(p) != ZEBRA_OK) - ret = ZEBRA_FAIL; - } - break; - case 1: - memcpy(&p->head, p->tmp_buf, sizeof(p->head)); - if (memcmp(p->head.magic, REC_HEAD_MAGIC, sizeof(p->head.magic))) - { - yaz_log(YLOG_FATAL, "file %s has bad format", p->index_fname); - ret = ZEBRA_FAIL; - } - version = atoi(p->head.version); - if (version != REC_VERSION) - { - yaz_log(YLOG_FATAL, "file %s is version %d, but version" - " %d is required", p->index_fname, version, REC_VERSION); - ret = ZEBRA_FAIL; - } - break; - } - for (i = 0; idata_fname[i] = (char *) xmalloc(strlen(str)+1); - strcpy(p->data_fname[i], str); - p->data_BFile[i] = NULL; - } - for (i = 0; idata_BFile[i] = bf_open(bfs, p->data_fname[i], - (int) (p->head.block_size[i]), - rw))) - { - yaz_log(YLOG_FATAL|YLOG_ERRNO, "bf_open %s", p->data_fname[i]); - ret = ZEBRA_FAIL; - } - } - p->cache_max = 400; - p->cache_cur = 0; - p->record_cache = (struct record_cache_entry *) - xmalloc(sizeof(*p->record_cache)*p->cache_max); - zebra_mutex_init(&p->mutex); - if (ret == ZEBRA_FAIL) - rec_close(&p); - return p; } -static void rec_encode_unsigned(unsigned n, unsigned char *buf, int *len) +void rect_code_stop(void *p) { - (*len) = 0; - while (n > 127) - { - buf[*len] = 128 + (n & 127); - n = n >> 7; - (*len)++; - } - buf[*len] = n; - (*len)++; } -static void rec_decode_unsigned(unsigned *np, unsigned char *buf, int *len) + +recindex_t recindex_open(BFiles bfs, int rw, int use_isamb) { - unsigned n = 0; - unsigned w = 1; - (*len) = 0; + recindex_t p = xmalloc(sizeof(*p)); + p->index_BFile = 0; + p->isamb = 0; - while (buf[*len] > 127) + p->index_fname = "reci"; + p->index_BFile = bf_open(bfs, p->index_fname, RIDX_CHUNK, rw); + if (p->index_BFile == NULL) { - n += w*(buf[*len] & 127); - w = w << 7; - (*len)++; + yaz_log(YLOG_FATAL|YLOG_ERRNO, "open %s", p->index_fname); + xfree(p); + return 0; } - n += w * buf[*len]; - (*len)++; - *np = n; -} -static void rec_encode_zint(zint n, unsigned char *buf, int *len) -{ - (*len) = 0; - while (n > 127) + if (use_isamb) { - buf[*len] = (unsigned) (128 + (n & 127)); - n = n >> 7; - (*len)++; - } - buf[*len] = (unsigned) n; - (*len)++; -} + int isam_block_size = 4096; + ISAMC_M method; -static void rec_decode_zint(zint *np, unsigned char *buf, int *len) -{ - zint n = 0; - zint w = 1; - (*len) = 0; + method.compare_item = rect_compare; + method.log_item = rect_log_item; + method.codec.start = rect_code_start; + method.codec.encode = rect_encode; + method.codec.decode = rect_decode; + method.codec.reset = rect_code_reset; + method.codec.stop = rect_code_stop; - while (buf[*len] > 127) - { - n += w*(buf[*len] & 127); - w = w << 7; - (*len)++; - } - n += w * buf[*len]; - (*len)++; - *np = n; -} + p->index_fname = "rect"; + p->isamb = isamb_open2(bfs, p->index_fname, rw, &method, + /* cache */ 0, + /* no_cat */ 1, &isam_block_size, + /* use_root_ptr */ 1); -static void rec_cache_flush_block1(Records p, Record rec, Record last_rec, - char **out_buf, int *out_size, - int *out_offset) -{ - int i; - int len; + p->isam_p = 0; + if (p->isamb) + p->isam_p = isamb_get_root_ptr(p->isamb); - for (i = 0; isize[i] + 20 > *out_size) - { - int new_size = *out_offset + rec->size[i] + 65536; - char *np = (char *) xmalloc(new_size); - if (*out_offset) - memcpy(np, *out_buf, *out_offset); - xfree(*out_buf); - *out_size = new_size; - *out_buf = np; - } - if (i == 0) - { - rec_encode_zint(rec_sysno_to_int(rec->sysno), - (unsigned char *) *out_buf + *out_offset, &len); - (*out_offset) += len; - } - if (rec->size[i] == 0) - { - rec_encode_unsigned(1, (unsigned char *) *out_buf + *out_offset, - &len); - (*out_offset) += len; - } - else if (last_rec && rec->size[i] == last_rec->size[i] && - !memcmp(rec->info[i], last_rec->info[i], rec->size[i])) - { - rec_encode_unsigned(0, (unsigned char *) *out_buf + *out_offset, - &len); - (*out_offset) += len; - } - else - { - rec_encode_unsigned(rec->size[i]+1, - (unsigned char *) *out_buf + *out_offset, - &len); - (*out_offset) += len; - memcpy(*out_buf + *out_offset, rec->info[i], rec->size[i]); - (*out_offset) += rec->size[i]; - } } + return p; } -static ZEBRA_RES rec_write_multiple(Records p, int saveCount) +static void log_pr(const char *txt) { - int i; - short ref_count = 0; - char compression_method; - Record last_rec = 0; - int out_size = 1000; - int out_offset = 0; - char *out_buf = (char *) xmalloc(out_size); - SYSNO *sysnos = (SYSNO *) xmalloc(sizeof(*sysnos) * (p->cache_cur + 1)); - SYSNO *sysnop = sysnos; - ZEBRA_RES ret = ZEBRA_OK; - - for (i = 0; icache_cur - saveCount; i++) - { - struct record_cache_entry *e = p->record_cache + i; - switch (e->flag) - { - case recordFlagNew: - rec_cache_flush_block1(p, e->rec, last_rec, &out_buf, - &out_size, &out_offset); - *sysnop++ = rec_sysno_to_int(e->rec->sysno); - ref_count++; - e->flag = recordFlagNop; - last_rec = e->rec; - break; - case recordFlagWrite: - if (rec_release_blocks(p, rec_sysno_to_int(e->rec->sysno)) - != ZEBRA_OK) - ret = ZEBRA_FAIL; - - rec_cache_flush_block1(p, e->rec, last_rec, &out_buf, - &out_size, &out_offset); - *sysnop++ = rec_sysno_to_int(e->rec->sysno); - ref_count++; - e->flag = recordFlagNop; - last_rec = e->rec; - break; - case recordFlagDelete: - if (rec_delete_single(p, e->rec) != ZEBRA_OK) - ret = ZEBRA_FAIL; - - e->flag = recordFlagNop; - break; - default: - break; - } - } - - *sysnop = -1; - if (ref_count) - { - unsigned int csize = 0; /* indicate compression "not performed yet" */ - compression_method = p->compression_method; - switch (compression_method) - { - case REC_COMPRESS_BZIP2: -#if HAVE_BZLIB_H - csize = out_offset + (out_offset >> 6) + 620; - rec_tmp_expand(p, csize); -#ifdef BZ_CONFIG_ERROR - i = BZ2_bzBuffToBuffCompress -#else - i = bzBuffToBuffCompress -#endif - (p->tmp_buf+sizeof(zint)+sizeof(short)+ - sizeof(char), - &csize, out_buf, out_offset, 1, 0, 30); - if (i != BZ_OK) - { - yaz_log(YLOG_WARN, "bzBuffToBuffCompress error code=%d", i); - csize = 0; - } - yaz_log(YLOG_LOG, "compress %4d %5d %5d", ref_count, out_offset, - csize); -#endif - break; - case REC_COMPRESS_NONE: - break; - } - if (!csize) - { - /* either no compression or compression not supported ... */ - csize = out_offset; - rec_tmp_expand(p, csize); - memcpy(p->tmp_buf + sizeof(zint) + sizeof(short) + sizeof(char), - out_buf, out_offset); - csize = out_offset; - compression_method = REC_COMPRESS_NONE; - } - memcpy(p->tmp_buf + sizeof(zint), &ref_count, sizeof(ref_count)); - memcpy(p->tmp_buf + sizeof(zint)+sizeof(short), - &compression_method, sizeof(compression_method)); - - /* -------- compression */ - if (rec_write_tmp_buf(p, csize + sizeof(short) + sizeof(char), sysnos) - != ZEBRA_OK) - ret = ZEBRA_FAIL; - } - xfree(out_buf); - xfree(sysnos); - return ret; + yaz_log(YLOG_LOG, "%s", txt); } -static ZEBRA_RES rec_cache_flush(Records p, int saveCount) -{ - int i, j; - ZEBRA_RES ret; - - if (saveCount >= p->cache_cur) - saveCount = 0; - ret = rec_write_multiple(p, saveCount); - - for (i = 0; icache_cur - saveCount; i++) - { - struct record_cache_entry *e = p->record_cache + i; - rec_rm(&e->rec); - } - /* i still being used ... */ - for (j = 0; jrecord_cache+j, p->record_cache+i, - sizeof(*p->record_cache)); - p->cache_cur = saveCount; - return ret; -} - -static Record *rec_cache_lookup(Records p, SYSNO sysno, - enum recordCacheFlag flag) +void recindex_close(recindex_t p) { - int i; - for (i = 0; icache_cur; i++) + if (p) { - struct record_cache_entry *e = p->record_cache + i; - if (e->rec->sysno == sysno) + if (p->index_BFile) + bf_close(p->index_BFile); + if (p->isamb) { - if (flag != recordFlagNop && e->flag == recordFlagNop) - e->flag = flag; - return &e->rec; + isamb_set_root_ptr(p->isamb, p->isam_p); + isamb_dump(p->isamb, p->isam_p, log_pr); + isamb_close(p->isamb); } + xfree(p); } - return NULL; } -static ZEBRA_RES rec_cache_insert(Records p, Record rec, enum recordCacheFlag flag) +int recindex_read_head(recindex_t p, void *buf) { - struct record_cache_entry *e; - ZEBRA_RES ret = ZEBRA_OK; - - if (p->cache_cur == p->cache_max) - ret = rec_cache_flush(p, 1); - else if (p->cache_cur > 0) - { - int i, j; - int used = 0; - for (i = 0; icache_cur; i++) - { - Record r = (p->record_cache + i)->rec; - for (j = 0; jsize[j]; - } - if (used > 90000) - ret = rec_cache_flush(p, 1); - } - assert(p->cache_cur < p->cache_max); - - e = p->record_cache + (p->cache_cur)++; - e->flag = flag; - e->rec = rec_cp(rec); - return ret; + return bf_read(p->index_BFile, 0, 0, 0, buf); } -ZEBRA_RES rec_close(Records *pp) +const char *recindex_get_fname(recindex_t p) { - Records p = *pp; - int i; - ZEBRA_RES ret = ZEBRA_OK; - - if (!p) - return ret; - - zebra_mutex_destroy(&p->mutex); - if (rec_cache_flush(p, 0) != ZEBRA_OK) - ret = ZEBRA_FAIL; + return p->index_fname; +} - xfree(p->record_cache); +ZEBRA_RES recindex_write_head(recindex_t p, const void *buf, size_t len) +{ + int r; - if (p->rw) - { - if (rec_write_head(p) != ZEBRA_OK) - ret = ZEBRA_FAIL; - } + assert(p); - if (p->index_BFile) - bf_close(p->index_BFile); + assert(p->index_BFile); - for (i = 0; iindex_BFile, 0, 0, len, buf); + if (r) { - if (p->data_BFile[i]) - bf_close(p->data_BFile[i]); - xfree(p->data_fname[i]); + yaz_log(YLOG_FATAL|YLOG_ERRNO, "write head of %s", p->index_fname); + return ZEBRA_FAIL; } - xfree(p->tmp_buf); - xfree(p); - *pp = NULL; - return ret; + return ZEBRA_OK; } -static Record rec_get_int(Records p, SYSNO sysno) +int recindex_read_indx(recindex_t p, zint sysno, void *buf, int itemsize, + int ignoreError) { - int i, in_size, r; - Record rec, *recp; - struct record_index_entry entry; - zint freeblock; - int dst_type; - char *nptr, *cptr; - char *in_buf = 0; - char *bz_buf = 0; -#if HAVE_BZLIB_H - unsigned int bz_size; -#endif - char compression_method; - - assert(sysno > 0); - assert(p); - - if ((recp = rec_cache_lookup(p, sysno, recordFlagNop))) - return rec_cp(*recp); - - if (read_indx(p, rec_sysno_to_int(sysno), &entry, sizeof(entry), 1) < 1) - return NULL; /* record is not there! */ - - if (!entry.size) - return NULL; /* record is deleted */ + int r = 0; + if (p->isamb) + { + if (p->isam_p) + { + char item[256]; + char *st = item; + char untilbuf[sizeof(zint) + 1]; - dst_type = (int) (entry.next & 7); - assert(dst_type < REC_BLOCK_TYPES); - freeblock = entry.next / 8; + ISAMB_PP isam_pp = isamb_pp_open(p->isamb, p->isam_p, 1); - assert(freeblock > 0); - - rec_tmp_expand(p, entry.size); + memcpy(untilbuf, &sysno, sizeof(sysno)); + untilbuf[sizeof(sysno)] = 0; + r = isamb_pp_forward(isam_pp, st, untilbuf); - cptr = p->tmp_buf; - r = bf_read(p->data_BFile[dst_type], freeblock, 0, 0, cptr); - if (r < 0) - return 0; - memcpy(&freeblock, cptr, sizeof(freeblock)); + isamb_pp_close(isam_pp); + if (!r) + return 0; - while (freeblock) - { - zint tmp; - - cptr += p->head.block_size[dst_type] - sizeof(freeblock); - - memcpy(&tmp, cptr, sizeof(tmp)); - r = bf_read(p->data_BFile[dst_type], freeblock, 0, 0, cptr); - if (r < 0) - return 0; - memcpy(&freeblock, cptr, sizeof(freeblock)); - memcpy(cptr, &tmp, sizeof(tmp)); + if (item[sizeof(sysno)] != itemsize) + { + yaz_log(YLOG_WARN, "unexpected entry size %d != %d", + item[sizeof(sysno)], itemsize); + return 0; + } + memcpy(buf, item + sizeof(sysno) + 1, itemsize); + } } - - rec = (Record) xmalloc(sizeof(*rec)); - rec->sysno = sysno; - memcpy(&compression_method, p->tmp_buf + sizeof(zint) + sizeof(short), - sizeof(compression_method)); - in_buf = p->tmp_buf + sizeof(zint) + sizeof(short) + sizeof(char); - in_size = entry.size - sizeof(short) - sizeof(char); - switch (compression_method) + else { - case REC_COMPRESS_BZIP2: -#if HAVE_BZLIB_H - bz_size = entry.size * 20 + 100; - while (1) - { - bz_buf = (char *) xmalloc(bz_size); -#ifdef BZ_CONFIG_ERROR - i = BZ2_bzBuffToBuffDecompress -#else - i = bzBuffToBuffDecompress -#endif - (bz_buf, &bz_size, in_buf, in_size, 0, 0); - yaz_log(YLOG_LOG, "decompress %5d %5d", in_size, bz_size); - if (i == BZ_OK) - break; - yaz_log(YLOG_LOG, "failed"); - xfree(bz_buf); - bz_size *= 2; - } - in_buf = bz_buf; - in_size = bz_size; -#else - yaz_log(YLOG_FATAL, "cannot decompress record(s) in BZIP2 format"); - return 0; -#endif - break; - case REC_COMPRESS_NONE: - break; - } - for (i = 0; iinfo[i] = 0; + zint pos = (sysno-1)*itemsize; + int off = CAST_ZINT_TO_INT(pos%RIDX_CHUNK); + int sz1 = RIDX_CHUNK - off; /* sz1 is size of buffer to read.. */ - nptr = in_buf; /* skip ref count */ - while (nptr < in_buf + in_size) - { - zint this_sysno; - int len; - rec_decode_zint(&this_sysno, (unsigned char *) nptr, &len); - nptr += len; - - for (i = 0; i < REC_NO_INFO; i++) - { - unsigned int this_size; - rec_decode_unsigned(&this_size, (unsigned char *) nptr, &len); - nptr += len; - - if (this_size == 0) - continue; - rec->size[i] = this_size-1; - - if (rec->size[i]) - { - rec->info[i] = nptr; - nptr += rec->size[i]; - } - else - rec->info[i] = NULL; - } - if (this_sysno == rec_sysno_to_int(sysno)) - break; + if (sz1 > itemsize) + sz1 = itemsize; /* no more than itemsize bytes */ + + r = bf_read(p->index_BFile, 1+pos/RIDX_CHUNK, off, sz1, buf); + if (r == 1 && sz1 < itemsize) /* boundary? - must read second part */ + r = bf_read(p->index_BFile, 2+pos/RIDX_CHUNK, 0, itemsize - sz1, + (char*) buf + sz1); + if (r != 1 && !ignoreError) + { + yaz_log(YLOG_FATAL|YLOG_ERRNO, "read in %s at pos %ld", + p->index_fname, (long) pos); + } } - for (i = 0; iinfo[i] && rec->size[i]) - { - char *np = xmalloc(rec->size[i]+1); - memcpy(np, rec->info[i], rec->size[i]); - np[rec->size[i]] = '\0'; - rec->info[i] = np; - } - else - { - assert(rec->info[i] == 0); - assert(rec->size[i] == 0); - } + struct record_index_entry *ep = buf; + yaz_log(YLOG_LOG, "read r=%d sysno=" ZINT_FORMAT " next=" ZINT_FORMAT + " sz=%d", r, sysno, ep->next, ep->size); } - xfree(bz_buf); - if (rec_cache_insert(p, rec, recordFlagNop) != ZEBRA_OK) - return 0; - return rec; +#endif + return r; } -Record rec_get(Records p, SYSNO sysno) -{ - Record rec; - zebra_mutex_lock(&p->mutex); - - rec = rec_get_int(p, sysno); - zebra_mutex_unlock(&p->mutex); - return rec; -} +struct code_read_data { + int no; + zint sysno; + void *buf; + int itemsize; + int insert_flag; +}; -Record rec_get_root(Records p) +int bt_code_read(void *vp, char **dst, int *insertMode) { - return rec_get(p, rec_sysno_to_ext(1)); -} + struct code_read_data *s = (struct code_read_data *) vp; -static Record rec_new_int(Records p) -{ - int i; - SYSNO sysno; - Record rec; + if (s->no == 0) + return 0; - assert(p); - rec = (Record) xmalloc(sizeof(*rec)); - if (1 || p->head.index_free == 0) - sysno = (p->head.index_last)++; - else - { - struct record_index_entry entry; - - if (read_indx(p, p->head.index_free, &entry, sizeof(entry), 0) < 1) - { - xfree(rec); - return 0; - } - sysno = p->head.index_free; - p->head.index_free = entry.next; - } - (p->head.no_records)++; - rec->sysno = rec_sysno_to_ext(sysno); - for (i = 0; i < REC_NO_INFO; i++) - { - rec->info[i] = NULL; - rec->size[i] = 0; - } - rec_cache_insert(p, rec, recordFlagNew); - return rec; -} + (s->no)--; -Record rec_new(Records p) -{ - Record rec; - zebra_mutex_lock(&p->mutex); - - rec = rec_new_int(p); - zebra_mutex_unlock(&p->mutex); - return rec; + memcpy(*dst, &s->sysno, sizeof(zint)); + *dst += sizeof(zint); + **dst = s->itemsize; + (*dst)++; + memcpy(*dst, s->buf, s->itemsize); + *dst += s->itemsize; + *insertMode = s->insert_flag; + return 1; } -ZEBRA_RES rec_del(Records p, Record *recpp) +void recindex_write_indx(recindex_t p, zint sysno, void *buf, int itemsize) { - Record *recp; - ZEBRA_RES ret = ZEBRA_OK; - - zebra_mutex_lock(&p->mutex); - (p->head.no_records)--; - if ((recp = rec_cache_lookup(p, (*recpp)->sysno, recordFlagDelete))) - { - rec_rm(recp); - *recp = *recpp; - } - else +#if 0 + yaz_log(YLOG_LOG, "write_indx sysno=" ZINT_FORMAT, sysno); +#endif + if (p->isamb) { - ret = rec_cache_insert(p, *recpp, recordFlagDelete); - rec_rm(recpp); - } - zebra_mutex_unlock(&p->mutex); - *recpp = NULL; - return ret; -} + struct code_read_data input; + ISAMC_I isamc_i; -ZEBRA_RES rec_put(Records p, Record *recpp) -{ - Record *recp; - ZEBRA_RES ret = ZEBRA_OK; + input.sysno = sysno; + input.buf = buf; + input.itemsize = itemsize; - zebra_mutex_lock(&p->mutex); - if ((recp = rec_cache_lookup(p, (*recpp)->sysno, recordFlagWrite))) - { - rec_rm(recp); - *recp = *recpp; + isamc_i.clientData = &input; + isamc_i.read_item = bt_code_read; + + input.no = 1; + input.insert_flag = 2; + isamb_merge(p->isamb, &p->isam_p, &isamc_i); } else { - ret = rec_cache_insert(p, *recpp, recordFlagWrite); - rec_rm(recpp); - } - zebra_mutex_unlock(&p->mutex); - *recpp = NULL; - return ret; -} - -void rec_rm(Record *recpp) -{ - int i; - - if (!*recpp) - return ; - for (i = 0; i < REC_NO_INFO; i++) - xfree((*recpp)->info[i]); - xfree(*recpp); - *recpp = NULL; -} - -Record rec_cp(Record rec) -{ - Record n; - int i; - - n = (Record) xmalloc(sizeof(*n)); - n->sysno = rec->sysno; - for (i = 0; i < REC_NO_INFO; i++) - if (!rec->info[i]) - { - n->info[i] = NULL; - n->size[i] = 0; - } - else - { - n->size[i] = rec->size[i]; - n->info[i] = (char *) xmalloc(rec->size[i]); - memcpy(n->info[i], rec->info[i], rec->size[i]); - } - return n; -} - + zint pos = (sysno-1)*itemsize; + int off = CAST_ZINT_TO_INT(pos%RIDX_CHUNK); + int sz1 = RIDX_CHUNK - off; /* sz1 is size of buffer to read.. */ -char *rec_strdup(const char *s, size_t *len) -{ - char *p; + if (sz1 > itemsize) + sz1 = itemsize; /* no more than itemsize bytes */ - if (!s) - { - *len = 0; - return NULL; + bf_write(p->index_BFile, 1+pos/RIDX_CHUNK, off, sz1, buf); + if (sz1 < itemsize) /* boundary? must write second part */ + bf_write(p->index_BFile, 2+pos/RIDX_CHUNK, 0, itemsize - sz1, + (char*) buf + sz1); } - *len = strlen(s)+1; - p = (char *) xmalloc(*len); - strcpy(p, s); - return p; } + /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab