X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Frecindex.c;h=3280dfc10f65e9eef9ce8b6e7d8781fe77488116;hp=b8ccae7d80494a4708a0f5521fb0411cec49c953;hb=0f78f3fe78e859d9f0d3f0d3e13fcd28085dd427;hpb=f3e73ab63fbc960d863d9c14bab3b9e79c400ffa diff --git a/index/recindex.c b/index/recindex.c index b8ccae7..3280dfc 100644 --- a/index/recindex.c +++ b/index/recindex.c @@ -1,287 +1,609 @@ +/* $Id: recindex.c,v 1.53 2006-11-14 08:12:08 adam Exp $ + Copyright (C) 1995-2006 + Index Data ApS + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*/ + +#define RIDX_CHUNK 128 + /* - * Copyright (C) 1994-1995, Index Data I/S - * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss + * Format of first block + * next (8 bytes) + * ref_count (2 bytes) + * block (500 bytes) * - * $Log: recindex.c,v $ - * Revision 1.2 1995-11-15 19:13:08 adam - * Work on record management. - * - * Revision 1.1 1995/11/15 14:46:20 adam - * Started work on better record management system. + * Format of subsequent blocks + * next (8 bytes) + * block (502 bytes) * + * Format of each record + * sysno + * (length, data) - pairs + * length = 0 if same as previous */ #include +#include #include #include -#include -#include -#include -#include - -#include "recindex.h" - -struct records_info { - int rw; - int index_fd; - char *index_fname; - int data_fd; - char *data_fname; - struct records_head { - char magic[8]; - int no_records; - int index_free; - int index_last; - int data_size; - int data_slack; - int data_used; - } head; - char *tmp_buf; - int tmp_size; - int cache_size; - int cache_cur; - int cache_max; - struct record_cache_entry *record_cache; -}; - -struct record_cache_entry { - Record rec; - int dirty; -}; - -struct record_index_entry { - union { - struct { - int offset; - int size; - } used; - struct { - int next; - } free; - } u; -}; - -#define REC_HEAD_MAGIC "rechead" - -char *rec_strdup (const char *s) + +#include +#include "recindxp.h" + +#if HAVE_BZLIB_H +#include +#endif + +/* Modify argument to if below: 1=normal, 0=sysno testing */ +#if 1 +/* If this is used sysno are not converted (no testing) */ +#define FAKE_OFFSET 0 +#define USUAL_RANGE 6000000000LL + +#else +/* Use a fake > 2^32 offset so we can test for proper 64-bit handling */ +#define FAKE_OFFSET 6000000000LL +#define USUAL_RANGE 2000000000LL +#endif + +static SYSNO rec_sysno_to_ext(SYSNO sysno) { - char *p; + assert(sysno >= 0 && sysno <= USUAL_RANGE); + return sysno + FAKE_OFFSET; +} - if (!s) - return NULL; - p = malloc (strlen(s)+1); - if (!p) - { - logf (LOG_FATAL|LOG_ERRNO, "malloc"); - exit (1); - } - strcpy (p, s); - return p; +SYSNO rec_sysno_to_int(SYSNO sysno) +{ + assert(sysno >= FAKE_OFFSET && sysno <= FAKE_OFFSET + USUAL_RANGE); + return sysno - FAKE_OFFSET; } -static void rec_write_head (Records p) +static ZEBRA_RES rec_write_head(Records p) { int r; - assert (p); - assert (p->index_fd != -1); - if (lseek (p->index_fd, (off_t) 0, SEEK_SET) == -1) + assert(p); + assert(p->index_BFile); + + r = bf_write(p->index_BFile, 0, 0, sizeof(p->head), &p->head); + if (r) { - logf (LOG_FATAL|LOG_ERRNO, "lseek to 0 in %s", p->index_fname); - exit (1); + yaz_log(YLOG_FATAL|YLOG_ERRNO, "write head of %s", p->index_fname); + return ZEBRA_FAIL; } - r = write (p->index_fd, &p->head, sizeof(p->head)); - switch (r) + return ZEBRA_OK; +} + +static void rec_tmp_expand(Records p, int size) +{ + if (p->tmp_size < size + 2048 || + p->tmp_size < p->head.block_size[REC_BLOCK_TYPES-1]*2) { - case -1: - logf (LOG_FATAL|LOG_ERRNO, "write head of %s", p->index_fname); - exit (1); - case sizeof(p->head): - break; - default: - logf (LOG_FATAL, "write head of %s. wrote %d", p->index_fname, r); - exit (1); + xfree(p->tmp_buf); + p->tmp_size = size + (int) + (p->head.block_size[REC_BLOCK_TYPES-1])*2 + 2048; + p->tmp_buf = (char *) xmalloc(p->tmp_size); } } -Records rec_open (int rw) +static int read_indx(Records p, SYSNO sysno, void *buf, int itemsize, + int ignoreError) { - Records p; int r; + zint pos = (sysno-1)*itemsize; + int off = CAST_ZINT_TO_INT(pos%RIDX_CHUNK); + int sz1 = RIDX_CHUNK - off; /* sz1 is size of buffer to read.. */ + + if (sz1 > itemsize) + sz1 = itemsize; /* no more than itemsize bytes */ + + r = bf_read(p->index_BFile, 1+pos/RIDX_CHUNK, off, sz1, buf); + if (r == 1 && sz1 < itemsize) /* boundary? - must read second part */ + r = bf_read(p->index_BFile, 2+pos/RIDX_CHUNK, 0, itemsize - sz1, + (char*) buf + sz1); + if (r != 1 && !ignoreError) + { + yaz_log(YLOG_FATAL|YLOG_ERRNO, "read in %s at pos %ld", + p->index_fname, (long) pos); + } + return r; +} + +static void write_indx(Records p, SYSNO sysno, void *buf, int itemsize) +{ + zint pos = (sysno-1)*itemsize; + int off = CAST_ZINT_TO_INT(pos%RIDX_CHUNK); + int sz1 = RIDX_CHUNK - off; /* sz1 is size of buffer to read.. */ + + if (sz1 > itemsize) + sz1 = itemsize; /* no more than itemsize bytes */ + + bf_write(p->index_BFile, 1+pos/RIDX_CHUNK, off, sz1, buf); + if (sz1 < itemsize) /* boundary? must write second part */ + bf_write(p->index_BFile, 2+pos/RIDX_CHUNK, 0, itemsize - sz1, + (char*) buf + sz1); +} + +static ZEBRA_RES rec_release_blocks(Records p, SYSNO sysno) +{ + struct record_index_entry entry; + zint freeblock; + char block_and_ref[sizeof(zint) + sizeof(short)]; + int dst_type; + int first = 1; + + if (read_indx(p, sysno, &entry, sizeof(entry), 1) != 1) + return ZEBRA_FAIL; + + freeblock = entry.next; + assert(freeblock > 0); + dst_type = CAST_ZINT_TO_INT(freeblock & 7); + assert(dst_type < REC_BLOCK_TYPES); + freeblock = freeblock / 8; + while (freeblock) + { + if (bf_read(p->data_BFile[dst_type], freeblock, 0, + first ? sizeof(block_and_ref) : sizeof(zint), + block_and_ref) != 1) + { + yaz_log(YLOG_FATAL|YLOG_ERRNO, "read in rec_del_single"); + return ZEBRA_FAIL; + } + if (first) + { + short ref; + memcpy(&ref, block_and_ref + sizeof(freeblock), sizeof(ref)); + --ref; + memcpy(block_and_ref + sizeof(freeblock), &ref, sizeof(ref)); + if (ref) + { + if (bf_write(p->data_BFile[dst_type], freeblock, 0, + sizeof(block_and_ref), block_and_ref)) + { + yaz_log(YLOG_FATAL|YLOG_ERRNO, "write in rec_del_single"); + return ZEBRA_FAIL; + } + return ZEBRA_OK; + } + first = 0; + } + + if (bf_write(p->data_BFile[dst_type], freeblock, 0, sizeof(freeblock), + &p->head.block_free[dst_type])) + { + yaz_log(YLOG_FATAL|YLOG_ERRNO, "write in rec_del_single"); + return ZEBRA_FAIL; + } + p->head.block_free[dst_type] = freeblock; + memcpy(&freeblock, block_and_ref, sizeof(freeblock)); + + p->head.block_used[dst_type]--; + } + p->head.total_bytes -= entry.size; + return ZEBRA_OK; +} + +static ZEBRA_RES rec_delete_single(Records p, Record rec) +{ + struct record_index_entry entry; + + if (rec_release_blocks(p, rec_sysno_to_int(rec->sysno)) != ZEBRA_OK) + return ZEBRA_FAIL; - if (!(p = malloc (sizeof(*p)))) + entry.next = p->head.index_free; + entry.size = 0; + p->head.index_free = rec_sysno_to_int(rec->sysno); + write_indx(p, rec_sysno_to_int(rec->sysno), &entry, sizeof(entry)); + return ZEBRA_OK; +} + +static ZEBRA_RES rec_write_tmp_buf(Records p, int size, SYSNO *sysnos) +{ + struct record_index_entry entry; + int no_written = 0; + char *cptr = p->tmp_buf; + zint block_prev = -1, block_free; + int dst_type = 0; + int i; + + for (i = 1; i= p->head.block_move[i]) + dst_type = i; + while (no_written < size) { - logf (LOG_FATAL|LOG_ERRNO, "malloc"); - exit (1); + block_free = p->head.block_free[dst_type]; + if (block_free) + { + if (bf_read(p->data_BFile[dst_type], + block_free, 0, sizeof(*p->head.block_free), + &p->head.block_free[dst_type]) != 1) + { + yaz_log(YLOG_FATAL|YLOG_ERRNO, "read in %s at free block " + ZINT_FORMAT, + p->data_fname[dst_type], block_free); + return ZEBRA_FAIL; + } + } + else + block_free = p->head.block_last[dst_type]++; + if (block_prev == -1) + { + entry.next = block_free*8 + dst_type; + entry.size = size; + p->head.total_bytes += size; + while (*sysnos > 0) + { + write_indx(p, *sysnos, &entry, sizeof(entry)); + sysnos++; + } + } + else + { + memcpy(cptr, &block_free, sizeof(block_free)); + bf_write(p->data_BFile[dst_type], block_prev, 0, 0, cptr); + cptr = p->tmp_buf + no_written; + } + block_prev = block_free; + no_written += CAST_ZINT_TO_INT(p->head.block_size[dst_type]) + - sizeof(zint); + p->head.block_used[dst_type]++; } + assert(block_prev != -1); + block_free = 0; + memcpy(cptr, &block_free, sizeof(block_free)); + bf_write(p->data_BFile[dst_type], block_prev, 0, + sizeof(block_free) + (p->tmp_buf+size) - cptr, cptr); + return ZEBRA_OK; +} + +Records rec_open(BFiles bfs, int rw, int compression_method) +{ + Records p; + int i, r; + int version; + ZEBRA_RES ret = ZEBRA_OK; + + p = (Records) xmalloc(sizeof(*p)); + p->compression_method = compression_method; p->rw = rw; - p->tmp_buf = NULL; - p->tmp_size = 0; - p->data_fname = "recdata"; - p->data_fd = -1; - p->index_fname = "recindex"; - p->index_fd = open (p->index_fname, - rw ? (O_RDWR|O_CREAT) : O_RDONLY, 0666); - if (p->index_fd == -1) - { - logf (LOG_FATAL|LOG_ERRNO, "open %s", p->index_fname); - exit (1); + p->tmp_size = 1024; + p->index_fname = "reci"; + p->index_BFile = bf_open(bfs, p->index_fname, RIDX_CHUNK, rw); + if (p->index_BFile == NULL) + { + yaz_log(YLOG_FATAL|YLOG_ERRNO, "open %s", p->index_fname); + xfree(p); + return 0; } - r = read (p->index_fd, &p->head, sizeof(p->head)); + p->tmp_buf = (char *) xmalloc(p->tmp_size); + r = bf_read(p->index_BFile, 0, 0, 0, p->tmp_buf); switch (r) { - case -1: - logf (LOG_FATAL|LOG_ERRNO, "read %s", p->index_fname); - exit (1); case 0: + memcpy(p->head.magic, REC_HEAD_MAGIC, sizeof(p->head.magic)); + sprintf(p->head.version, "%3d", REC_VERSION); p->head.index_free = 0; p->head.index_last = 1; p->head.no_records = 0; - p->head.data_size = 0; - p->head.data_slack = 0; - p->head.data_used = 0; + p->head.total_bytes = 0; + for (i = 0; ihead.block_free[i] = 0; + p->head.block_last[i] = 1; + p->head.block_used[i] = 0; + } + p->head.block_size[0] = 128; + p->head.block_move[0] = 0; + for (i = 1; ihead.block_size[i] = p->head.block_size[i-1] * 4; + p->head.block_move[i] = p->head.block_size[i] * 24; + } if (rw) - rec_write_head (p); + { + if (rec_write_head(p) != ZEBRA_OK) + ret = ZEBRA_FAIL; + } break; - case sizeof(p->head): - if (memcmp (p->head.magic, REC_HEAD_MAGIC, sizeof(p->head.magic))) + case 1: + memcpy(&p->head, p->tmp_buf, sizeof(p->head)); + if (memcmp(p->head.magic, REC_HEAD_MAGIC, sizeof(p->head.magic))) { - logf (LOG_FATAL, "read %s. bad header", p->index_fname); - exit (1); + yaz_log(YLOG_FATAL, "file %s has bad format", p->index_fname); + ret = ZEBRA_FAIL; } + version = atoi(p->head.version); + if (version != REC_VERSION) + { + yaz_log(YLOG_FATAL, "file %s is version %d, but version" + " %d is required", p->index_fname, version, REC_VERSION); + ret = ZEBRA_FAIL; + } break; - default: - logf (LOG_FATAL, "read head of %s. expected %d. got %d", - p->index_fname, sizeof(p->head), r); - exit (1); } - p->data_fd = open (p->data_fname, - rw ? (O_RDWR|O_CREAT) : O_RDONLY, 0666); - if (p->data_fd == -1) + for (i = 0; idata_fname); - exit (1); + char str[80]; + sprintf(str, "recd%c", i + 'A'); + p->data_fname[i] = (char *) xmalloc(strlen(str)+1); + strcpy(p->data_fname[i], str); + p->data_BFile[i] = NULL; } - p->cache_max = 100; - p->cache_cur = 0; - if (!(p->record_cache = malloc (sizeof(*p->record_cache)*p->cache_max))) + for (i = 0; idata_BFile[i] = + bf_open(bfs, p->data_fname[i], + CAST_ZINT_TO_INT(p->head.block_size[i]), rw))) + { + yaz_log(YLOG_FATAL|YLOG_ERRNO, "bf_open %s", p->data_fname[i]); + ret = ZEBRA_FAIL; + break; + } } + p->cache_max = 400; + p->cache_cur = 0; + p->record_cache = (struct record_cache_entry *) + xmalloc(sizeof(*p->record_cache)*p->cache_max); + zebra_mutex_init(&p->mutex); + if (ret == ZEBRA_FAIL) + rec_close(&p); return p; } -static void read_indx (Records p, int sysno, void *buf, int itemsize) +static void rec_encode_unsigned(unsigned n, unsigned char *buf, int *len) { - int r; - off_t pos = (sysno-1)*itemsize + sizeof(p->head); - - if (lseek (p->index_fd, pos, SEEK_SET) == (pos) -1) + (*len) = 0; + while (n > 127) { - logf (LOG_FATAL|LOG_ERRNO, "seek in %s to pos %ld", - p->index_fname, (long) pos); - exit (1); - } - r = read (p->index_fd, buf, itemsize); - if (r != itemsize) - { - if (r == -1) - logf (LOG_FATAL|LOG_ERRNO, "read in %s at pos %ld", - p->index_fname, (long) pos); - else - logf (LOG_FATAL, "read in %s at pos %ld", - p->index_fname, (long) pos); - exit (1); + buf[*len] = 128 + (n & 127); + n = n >> 7; + (*len)++; } + buf[*len] = n; + (*len)++; } -static void rec_write_single (Records p, Record rec) +static void rec_decode_unsigned(unsigned *np, unsigned char *buf, int *len) { - struct record_index_entry entry; - int r, i, size = 0, got; - char *cptr; - off_t pos = (rec->sysno-1)*sizeof(entry) + sizeof(p->head); + unsigned n = 0; + unsigned w = 1; + (*len) = 0; - for (i = 0; i < REC_NO_INFO; i++) - if (!rec->info[i]) - size++; - else - size += strlen(rec->info[i])+1; - - entry.u.used.offset = p->head.data_size; - entry.u.used.size = size; - p->head.data_size += size; - p->head.data_used += size; + while (buf[*len] > 127) + { + n += w*(buf[*len] & 127); + w = w << 7; + (*len)++; + } + n += w * buf[*len]; + (*len)++; + *np = n; +} - if (lseek (p->index_fd, pos, SEEK_SET) == (pos) -1) +static void rec_encode_zint(zint n, unsigned char *buf, int *len) +{ + (*len) = 0; + while (n > 127) { - logf (LOG_FATAL|LOG_ERRNO, "seek in %s to pos %ld", - p->index_fname, (long) pos); - exit (1); + buf[*len] = (unsigned) (128 + (n & 127)); + n = n >> 7; + (*len)++; } - r = write (p->index_fd, &entry, sizeof(entry)); - if (r != sizeof(entry)) + buf[*len] = (unsigned) n; + (*len)++; +} + +static void rec_decode_zint(zint *np, unsigned char *buf, int *len) +{ + zint n = 0; + zint w = 1; + (*len) = 0; + + while (buf[*len] > 127) { - if (r == -1) - logf (LOG_FATAL|LOG_ERRNO, "write of %s at pos %ld", - p->index_fname, (long) pos); - else - logf (LOG_FATAL, "write of %s at pos %ld", - p->index_fname, (long) pos); - exit (1); + n += w*(buf[*len] & 127); + w = w << 7; + (*len)++; } - if (lseek (p->data_fd, entry.u.used.offset, SEEK_SET) == -1) + n += w * buf[*len]; + (*len)++; + *np = n; +} + +static void rec_cache_flush_block1(Records p, Record rec, Record last_rec, + char **out_buf, int *out_size, + int *out_offset) +{ + int i; + int len; + + for (i = 0; idata_fname, entry.u.used.offset); - exit (1); + if (*out_offset + CAST_ZINT_TO_INT(rec->size[i]) + 20 > *out_size) + { + int new_size = *out_offset + rec->size[i] + 65536; + char *np = (char *) xmalloc(new_size); + if (*out_offset) + memcpy(np, *out_buf, *out_offset); + xfree(*out_buf); + *out_size = new_size; + *out_buf = np; + } + if (i == 0) + { + rec_encode_zint(rec_sysno_to_int(rec->sysno), + (unsigned char *) *out_buf + *out_offset, &len); + (*out_offset) += len; + } + if (rec->size[i] == 0) + { + rec_encode_unsigned(1, (unsigned char *) *out_buf + *out_offset, + &len); + (*out_offset) += len; + } + else if (last_rec && rec->size[i] == last_rec->size[i] && + !memcmp(rec->info[i], last_rec->info[i], rec->size[i])) + { + rec_encode_unsigned(0, (unsigned char *) *out_buf + *out_offset, + &len); + (*out_offset) += len; + } + else + { + rec_encode_unsigned(rec->size[i]+1, + (unsigned char *) *out_buf + *out_offset, + &len); + (*out_offset) += len; + memcpy(*out_buf + *out_offset, rec->info[i], rec->size[i]); + (*out_offset) += rec->size[i]; + } } - if (p->tmp_size < entry.u.used.size) +} + +static ZEBRA_RES rec_write_multiple(Records p, int saveCount) +{ + int i; + short ref_count = 0; + char compression_method; + Record last_rec = 0; + int out_size = 1000; + int out_offset = 0; + char *out_buf = (char *) xmalloc(out_size); + SYSNO *sysnos = (SYSNO *) xmalloc(sizeof(*sysnos) * (p->cache_cur + 1)); + SYSNO *sysnop = sysnos; + ZEBRA_RES ret = ZEBRA_OK; + + for (i = 0; icache_cur - saveCount; i++) { - free (p->tmp_buf); - p->tmp_size = entry.u.used.size + 16384; - if (!(p->tmp_buf = malloc (p->tmp_size))) + struct record_cache_entry *e = p->record_cache + i; + switch (e->flag) { - logf (LOG_FATAL|LOG_ERRNO, "malloc"); - exit (1); + case recordFlagNew: + rec_cache_flush_block1(p, e->rec, last_rec, &out_buf, + &out_size, &out_offset); + *sysnop++ = rec_sysno_to_int(e->rec->sysno); + ref_count++; + e->flag = recordFlagNop; + last_rec = e->rec; + break; + case recordFlagWrite: + if (rec_release_blocks(p, rec_sysno_to_int(e->rec->sysno)) + != ZEBRA_OK) + ret = ZEBRA_FAIL; + + rec_cache_flush_block1(p, e->rec, last_rec, &out_buf, + &out_size, &out_offset); + *sysnop++ = rec_sysno_to_int(e->rec->sysno); + ref_count++; + e->flag = recordFlagNop; + last_rec = e->rec; + break; + case recordFlagDelete: + if (rec_delete_single(p, e->rec) != ZEBRA_OK) + ret = ZEBRA_FAIL; + + e->flag = recordFlagNop; + break; + default: + break; } } - cptr = p->tmp_buf; - for (i = 0; i < REC_NO_INFO; i++) - if (!rec->info[i]) - *cptr++ = '\0'; - else - { - strcpy (cptr, rec->info[i]); - cptr += strlen(rec->info[i]) + 1; - } - for (got = 0; got < entry.u.used.size; got += r) + + *sysnop = -1; + if (ref_count) { - r = write (p->data_fd, p->tmp_buf + got, entry.u.used.size - got); - if (r <= 0) - { - logf (LOG_FATAL|LOG_ERRNO, "write of %s", p->data_fname); - exit (1); - } - got += r; + unsigned int csize = 0; /* indicate compression "not performed yet" */ + compression_method = p->compression_method; + switch (compression_method) + { + case REC_COMPRESS_BZIP2: +#if HAVE_BZLIB_H + csize = out_offset + (out_offset >> 6) + 620; + rec_tmp_expand(p, csize); +#ifdef BZ_CONFIG_ERROR + i = BZ2_bzBuffToBuffCompress +#else + i = bzBuffToBuffCompress +#endif + (p->tmp_buf+sizeof(zint)+sizeof(short)+ + sizeof(char), + &csize, out_buf, out_offset, 1, 0, 30); + if (i != BZ_OK) + { + yaz_log(YLOG_WARN, "bzBuffToBuffCompress error code=%d", i); + csize = 0; + } + yaz_log(YLOG_LOG, "compress %4d %5d %5d", ref_count, out_offset, + csize); +#endif + break; + case REC_COMPRESS_NONE: + break; + } + if (!csize) + { + /* either no compression or compression not supported ... */ + csize = out_offset; + rec_tmp_expand(p, csize); + memcpy(p->tmp_buf + sizeof(zint) + sizeof(short) + sizeof(char), + out_buf, out_offset); + csize = out_offset; + compression_method = REC_COMPRESS_NONE; + } + memcpy(p->tmp_buf + sizeof(zint), &ref_count, sizeof(ref_count)); + memcpy(p->tmp_buf + sizeof(zint)+sizeof(short), + &compression_method, sizeof(compression_method)); + + /* -------- compression */ + if (rec_write_tmp_buf(p, csize + sizeof(short) + sizeof(char), sysnos) + != ZEBRA_OK) + ret = ZEBRA_FAIL; } + xfree(out_buf); + xfree(sysnos); + return ret; } -static void rec_cache_flush (Records p) +static ZEBRA_RES rec_cache_flush(Records p, int saveCount) { - int i; - for (i = 0; icache_cur; i++) + int i, j; + ZEBRA_RES ret; + + if (saveCount >= p->cache_cur) + saveCount = 0; + + ret = rec_write_multiple(p, saveCount); + + for (i = 0; icache_cur - saveCount; i++) { struct record_cache_entry *e = p->record_cache + i; - if (e->dirty) - rec_write_single (p, e->rec); - rec_rm (e->rec); - } - p->cache_cur = 0; + rec_free(&e->rec); + } + /* i still being used ... */ + for (j = 0; jrecord_cache+j, p->record_cache+i, + sizeof(*p->record_cache)); + p->cache_cur = saveCount; + return ret; } -static Record *rec_cache_lookup (Records p, int sysno, int dirty) +static Record *rec_cache_lookup(Records p, SYSNO sysno, + enum recordCacheFlag flag) { int i; for (i = 0; icache_cur; i++) @@ -289,173 +611,380 @@ static Record *rec_cache_lookup (Records p, int sysno, int dirty) struct record_cache_entry *e = p->record_cache + i; if (e->rec->sysno == sysno) { - if (dirty) - e->dirty = 1; + if (flag != recordFlagNop && e->flag == recordFlagNop) + e->flag = flag; return &e->rec; } } return NULL; } -static void rec_cache_insert (Records p, Record rec, int dirty) +static ZEBRA_RES rec_cache_insert(Records p, Record rec, enum recordCacheFlag flag) { struct record_cache_entry *e; + ZEBRA_RES ret = ZEBRA_OK; if (p->cache_cur == p->cache_max) - rec_cache_flush (p); - assert (p->cache_cur < p->cache_max); + ret = rec_cache_flush(p, 1); + else if (p->cache_cur > 0) + { + int i, j; + int used = 0; + for (i = 0; icache_cur; i++) + { + Record r = (p->record_cache + i)->rec; + for (j = 0; jsize[j]; + } + if (used > 90000) + ret = rec_cache_flush(p, 1); + } + assert(p->cache_cur < p->cache_max); e = p->record_cache + (p->cache_cur)++; - e->dirty = 1; - e->rec = rec_cp (rec); + e->flag = flag; + e->rec = rec_cp(rec); + return ret; } -void rec_close (Records *p) +ZEBRA_RES rec_close(Records *pp) { - assert (*p); + Records p = *pp; + int i; + ZEBRA_RES ret = ZEBRA_OK; - rec_cache_flush (*p); - free ((*p)->record_cache); + if (!p) + return ret; + + zebra_mutex_destroy(&p->mutex); + if (rec_cache_flush(p, 0) != ZEBRA_OK) + ret = ZEBRA_FAIL; - if ((*p)->index_fd != -1) - close ((*p)->index_fd); + xfree(p->record_cache); - if ((*p)->data_fd != -1) - close ((*p)->data_fd); + if (p->rw) + { + if (rec_write_head(p) != ZEBRA_OK) + ret = ZEBRA_FAIL; + } - free ((*p)->tmp_buf); + if (p->index_BFile) + bf_close(p->index_BFile); - free (*p); - *p = NULL; + for (i = 0; idata_BFile[i]) + bf_close(p->data_BFile[i]); + xfree(p->data_fname[i]); + } + xfree(p->tmp_buf); + xfree(p); + *pp = NULL; + return ret; } -Record rec_get (Records p, int sysno) +static Record rec_get_int(Records p, SYSNO sysno) { - int i; + int i, in_size, r; Record rec, *recp; struct record_index_entry entry; - int r, got; - char *nptr; + zint freeblock; + int dst_type; + char *nptr, *cptr; + char *in_buf = 0; + char *bz_buf = 0; +#if HAVE_BZLIB_H + unsigned int bz_size; +#endif + char compression_method; + + assert(sysno > 0); + assert(p); + + if ((recp = rec_cache_lookup(p, sysno, recordFlagNop))) + return rec_cp(*recp); + + if (read_indx(p, rec_sysno_to_int(sysno), &entry, sizeof(entry), 1) < 1) + return NULL; /* record is not there! */ - assert (sysno > 0); - assert (p); + if (!entry.size) + return NULL; /* record is deleted */ - if ((recp = rec_cache_lookup (p, sysno, 0))) - return rec_cp (*recp); + dst_type = (int) (entry.next & 7); + assert(dst_type < REC_BLOCK_TYPES); + freeblock = entry.next / 8; - read_indx (p, sysno, &entry, sizeof(entry)); + assert(freeblock > 0); - if (!(rec = malloc (sizeof(*rec)))) + rec_tmp_expand(p, entry.size); + + cptr = p->tmp_buf; + r = bf_read(p->data_BFile[dst_type], freeblock, 0, 0, cptr); + if (r < 0) + return 0; + memcpy(&freeblock, cptr, sizeof(freeblock)); + + while (freeblock) { - logf (LOG_FATAL|LOG_ERRNO, "malloc"); - exit (1); + zint tmp; + + cptr += p->head.block_size[dst_type] - sizeof(freeblock); + + memcpy(&tmp, cptr, sizeof(tmp)); + r = bf_read(p->data_BFile[dst_type], freeblock, 0, 0, cptr); + if (r < 0) + return 0; + memcpy(&freeblock, cptr, sizeof(freeblock)); + memcpy(cptr, &tmp, sizeof(tmp)); } - if (lseek (p->data_fd, entry.u.used.offset, SEEK_SET) == -1) + + rec = (Record) xmalloc(sizeof(*rec)); + rec->sysno = sysno; + memcpy(&compression_method, p->tmp_buf + sizeof(zint) + sizeof(short), + sizeof(compression_method)); + in_buf = p->tmp_buf + sizeof(zint) + sizeof(short) + sizeof(char); + in_size = entry.size - sizeof(short) - sizeof(char); + switch (compression_method) { - logf (LOG_FATAL|LOG_ERRNO, "lseek in %s to pos %ld", - p->data_fname, entry.u.used.offset); - exit (1); + case REC_COMPRESS_BZIP2: +#if HAVE_BZLIB_H + bz_size = entry.size * 20 + 100; + while (1) + { + bz_buf = (char *) xmalloc(bz_size); +#ifdef BZ_CONFIG_ERROR + i = BZ2_bzBuffToBuffDecompress +#else + i = bzBuffToBuffDecompress +#endif + (bz_buf, &bz_size, in_buf, in_size, 0, 0); + yaz_log(YLOG_LOG, "decompress %5d %5d", in_size, bz_size); + if (i == BZ_OK) + break; + yaz_log(YLOG_LOG, "failed"); + xfree(bz_buf); + bz_size *= 2; + } + in_buf = bz_buf; + in_size = bz_size; +#else + yaz_log(YLOG_FATAL, "cannot decompress record(s) in BZIP2 format"); + return 0; +#endif + break; + case REC_COMPRESS_NONE: + break; } - if (p->tmp_size < entry.u.used.size) + for (i = 0; iinfo[i] = 0; + + nptr = in_buf; /* skip ref count */ + while (nptr < in_buf + in_size) { - free (p->tmp_buf); - p->tmp_size = entry.u.used.size + 16384; - if (!(p->tmp_buf = malloc (p->tmp_size))) - { - logf (LOG_FATAL|LOG_ERRNO, "malloc"); - exit (1); - } + zint this_sysno; + int len; + rec_decode_zint(&this_sysno, (unsigned char *) nptr, &len); + nptr += len; + + for (i = 0; i < REC_NO_INFO; i++) + { + unsigned int this_size; + rec_decode_unsigned(&this_size, (unsigned char *) nptr, &len); + nptr += len; + + if (this_size == 0) + continue; + rec->size[i] = this_size-1; + + if (rec->size[i]) + { + rec->info[i] = nptr; + nptr += rec->size[i]; + } + else + rec->info[i] = NULL; + } + if (this_sysno == rec_sysno_to_int(sysno)) + break; } - for (got = 0; got < entry.u.used.size; got += r) + for (i = 0; idata_fd, p->tmp_buf + got, entry.u.used.size - got); - if (r <= 0) - { - logf (LOG_FATAL|LOG_ERRNO, "read of %s", p->data_fname); - exit (1); - } - got += r; + if (rec->info[i] && rec->size[i]) + { + char *np = xmalloc(rec->size[i]+1); + memcpy(np, rec->info[i], rec->size[i]); + np[rec->size[i]] = '\0'; + rec->info[i] = np; + } + else + { + assert(rec->info[i] == 0); + assert(rec->size[i] == 0); + } } - rec->sysno = sysno; + xfree(bz_buf); + if (rec_cache_insert(p, rec, recordFlagNop) != ZEBRA_OK) + return 0; + return rec; +} - nptr = p->tmp_buf; - for (i = 0; i < REC_NO_INFO; i++) - if (*nptr) - { - rec->info[i] = rec_strdup (nptr); - nptr += strlen(nptr)+1; - } - else - { - nptr++; - rec->info[i] = NULL; - } - rec_cache_insert (p, rec, 0); +Record rec_get(Records p, SYSNO sysno) +{ + Record rec; + zebra_mutex_lock(&p->mutex); + + rec = rec_get_int(p, sysno); + zebra_mutex_unlock(&p->mutex); return rec; } -Record rec_new (Records p) +Record rec_get_root(Records p) { - int sysno, i; + return rec_get(p, rec_sysno_to_ext(1)); +} + +static Record rec_new_int(Records p) +{ + int i; + SYSNO sysno; Record rec; - assert (p); - if (!(rec = malloc (sizeof(*rec)))) - { - logf (LOG_FATAL|LOG_ERRNO, "malloc"); - exit (1); - } - if (p->head.index_free == 0) + assert(p); + rec = (Record) xmalloc(sizeof(*rec)); + if (1 || p->head.index_free == 0) sysno = (p->head.index_last)++; else { struct record_index_entry entry; - read_indx (p, p->head.index_free, &entry, sizeof(entry)); + if (read_indx(p, p->head.index_free, &entry, sizeof(entry), 0) < 1) + { + xfree(rec); + return 0; + } sysno = p->head.index_free; - p->head.index_free = entry.u.free.next; + p->head.index_free = entry.next; } (p->head.no_records)++; - rec->sysno = sysno; + rec->sysno = rec_sysno_to_ext(sysno); for (i = 0; i < REC_NO_INFO; i++) + { rec->info[i] = NULL; - rec_cache_insert (p, rec, 1); + rec->size[i] = 0; + } + rec_cache_insert(p, rec, recordFlagNew); return rec; } -void rec_put (Records p, Record rec) +Record rec_new(Records p) +{ + Record rec; + zebra_mutex_lock(&p->mutex); + + rec = rec_new_int(p); + zebra_mutex_unlock(&p->mutex); + return rec; +} + +ZEBRA_RES rec_del(Records p, Record *recpp) { Record *recp; + ZEBRA_RES ret = ZEBRA_OK; - if ((recp = rec_cache_lookup (p, rec->sysno, 1))) + zebra_mutex_lock(&p->mutex); + (p->head.no_records)--; + if ((recp = rec_cache_lookup(p, (*recpp)->sysno, recordFlagDelete))) { - rec_rm (*recp); - *recp = rec_cp (rec); + rec_free(recp); + *recp = *recpp; } else - rec_cache_insert (p, rec, 1); + { + ret = rec_cache_insert(p, *recpp, recordFlagDelete); + rec_free(recpp); + } + zebra_mutex_unlock(&p->mutex); + *recpp = NULL; + return ret; } -void rec_rm (Record rec) +ZEBRA_RES rec_put(Records p, Record *recpp) +{ + Record *recp; + ZEBRA_RES ret = ZEBRA_OK; + + zebra_mutex_lock(&p->mutex); + if ((recp = rec_cache_lookup(p, (*recpp)->sysno, recordFlagWrite))) + { + rec_free(recp); + *recp = *recpp; + } + else + { + ret = rec_cache_insert(p, *recpp, recordFlagWrite); + rec_free(recpp); + } + zebra_mutex_unlock(&p->mutex); + *recpp = NULL; + return ret; +} + +void rec_free(Record *recpp) { int i; + + if (!*recpp) + return ; for (i = 0; i < REC_NO_INFO; i++) - free (rec->info[i]); - free (rec); + xfree((*recpp)->info[i]); + xfree(*recpp); + *recpp = NULL; } -Record rec_cp (Record rec) +Record rec_cp(Record rec) { Record n; int i; - if (!(n = malloc (sizeof(*n)))) - { - logf (LOG_FATAL|LOG_ERRNO, "malloc"); - exit (1); - } + n = (Record) xmalloc(sizeof(*n)); n->sysno = rec->sysno; for (i = 0; i < REC_NO_INFO; i++) - n->info[i] = rec_strdup (rec->info[i]); + if (!rec->info[i]) + { + n->info[i] = NULL; + n->size[i] = 0; + } + else + { + n->size[i] = rec->size[i]; + n->info[i] = (char *) xmalloc(rec->size[i]+1); + memcpy(n->info[i], rec->info[i], rec->size[i]); + n->info[i][rec->size[i]] = '\0'; + } return n; } + + +char *rec_strdup(const char *s, size_t *len) +{ + char *p; + + if (!s) + { + *len = 0; + return NULL; + } + *len = strlen(s)+1; + p = (char *) xmalloc(*len); + strcpy(p, s); + return p; +} + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ +