-/* $Id: records.c,v 1.1 2007-11-23 13:52:52 adam Exp $
- Copyright (C) 1995-2007
- Index Data ApS
-
-This file is part of the Zebra server.
+/* This file is part of the Zebra server.
+ Copyright (C) 2004-2013 Index Data
Zebra is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
*/
/*
- * Format of first block
+ * Format of first block (assumes a 512 block size)
* next (8 bytes)
* ref_count (2 bytes)
* block (500 bytes)
*
- * Format of subsequent blocks
+ * Format of subsequent blocks
* next (8 bytes)
* block (502 bytes)
*
* (length, data) - pairs
* length = 0 if same as previous
*/
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include <yaz/yaz-util.h>
-#include "recindxp.h"
+#include <idzebra/bfile.h>
+#include "recindex.h"
#if HAVE_BZLIB_H
#include <bzlib.h>
#endif
+#if HAVE_ZLIB_H
+#include <zlib.h>
+#endif
+
+#define REC_BLOCK_TYPES 2
+#define REC_HEAD_MAGIC "recindex"
+#define REC_VERSION 5
+
+/*
+ * In-memory state for one open record store (the object behind the
+ * Records handle that rec_open fills in).
+ */
+struct records_info {
+ /* read/write flag exactly as passed to rec_open; when non-zero
+ rec_open writes a freshly initialised header back */
+ int rw;
+ /* REC_COMPRESS_* method passed to rec_open; selects the compressor
+ used when cached records are flushed */
+ int compression_method;
+
+ /* record index handle obtained from recindex_open; the header below
+ is read and written through it */
+ recindex_t recindex;
+
+ /* one data file per block type: its name (used in log messages) and
+ the BFile handle used for bf_read/bf_write */
+ char *data_fname[REC_BLOCK_TYPES];
+ BFile data_BFile[REC_BLOCK_TYPES];
+
+ /* scratch buffer of tmp_size bytes (4096 initially), enlarged on
+ demand through rec_tmp_expand */
+ char *tmp_buf;
+ int tmp_size;
+
+ /* cache of records; cache_cur is asserted to stay below cache_max.
+ NOTE(review): exact roles of cache_size vs. cache_max are not
+ visible in this part of the file - confirm */
+ struct record_cache_entry *record_cache;
+ int cache_size;
+ int cache_cur;
+ int cache_max;
+
+ /* the cache is flushed once the bytes held in it exceed this value;
+ rec_open sets 90000 for BZIP2 and 0 for every other method */
+ int compression_chunk_size;
+
+ /* NOTE(review): presumably serialises access to this object -
+ no lock/unlock call is visible here, confirm */
+ Zebra_mutex mutex;
+
+ /* header image; handed as raw bytes (sizeof(head)) to
+ recindex_write_head, so its layout must not be changed casually */
+ struct records_head {
+ /* NOTE(review): presumably REC_HEAD_MAGIC / REC_VERSION - confirm */
+ char magic[8];
+ char version[4];
+ /* bytes per block for each block type (256 for type 0, each
+ following type 8 times larger when the store is created) */
+ zint block_size[REC_BLOCK_TYPES];
+ /* first block of the free list for each type, 0 when empty */
+ zint block_free[REC_BLOCK_TYPES];
+ /* starts at 1; rec_prstat reports block_last-1 as total blocks */
+ zint block_last[REC_BLOCK_TYPES];
+ /* number of blocks currently in use per type */
+ zint block_used[REC_BLOCK_TYPES];
+ /* set to 0 for type 0 and block_size*2 for the others;
+ NOTE(review): presumably a size threshold for choosing a larger
+ block type - confirm */
+ zint block_move[REC_BLOCK_TYPES];
+
+ /* reported by rec_prstat as the size of the record index in bytes */
+ zint total_bytes;
+ /* rec_get_next stops scanning when it reaches this internal sysno */
+ zint index_last;
+ /* NOTE(review): presumably head of the free index entry list */
+ zint index_free;
+ /* number of records, reported by rec_prstat */
+ zint no_records;
+
+ } head;
+};
+
+/*
+ * Pending action for a cached record. The flush code resets an entry to
+ * recordFlagNop once it has been handled and ignores entries already
+ * marked recordFlagNop.
+ * NOTE(review): Write/New/Delete presumably mean "modified", "newly
+ * created" and "to be removed" - confirm against the flush switch.
+ */
+enum recordCacheFlag { recordFlagNop, recordFlagWrite, recordFlagNew,
+ recordFlagDelete };
+
+/* One slot of the record cache: the record and its pending action. */
+struct record_cache_entry {
+ Record rec; /* cached record; released with rec_free */
+ enum recordCacheFlag flag; /* what a cache flush must do with rec */
+};
+
+/*
+ * Entry of the record index. For a stored record rec_get derives the
+ * first block number as next / 8.
+ * NOTE(review): the low bits of next presumably select the block type -
+ * confirm.
+ */
+struct record_index_entry {
+ zint next; /* first block of record info / next free entry */
+ int size; /* size of record or 0 if free entry */
+};
+
+Record rec_cp(Record rec);
/* Modify argument to if below: 1=normal, 0=sysno testing */
#if 1
memcpy(block_and_ref + sizeof(freeblock), &ref, sizeof(ref));
if (ref)
{
+ /* there is still a reference to this block.. */
if (bf_write(p->data_BFile[dst_type], freeblock, 0,
sizeof(block_and_ref), block_and_ref))
{
}
return ZEBRA_OK;
}
- first = 0;
+ /* the list of blocks can all be removed (ref == 0) */
+ first = 0;
}
-
+
if (bf_write(p->data_BFile[dst_type], freeblock, 0, sizeof(freeblock),
&p->head.block_free[dst_type]))
{
cptr = p->tmp_buf + no_written;
}
block_prev = block_free;
- no_written += CAST_ZINT_TO_INT(p->head.block_size[dst_type])
+ no_written += CAST_ZINT_TO_INT(p->head.block_size[dst_type])
- sizeof(zint);
p->head.block_used[dst_type]++;
}
return ZEBRA_OK;
}
+/*
+ * Tell whether this build can store and read records with the given
+ * compression method.
+ *
+ * Returns 1 for REC_COMPRESS_NONE, 1 for REC_COMPRESS_ZLIB or
+ * REC_COMPRESS_BZIP2 when the matching library header was available at
+ * build time, and 0 for everything else (including unknown values).
+ */
+int rec_check_compression_method(int compression_method)
+{
+    int supported = 0;
+
+    if (compression_method == REC_COMPRESS_NONE)
+        supported = 1;
+#if HAVE_ZLIB_H
+    else if (compression_method == REC_COMPRESS_ZLIB)
+        supported = 1;
+#endif
+#if HAVE_BZLIB_H
+    else if (compression_method == REC_COMPRESS_BZIP2)
+        supported = 1;
+#endif
+    return supported;
+}
+
+
Records rec_open(BFiles bfs, int rw, int compression_method)
{
Records p;
memset(&p->head, '\0', sizeof(p->head));
p->compression_method = compression_method;
p->rw = rw;
- p->tmp_size = 1024;
- p->recindex = recindex_open(bfs, rw);
+ p->tmp_size = 4096;
p->tmp_buf = (char *) xmalloc(p->tmp_size);
+ p->compression_chunk_size = 0;
+ if (compression_method == REC_COMPRESS_BZIP2)
+ p->compression_chunk_size = 90000;
+ p->recindex = recindex_open(bfs, rw, 0 /* 1=isamb for recindex */);
r = recindex_read_head(p->recindex, p->tmp_buf);
switch (r)
{
p->head.block_last[i] = 1;
p->head.block_used[i] = 0;
}
- p->head.block_size[0] = 128;
+ p->head.block_size[0] = 256;
p->head.block_move[0] = 0;
for (i = 1; i<REC_BLOCK_TYPES; i++)
{
- p->head.block_size[i] = p->head.block_size[i-1] * 4;
- p->head.block_move[i] = p->head.block_size[i] * 24;
+ p->head.block_size[i] = p->head.block_size[i-1] * 8;
+ p->head.block_move[i] = p->head.block_size[i] * 2;
}
if (rw)
{
- if (recindex_write_head(p->recindex, &p->head, sizeof(p->head)) != ZEBRA_OK)
+ if (recindex_write_head(p->recindex,
+ &p->head, sizeof(p->head)) != ZEBRA_OK)
ret = ZEBRA_FAIL;
}
break;
}
if (i == 0)
{
- rec_encode_zint(rec_sysno_to_int(rec->sysno),
+ rec_encode_zint(rec_sysno_to_int(rec->sysno),
(unsigned char *) *out_buf + *out_offset, &len);
(*out_offset) += len;
}
}
}
+/*
+ * Store one encoded group of records as a single block chain.
+ *
+ * p           record store
+ * ref_count   number of records sharing the chain; 0 means there is
+ *             nothing to write and ZEBRA_OK is returned at once
+ * sysnos      passed on unchanged to rec_write_tmp_buf
+ * out_buf     encoded record data
+ * out_offset  number of valid bytes in out_buf
+ *
+ * The image is assembled in p->tmp_buf as
+ *   [zint slot][short ref_count][char compression method][payload]
+ * where the payload is out_buf compressed with the store's method. If
+ * the method is unavailable in this build, or compression fails, the
+ * payload is stored as-is and tagged REC_COMPRESS_NONE.
+ *
+ * Returns ZEBRA_FAIL when rec_write_tmp_buf fails, ZEBRA_OK otherwise.
+ */
+static ZEBRA_RES rec_flush_shared(Records p, short ref_count, zint *sysnos,
+                                  char *out_buf, int out_offset)
+{
+    /* offset of the payload inside tmp_buf */
+    const size_t hdr = sizeof(zint) + sizeof(short) + sizeof(char);
+    unsigned int csize = 0; /* 0 = nothing compressed (yet) */
+    char compression_method = p->compression_method;
+
+    if (!ref_count)
+        return ZEBRA_OK;
+
+    switch (compression_method)
+    {
+    case REC_COMPRESS_ZLIB:
+#if HAVE_ZLIB_H
+        {
+            /* capacity is tracked separately from the produced length so
+               that a retry always doubles the buffer really offered */
+            unsigned int cap = out_offset + (out_offset >> 6) + 620;
+            while (1)
+            {
+                uLongf destLen = cap;
+                int r;
+
+                /* may reallocate tmp_buf, so take the pointer afterwards */
+                rec_tmp_expand(p, cap);
+                r = compress((Bytef *) p->tmp_buf + hdr, &destLen,
+                             (const Bytef *) out_buf, out_offset);
+                if (r == Z_OK)
+                {
+                    csize = destLen;
+                    break;
+                }
+                if (r != Z_BUF_ERROR)
+                {
+                    /* Z_MEM_ERROR etc.: a bigger buffer will not help;
+                       csize stays 0 -> stored uncompressed below */
+                    yaz_log(YLOG_WARN, "compress error: %d", r);
+                    break;
+                }
+                /* Z_BUF_ERROR: output did not fit, retry with more room */
+                cap *= 2;
+            }
+        }
+#endif
+        break;
+    case REC_COMPRESS_BZIP2:
+#if HAVE_BZLIB_H
+        {
+            int bz_r;
+
+            csize = out_offset + (out_offset >> 6) + 620;
+            rec_tmp_expand(p, csize);
+#ifdef BZ_CONFIG_ERROR
+            bz_r = BZ2_bzBuffToBuffCompress
+#else
+            bz_r = bzBuffToBuffCompress
+#endif
+                (p->tmp_buf + hdr, &csize, out_buf, out_offset, 1, 0, 30);
+            if (bz_r != BZ_OK)
+            {
+                yaz_log(YLOG_WARN, "bzBuffToBuffCompress error code=%d",
+                        bz_r);
+                csize = 0;
+            }
+        }
+#endif
+        break;
+    case REC_COMPRESS_NONE:
+        break;
+    default:
+        break;
+    }
+    if (!csize)
+    {
+        /* either no compression or compression not supported ... */
+        csize = out_offset;
+        rec_tmp_expand(p, csize);
+        memcpy(p->tmp_buf + hdr, out_buf, out_offset);
+        compression_method = REC_COMPRESS_NONE;
+    }
+    memcpy(p->tmp_buf + sizeof(zint), &ref_count, sizeof(ref_count));
+    memcpy(p->tmp_buf + sizeof(zint) + sizeof(short),
+           &compression_method, sizeof(compression_method));
+
+    /* size passed on covers ref count + method byte + payload */
+    if (rec_write_tmp_buf(p, csize + sizeof(short) + sizeof(char), sysnos)
+        != ZEBRA_OK)
+        return ZEBRA_FAIL;
+    return ZEBRA_OK;
+}
+
+
static ZEBRA_RES rec_write_multiple(Records p, int saveCount)
{
int i;
short ref_count = 0;
- char compression_method;
Record last_rec = 0;
int out_size = 1000;
int out_offset = 0;
e->flag = recordFlagNop;
break;
- default:
+ case recordFlagNop:
break;
+ default:
+ break;
}
}
*sysnop = -1;
- if (ref_count)
- {
- unsigned int csize = 0; /* indicate compression "not performed yet" */
- compression_method = p->compression_method;
- switch (compression_method)
- {
- case REC_COMPRESS_BZIP2:
-#if HAVE_BZLIB_H
- csize = out_offset + (out_offset >> 6) + 620;
- rec_tmp_expand(p, csize);
-#ifdef BZ_CONFIG_ERROR
- i = BZ2_bzBuffToBuffCompress
-#else
- i = bzBuffToBuffCompress
-#endif
- (p->tmp_buf+sizeof(zint)+sizeof(short)+
- sizeof(char),
- &csize, out_buf, out_offset, 1, 0, 30);
- if (i != BZ_OK)
- {
- yaz_log(YLOG_WARN, "bzBuffToBuffCompress error code=%d", i);
- csize = 0;
- }
- yaz_log(YLOG_LOG, "compress %4d %5d %5d", ref_count, out_offset,
- csize);
-#endif
- break;
- case REC_COMPRESS_NONE:
- break;
- }
- if (!csize)
- {
- /* either no compression or compression not supported ... */
- csize = out_offset;
- rec_tmp_expand(p, csize);
- memcpy(p->tmp_buf + sizeof(zint) + sizeof(short) + sizeof(char),
- out_buf, out_offset);
- csize = out_offset;
- compression_method = REC_COMPRESS_NONE;
- }
- memcpy(p->tmp_buf + sizeof(zint), &ref_count, sizeof(ref_count));
- memcpy(p->tmp_buf + sizeof(zint)+sizeof(short),
- &compression_method, sizeof(compression_method));
-
- /* -------- compression */
- if (rec_write_tmp_buf(p, csize + sizeof(short) + sizeof(char), sysnos)
- != ZEBRA_OK)
- ret = ZEBRA_FAIL;
- }
+    /* propagate a failed write; the inline code this call replaced
+       set ret = ZEBRA_FAIL in that case */
+    if (rec_flush_shared(p, ref_count, sysnos, out_buf, out_offset)
+        != ZEBRA_OK)
+        ret = ZEBRA_FAIL;
xfree(out_buf);
xfree(sysnos);
return ret;
{
struct record_cache_entry *e = p->record_cache + i;
rec_free(&e->rec);
- }
+ }
/* i still being used ... */
for (j = 0; j<saveCount; j++, i++)
memcpy(p->record_cache+j, p->record_cache+i,
for (j = 0; j<REC_NO_INFO; j++)
used += r->size[j];
}
- if (used > 90000)
+ if (used > p->compression_chunk_size)
ret = rec_cache_flush(p, 1);
}
assert(p->cache_cur < p->cache_max);
char *nptr, *cptr;
char *in_buf = 0;
char *bz_buf = 0;
-#if HAVE_BZLIB_H
- unsigned int bz_size;
-#endif
char compression_method;
assert(sysno > 0);
freeblock = entry.next / 8;
assert(freeblock > 0);
-
+
rec_tmp_expand(p, entry.size);
cptr = p->tmp_buf;
zint tmp;
cptr += p->head.block_size[dst_type] - sizeof(freeblock);
-
+
memcpy(&tmp, cptr, sizeof(tmp));
r = bf_read(p->data_BFile[dst_type], freeblock, 0, 0, cptr);
if (r < 0)
in_size = entry.size - sizeof(short) - sizeof(char);
switch (compression_method)
{
+ case REC_COMPRESS_ZLIB:
+#if HAVE_ZLIB_H
+ if (1)
+ {
+ unsigned int bz_size = entry.size * 20 + 100;
+ while (1)
+ {
+ uLongf destLen = bz_size;
+ bz_buf = (char *) xmalloc(bz_size);
+ i = uncompress((Bytef *) bz_buf, &destLen,
+ (const Bytef *) in_buf, in_size);
+ if (i == Z_OK)
+ {
+ bz_size = destLen;
+ break;
+ }
+ yaz_log(YLOG_LOG, "failed");
+ xfree(bz_buf);
+ bz_size *= 2;
+ }
+ in_buf = bz_buf;
+ in_size = bz_size;
+ }
+#else
+ yaz_log(YLOG_FATAL, "cannot decompress record(s) in ZLIB format");
+ return 0;
+#endif
+ break;
case REC_COMPRESS_BZIP2:
#if HAVE_BZLIB_H
- bz_size = entry.size * 20 + 100;
- while (1)
- {
- bz_buf = (char *) xmalloc(bz_size);
+ if (1)
+ {
+ unsigned int bz_size = entry.size * 20 + 100;
+ while (1)
+ {
+ bz_buf = (char *) xmalloc(bz_size);
#ifdef BZ_CONFIG_ERROR
- i = BZ2_bzBuffToBuffDecompress
+ i = BZ2_bzBuffToBuffDecompress
#else
- i = bzBuffToBuffDecompress
+ i = bzBuffToBuffDecompress
#endif
- (bz_buf, &bz_size, in_buf, in_size, 0, 0);
- yaz_log(YLOG_LOG, "decompress %5d %5d", in_size, bz_size);
- if (i == BZ_OK)
- break;
- yaz_log(YLOG_LOG, "failed");
- xfree(bz_buf);
- bz_size *= 2;
- }
- in_buf = bz_buf;
- in_size = bz_size;
+ (bz_buf, &bz_size, in_buf, in_size, 0, 0);
+ if (i == BZ_OK)
+ break;
+ yaz_log(YLOG_LOG, "failed");
+ xfree(bz_buf);
+ bz_size *= 2;
+ }
+ in_buf = bz_buf;
+ in_size = bz_size;
+ }
#else
yaz_log(YLOG_FATAL, "cannot decompress record(s) in BZIP2 format");
return 0;
return rec_get(p, rec_sysno_to_ext(1));
}
+/*
+ * Fetch the first record that follows rec in system number order.
+ *
+ * Walks the internal system numbers after rec's, skipping those for
+ * which rec_get returns no record, and stops when head.index_last is
+ * reached.
+ *
+ * Returns the record obtained from rec_get, or 0 when no further record
+ * exists.
+ */
+Record rec_get_next(Records p, Record rec)
+{
+    Record next = 0;
+    zint next_sysno_int = rec_sysno_to_int(rec->sysno);
+
+    while (!next)
+    {
+        ++next_sysno_int;
+        /* >= rather than ==: if the starting sysno is already at or
+           beyond index_last an equality test would never fire and the
+           loop would not terminate */
+        if (next_sysno_int >= p->head.index_last)
+            break;
+        next = rec_get(p, rec_sysno_to_ext(next_sysno_int));
+    }
+    return next;
+}
+
+
static Record rec_new_int(Records p)
{
int i;
return p;
}
+/*
+ * Log statistics for a record store via yaz_log: the record count, and
+ * for every block type its block size, used/total blocks, bytes used
+ * and last block number. With verbose set, each free list is walked on
+ * disk and its length and members are logged as well. Ends with the
+ * record index size from the header and the total bytes held in used
+ * blocks.
+ */
+void rec_prstat(Records records, int verbose)
+{
+    int type;
+    zint bytes_in_blocks = 0;
+
+    yaz_log(YLOG_LOG,
+            "Total records %8" ZINT_FORMAT0,
+            records->head.no_records);
+
+    for (type = 0; type < REC_BLOCK_TYPES; type++)
+    {
+        const zint blk_size = records->head.block_size[type];
+        const zint blk_used = records->head.block_used[type];
+        const zint blk_last = records->head.block_last[type];
+        const zint bytes_used = blk_used * blk_size;
+
+        yaz_log(YLOG_LOG, "Record blocks of size "ZINT_FORMAT, blk_size);
+        yaz_log(YLOG_LOG,
+                " Used/Total/Bytes used "
+                ZINT_FORMAT "/" ZINT_FORMAT "/" ZINT_FORMAT,
+                blk_used, blk_last - 1, bytes_used);
+        bytes_in_blocks += bytes_used;
+
+        yaz_log(YLOG_LOG, " Block Last " ZINT_FORMAT, blk_last);
+        if (verbose)
+        {   /* follow the on-disk free list of this block type */
+            zint free_count = 0;
+            zint blk;
+            zint following;
+            WRBUF listing = wrbuf_alloc();
+
+            for (blk = records->head.block_free[type]; blk; blk = following)
+            {
+                free_count++;
+                wrbuf_printf(listing, " " ZINT_FORMAT, blk);
+                /* a free block starts with the number of the next one */
+                if (bf_read(records->data_BFile[type],
+                            blk, 0, sizeof(following), &following) != 1)
+                {
+                    yaz_log(YLOG_FATAL|YLOG_ERRNO, "read in %s at free block "
+                            ZINT_FORMAT,
+                            records->data_fname[type], blk);
+                    break;
+                }
+            }
+            yaz_log(YLOG_LOG,
+                    " Number in free list %8" ZINT_FORMAT0, free_count);
+            if (free_count)
+                yaz_log(YLOG_LOG, "%s", wrbuf_cstr(listing));
+            wrbuf_destroy(listing);
+        }
+    }
+    yaz_log(YLOG_LOG,
+            "Total size of record index in bytes %8" ZINT_FORMAT0,
+            records->head.total_bytes);
+    yaz_log(YLOG_LOG,
+            "Total size with overhead %8" ZINT_FORMAT0,
+            bytes_in_blocks);
+}
+
+
/*
* Local variables:
* c-basic-offset: 4
+ * c-file-style: "Stroustrup"
* indent-tabs-mode: nil
* End:
* vim: shiftwidth=4 tabstop=8 expandtab