X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Frecords.c;h=bb434967c6bc172329a8033d6c8742c56462c241;hp=e52368f54379e139a2f0984dafe652ac249bfdb2;hb=250de4ed23a44f5eb3552db317eef0d0fbe3265c;hpb=f3f94568869a1ae0402bfa4f0dea4d80b09695df diff --git a/index/records.c b/index/records.c index e52368f..bb43496 100644 --- a/index/records.c +++ b/index/records.c @@ -1,5 +1,5 @@ /* This file is part of the Zebra server. - Copyright (C) 1995-2008 Index Data + Copyright (C) 2004-2013 Index Data Zebra is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -18,12 +18,12 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* - * Format of first block + * Format of first block (assumes a 512 block size) * next (8 bytes) * ref_count (2 bytes) * block (500 bytes) * - * Format of subsequent blocks + * Format of subsequent blocks * next (8 bytes) * block (502 bytes) * @@ -32,6 +32,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * (length, data) - pairs * length = 0 if same as previous */ +#if HAVE_CONFIG_H +#include +#endif #include #include #include @@ -44,6 +47,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #if HAVE_BZLIB_H #include #endif +#if HAVE_ZLIB_H +#include +#endif #define REC_BLOCK_TYPES 2 #define REC_HEAD_MAGIC "recindex" @@ -171,6 +177,7 @@ static ZEBRA_RES rec_release_blocks(Records p, zint sysno) memcpy(block_and_ref + sizeof(freeblock), &ref, sizeof(ref)); if (ref) { + /* there is still a reference to this block.. */ if (bf_write(p->data_BFile[dst_type], freeblock, 0, sizeof(block_and_ref), block_and_ref)) { @@ -179,9 +186,10 @@ static ZEBRA_RES rec_release_blocks(Records p, zint sysno) } return ZEBRA_OK; } - first = 0; + /* the list of blocks can all be removed (ref == 0) */ + first = 0; } - + if (bf_write(p->data_BFile[dst_type], freeblock, 0, sizeof(freeblock), &p->head.block_free[dst_type])) { @@ -206,7 +214,6 @@ static ZEBRA_RES rec_delete_single(Records p, Record rec) if (rec_release_blocks(p, rec_sysno_to_int(rec->sysno)) != ZEBRA_OK) return ZEBRA_FAIL; - yaz_log(YLOG_LOG, "rec_delete_single sysno=" ZINT_FORMAT, rec->sysno); entry.next = p->head.index_free; entry.size = 0; p->head.index_free = rec_sysno_to_int(rec->sysno); @@ -264,7 +271,7 @@ static ZEBRA_RES rec_write_tmp_buf(Records p, int size, zint *sysnos) cptr = p->tmp_buf + no_written; } block_prev = block_free; - no_written += CAST_ZINT_TO_INT(p->head.block_size[dst_type]) + no_written += CAST_ZINT_TO_INT(p->head.block_size[dst_type]) - sizeof(zint); p->head.block_used[dst_type]++; } @@ -276,6 +283,28 @@ static ZEBRA_RES rec_write_tmp_buf(Records p, int size, zint *sysnos) return ZEBRA_OK; } +int rec_check_compression_method(int compression_method) +{ + switch(compression_method) + { + case REC_COMPRESS_ZLIB: +#if HAVE_ZLIB_H + return 1; +#else + return 0; +#endif + case REC_COMPRESS_BZIP2: +#if HAVE_BZLIB_H + return 1; +#else + return 0; +#endif + case REC_COMPRESS_NONE: + return 1; + } + return 0; +} + Records rec_open(BFiles bfs, int rw, int compression_method) { Records p; @@ -287,9 +316,11 @@ Records rec_open(BFiles bfs, int rw, int compression_method) memset(&p->head, '\0', sizeof(p->head)); p->compression_method = compression_method; p->rw = rw; - p->tmp_size = 1024; + p->tmp_size = 4096; p->tmp_buf = (char *) xmalloc(p->tmp_size); p->compression_chunk_size = 0; + if (compression_method == REC_COMPRESS_BZIP2) + p->compression_chunk_size = 90000; p->recindex = recindex_open(bfs, rw, 0 /* 1=isamb for recindex */); r = recindex_read_head(p->recindex, p->tmp_buf); switch (r) @@ -307,16 +338,16 @@ Records rec_open(BFiles bfs, int rw, int compression_method) p->head.block_last[i] = 1; p->head.block_used[i] = 0; } - p->head.block_size[0] = 128; + p->head.block_size[0] = 256; p->head.block_move[0] = 0; for (i = 1; ihead.block_size[i] = p->head.block_size[i-1] * 4; - p->head.block_move[i] = p->head.block_size[i] * 24; + p->head.block_size[i] = p->head.block_size[i-1] * 8; + p->head.block_move[i] = p->head.block_size[i] * 2; } if (rw) { - if (recindex_write_head(p->recindex, + if (recindex_write_head(p->recindex, &p->head, sizeof(p->head)) != ZEBRA_OK) ret = ZEBRA_FAIL; } @@ -337,7 +368,6 @@ Records rec_open(BFiles bfs, int rw, int compression_method) recindex_get_fname(p->recindex), version, REC_VERSION); ret = ZEBRA_FAIL; } - p->compression_chunk_size = 90000; /* good for BZIP2 */ break; } for (i = 0; isysno), + rec_encode_zint(rec_sysno_to_int(rec->sysno), (unsigned char *) *out_buf + *out_offset, &len); (*out_offset) += len; } @@ -479,11 +509,91 @@ static void rec_cache_flush_block1(Records p, Record rec, Record last_rec, } } +static ZEBRA_RES rec_flush_shared(Records p, short ref_count, zint *sysnos, + char *out_buf, int out_offset) +{ + ZEBRA_RES ret = ZEBRA_OK; + if (ref_count) + { + int i; + unsigned int csize = 0; /* indicate compression "not performed yet" */ + char compression_method = p->compression_method; + switch (compression_method) + { + case REC_COMPRESS_ZLIB: +#if HAVE_ZLIB_H + csize = out_offset + (out_offset >> 6) + 620; + while (1) + { + int r; + uLongf destLen = csize; + rec_tmp_expand(p, csize); + r = compress((Bytef *) p->tmp_buf+sizeof(zint)+sizeof(short)+ + sizeof(char), + &destLen, (const Bytef *) out_buf, out_offset); + csize = destLen; + if (r == Z_OK) + { + break; + } + if (r != Z_MEM_ERROR) + { + yaz_log(YLOG_WARN, "compress error: %d", r); + csize = 0; + break; + } + csize = csize * 2; + } +#endif + break; + case REC_COMPRESS_BZIP2: +#if HAVE_BZLIB_H + csize = out_offset + (out_offset >> 6) + 620; + rec_tmp_expand(p, csize); +#ifdef BZ_CONFIG_ERROR + i = BZ2_bzBuffToBuffCompress +#else + i = bzBuffToBuffCompress +#endif + (p->tmp_buf+sizeof(zint)+sizeof(short)+ + sizeof(char), + &csize, out_buf, out_offset, 1, 0, 30); + if (i != BZ_OK) + { + yaz_log(YLOG_WARN, "bzBuffToBuffCompress error code=%d", i); + csize = 0; + } +#endif + break; + case REC_COMPRESS_NONE: + break; + } + if (!csize) + { + /* either no compression or compression not supported ... */ + csize = out_offset; + rec_tmp_expand(p, csize); + memcpy(p->tmp_buf + sizeof(zint) + sizeof(short) + sizeof(char), + out_buf, out_offset); + csize = out_offset; + compression_method = REC_COMPRESS_NONE; + } + memcpy(p->tmp_buf + sizeof(zint), &ref_count, sizeof(ref_count)); + memcpy(p->tmp_buf + sizeof(zint)+sizeof(short), + &compression_method, sizeof(compression_method)); + + /* -------- compression */ + if (rec_write_tmp_buf(p, csize + sizeof(short) + sizeof(char), sysnos) + != ZEBRA_OK) + ret = ZEBRA_FAIL; + } + return ret; +} + static ZEBRA_RES rec_write_multiple(Records p, int saveCount) { int i; short ref_count = 0; - char compression_method; Record last_rec = 0; int out_size = 1000; int out_offset = 0; @@ -523,61 +633,15 @@ static ZEBRA_RES rec_write_multiple(Records p, int saveCount) e->flag = recordFlagNop; break; - default: + case recordFlagNop: break; + default: + break; } } *sysnop = -1; - if (ref_count) - { - unsigned int csize = 0; /* indicate compression "not performed yet" */ - compression_method = p->compression_method; - switch (compression_method) - { - case REC_COMPRESS_BZIP2: -#if HAVE_BZLIB_H - csize = out_offset + (out_offset >> 6) + 620; - rec_tmp_expand(p, csize); -#ifdef BZ_CONFIG_ERROR - i = BZ2_bzBuffToBuffCompress -#else - i = bzBuffToBuffCompress -#endif - (p->tmp_buf+sizeof(zint)+sizeof(short)+ - sizeof(char), - &csize, out_buf, out_offset, 1, 0, 30); - if (i != BZ_OK) - { - yaz_log(YLOG_WARN, "bzBuffToBuffCompress error code=%d", i); - csize = 0; - } - yaz_log(YLOG_LOG, "compress %4d %5d %5d", ref_count, out_offset, - csize); -#endif - break; - case REC_COMPRESS_NONE: - break; - } - if (!csize) - { - /* either no compression or compression not supported ... */ - csize = out_offset; - rec_tmp_expand(p, csize); - memcpy(p->tmp_buf + sizeof(zint) + sizeof(short) + sizeof(char), - out_buf, out_offset); - csize = out_offset; - compression_method = REC_COMPRESS_NONE; - } - memcpy(p->tmp_buf + sizeof(zint), &ref_count, sizeof(ref_count)); - memcpy(p->tmp_buf + sizeof(zint)+sizeof(short), - &compression_method, sizeof(compression_method)); - - /* -------- compression */ - if (rec_write_tmp_buf(p, csize + sizeof(short) + sizeof(char), sysnos) - != ZEBRA_OK) - ret = ZEBRA_FAIL; - } + rec_flush_shared(p, ref_count, sysnos, out_buf, out_offset); xfree(out_buf); xfree(sysnos); return ret; @@ -597,7 +661,7 @@ static ZEBRA_RES rec_cache_flush(Records p, int saveCount) { struct record_cache_entry *e = p->record_cache + i; rec_free(&e->rec); - } + } /* i still being used ... */ for (j = 0; jrecord_cache+j, p->record_cache+i, @@ -696,9 +760,6 @@ static Record rec_get_int(Records p, zint sysno) char *nptr, *cptr; char *in_buf = 0; char *bz_buf = 0; -#if HAVE_BZLIB_H - unsigned int bz_size; -#endif char compression_method; assert(sysno > 0); @@ -718,7 +779,7 @@ static Record rec_get_int(Records p, zint sysno) freeblock = entry.next / 8; assert(freeblock > 0); - + rec_tmp_expand(p, entry.size); cptr = p->tmp_buf; @@ -732,7 +793,7 @@ static Record rec_get_int(Records p, zint sysno) zint tmp; cptr += p->head.block_size[dst_type] - sizeof(freeblock); - + memcpy(&tmp, cptr, sizeof(tmp)); r = bf_read(p->data_BFile[dst_type], freeblock, 0, 0, cptr); if (r < 0) @@ -749,27 +810,57 @@ static Record rec_get_int(Records p, zint sysno) in_size = entry.size - sizeof(short) - sizeof(char); switch (compression_method) { + case REC_COMPRESS_ZLIB: +#if HAVE_ZLIB_H + if (1) + { + unsigned int bz_size = entry.size * 20 + 100; + while (1) + { + uLongf destLen = bz_size; + bz_buf = (char *) xmalloc(bz_size); + i = uncompress((Bytef *) bz_buf, &destLen, + (const Bytef *) in_buf, in_size); + if (i == Z_OK) + { + bz_size = destLen; + break; + } + yaz_log(YLOG_LOG, "failed"); + xfree(bz_buf); + bz_size *= 2; + } + in_buf = bz_buf; + in_size = bz_size; + } +#else + yaz_log(YLOG_FATAL, "cannot decompress record(s) in ZLIB format"); + return 0; +#endif + break; case REC_COMPRESS_BZIP2: #if HAVE_BZLIB_H - bz_size = entry.size * 20 + 100; - while (1) - { - bz_buf = (char *) xmalloc(bz_size); + if (1) + { + unsigned int bz_size = entry.size * 20 + 100; + while (1) + { + bz_buf = (char *) xmalloc(bz_size); #ifdef BZ_CONFIG_ERROR - i = BZ2_bzBuffToBuffDecompress + i = BZ2_bzBuffToBuffDecompress #else - i = bzBuffToBuffDecompress + i = bzBuffToBuffDecompress #endif - (bz_buf, &bz_size, in_buf, in_size, 0, 0); - yaz_log(YLOG_LOG, "decompress %5d %5d", in_size, bz_size); - if (i == BZ_OK) - break; - yaz_log(YLOG_LOG, "failed"); - xfree(bz_buf); - bz_size *= 2; - } - in_buf = bz_buf; - in_size = bz_size; + (bz_buf, &bz_size, in_buf, in_size, 0, 0); + if (i == BZ_OK) + break; + yaz_log(YLOG_LOG, "failed"); + xfree(bz_buf); + bz_size *= 2; + } + in_buf = bz_buf; + in_size = bz_size; + } #else yaz_log(YLOG_FATAL, "cannot decompress record(s) in BZIP2 format"); return 0; @@ -846,6 +937,21 @@ Record rec_get_root(Records p) return rec_get(p, rec_sysno_to_ext(1)); } +Record rec_get_next(Records p, Record rec) +{ + Record next = 0; + zint next_sysno_int = rec_sysno_to_int(rec->sysno); + + while (!next) + { + ++next_sysno_int; + if (next_sysno_int == p->head.index_last) + break; + next = rec_get(p, rec_sysno_to_ext(next_sysno_int)); + } + return next; +} + static Record rec_new_int(Records p) { int i; @@ -983,11 +1089,11 @@ char *rec_strdup(const char *s, size_t *len) return p; } -void rec_prstat(Records records) +void rec_prstat(Records records, int verbose) { int i; zint total_bytes = 0; - + yaz_log (YLOG_LOG, "Total records %8" ZINT_FORMAT0, records->head.no_records); @@ -1003,6 +1109,34 @@ void rec_prstat(Records records) records->head.block_used[i] * records->head.block_size[i]); total_bytes += records->head.block_used[i] * records->head.block_size[i]; + + yaz_log(YLOG_LOG, " Block Last " ZINT_FORMAT, records->head.block_last[i]); + if (verbose) + { /* analyse free lists */ + zint no_free = 0; + zint block_free = records->head.block_free[i]; + WRBUF w = wrbuf_alloc(); + while (block_free) + { + zint nblock; + no_free++; + wrbuf_printf(w, " " ZINT_FORMAT, block_free); + if (bf_read(records->data_BFile[i], + block_free, 0, sizeof(nblock), &nblock) != 1) + { + yaz_log(YLOG_FATAL|YLOG_ERRNO, "read in %s at free block " + ZINT_FORMAT, + records->data_fname[i], block_free); + break; + } + block_free = nblock; + } + yaz_log (YLOG_LOG, + " Number in free list %8" ZINT_FORMAT0, no_free); + if (no_free) + yaz_log(YLOG_LOG, "%s", wrbuf_cstr(w)); + wrbuf_destroy(w); + } } yaz_log (YLOG_LOG, "Total size of record index in bytes %8" ZINT_FORMAT0, @@ -1015,6 +1149,7 @@ void rec_prstat(Records records) /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab