X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Frecindex.c;h=c3392ddf4b00ef98d2bcdb62b7b26cf780e116d2;hb=f00cbbbdc1df5478ce74c4a13efafc42257e4cc1;hp=2fe16b8cdeb9e872cf4248a03f1ad2ba211feabf;hpb=795af4e3c7346eff351ff387228ec548956eada8;p=idzebra-moved-to-github.git diff --git a/index/recindex.c b/index/recindex.c index 2fe16b8..c3392dd 100644 --- a/index/recindex.c +++ b/index/recindex.c @@ -1,117 +1,36 @@ -/* - * Copyright (C) 1994-1999, Index Data - * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss - * - * $Log: recindex.c,v $ - * Revision 1.24 1999-06-25 13:48:02 adam - * Updated MSVC project files. - * Added BZIP2 record compression (not very well tested). - * - * Revision 1.23 1999/05/26 07:49:13 adam - * C++ compilation. - * - * Revision 1.22 1999/02/18 12:49:34 adam - * Changed file naming scheme for register files as well as record - * store/index files. - * - * Revision 1.21 1999/02/02 14:51:03 adam - * Updated WIN32 code specific sections. Changed header. - * - * Revision 1.20 1998/01/12 15:04:08 adam - * The test option (-s) only uses read-lock (and not write lock). - * - * Revision 1.19 1997/09/17 12:19:16 adam - * Zebra version corresponds to YAZ version 1.4. - * Changed Zebra server so that it doesn't depend on global common_resource. - * - * Revision 1.18 1997/07/15 16:28:42 adam - * Bug fix: storeData didn't work with files with multiple records. - * Bug fix: fixed memory management with records; not really well - * thought through. - * - * Revision 1.17 1997/02/12 20:39:46 adam - * Implemented options -f that limits the log to the first - * records. - * Changed some log messages also. - * - * Revision 1.16 1996/06/04 10:19:00 adam - * Minor changes - removed include of ctype.h. - * - * Revision 1.15 1996/05/13 14:23:06 adam - * Work on compaction of set/use bytes in dictionary. - * - * Revision 1.14 1996/02/01 20:48:15 adam - * The total size of records are always checked in rec_cache_insert to - * reduce memory usage. - * - * Revision 1.13 1995/12/11 09:12:49 adam - * The rec_get function returns NULL if record doesn't exist - will - * happen in the server if the result set records have been deleted since - * the creation of the set (i.e. the search). - * The server saves a result temporarily if it is 'volatile', i.e. the - * set is register dependent. - * - * Revision 1.12 1995/12/07 17:38:47 adam - * Work locking mechanisms for concurrent updates/commit. - * - * Revision 1.11 1995/12/06 13:58:26 adam - * Improved flushing of records - all flushes except the last one - * don't write the last accessed. Also flush takes place if record - * info occupy more than about 256k. - * - * Revision 1.10 1995/12/06 12:41:24 adam - * New command 'stat' for the index program. - * Filenames can be read from stdin by specifying '-'. - * Bug fix/enhancement of the transformation from terms to regular - * expressons in the search engine. - * - * Revision 1.9 1995/11/30 08:34:33 adam - * Started work on commit facility. - * Changed a few malloc/free to xmalloc/xfree. - * - * Revision 1.8 1995/11/28 14:26:21 adam - * Bug fix: recordId with constant wasn't right. - * Bug fix: recordId dictionary entry wasn't deleted when needed. - * - * Revision 1.7 1995/11/28 09:09:43 adam - * Zebra config renamed. - * Use setting 'recordId' to identify record now. - * Bug fix in recindex.c: rec_release_blocks was invokeded even - * though the blocks were already released. - * File traversal properly deletes records when needed. - * - * Revision 1.6 1995/11/25 10:24:06 adam - * More record fields - they are enumerated now. - * New options: flagStoreData flagStoreKey. - * - * Revision 1.5 1995/11/22 17:19:18 adam - * Record management uses the bfile system. - * - * Revision 1.4 1995/11/20 16:59:46 adam - * New update method: the 'old' keys are saved for each records. - * - * Revision 1.3 1995/11/16 15:34:55 adam - * Uses new record management system in both indexer and server. - * - * Revision 1.2 1995/11/15 19:13:08 adam - * Work on record management. - * - * Revision 1.1 1995/11/15 14:46:20 adam - * Started work on better record management system. - * - */ +/* $Id: recindex.c,v 1.44 2005-05-11 12:36:45 adam Exp $ + Copyright (C) 1995-2005 + Index Data ApS + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. +You should have received a copy of the GNU General Public License +along with Zebra; see the file LICENSE.zebra. If not, write to the +Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. +*/ + +#define RIDX_CHUNK 128 /* * Format of first block - * next (4 bytes) - * ref_count (4 bytes) - * block (504 bytes) + * next (8 bytes) + * ref_count (2 bytes) + * block (500 bytes) * * Format of subsequent blocks - * next (4 bytes) - * block (508 bytes) + * next (8 bytes) + * block (502 bytes) * * Format of each record * sysno @@ -119,97 +38,119 @@ * length = 0 if same as previous */ #include +#include #include #include +#include #include "recindxp.h" #if HAVE_BZLIB_H #include #endif -static void rec_write_head (Records p) +static void rec_write_head(Records p) { int r; - assert (p); - assert (p->index_BFile); + assert(p); + assert(p->index_BFile); - r = bf_write (p->index_BFile, 0, 0, sizeof(p->head), &p->head); + r = bf_write(p->index_BFile, 0, 0, sizeof(p->head), &p->head); if (r) { - logf (LOG_FATAL|LOG_ERRNO, "write head of %s", p->index_fname); - exit (1); + yaz_log(YLOG_FATAL|YLOG_ERRNO, "write head of %s", p->index_fname); + exit(1); } } -static void rec_tmp_expand (Records p, int size) +static void rec_tmp_expand(Records p, int size) { if (p->tmp_size < size + 2048 || p->tmp_size < p->head.block_size[REC_BLOCK_TYPES-1]*2) { - xfree (p->tmp_buf); - p->tmp_size = size + p->head.block_size[REC_BLOCK_TYPES-1]*2 + 2048; - p->tmp_buf = (char *) xmalloc (p->tmp_size); + xfree(p->tmp_buf); + p->tmp_size = size + (int) + (p->head.block_size[REC_BLOCK_TYPES-1])*2 + 2048; + p->tmp_buf = (char *) xmalloc(p->tmp_size); } } -static int read_indx (Records p, int sysno, void *buf, int itemsize, +static int read_indx(Records p, SYSNO sysno, void *buf, int itemsize, int ignoreError) { int r; - int pos = (sysno-1)*itemsize; + zint pos = (sysno-1)*itemsize; + int off = (int) (pos%RIDX_CHUNK); + int sz1 = RIDX_CHUNK - off; /* sz1 is size of buffer to read.. */ + + if (sz1 > itemsize) + sz1 = itemsize; /* no more than itemsize bytes */ - r = bf_read (p->index_BFile, 1+pos/128, pos%128, itemsize, buf); + r = bf_read(p->index_BFile, 1+pos/RIDX_CHUNK, off, sz1, buf); + if (r == 1 && sz1 < itemsize) /* boundary? - must read second part */ + r = bf_read(p->index_BFile, 2+pos/RIDX_CHUNK, 0, itemsize - sz1, + (char*) buf + sz1); if (r != 1 && !ignoreError) { - logf (LOG_FATAL|LOG_ERRNO, "read in %s at pos %ld", + yaz_log(YLOG_FATAL|YLOG_ERRNO, "read in %s at pos %ld", p->index_fname, (long) pos); - exit (1); + exit(1); } return r; } -static void write_indx (Records p, int sysno, void *buf, int itemsize) +static void write_indx(Records p, SYSNO sysno, void *buf, int itemsize) { - int pos = (sysno-1)*itemsize; + zint pos = (sysno-1)*itemsize; + int off = (int) (pos%RIDX_CHUNK); + int sz1 = RIDX_CHUNK - off; /* sz1 is size of buffer to read.. */ + + if (sz1 > itemsize) + sz1 = itemsize; /* no more than itemsize bytes */ - bf_write (p->index_BFile, 1+pos/128, pos%128, itemsize, buf); + bf_write(p->index_BFile, 1+pos/RIDX_CHUNK, off, sz1, buf); + if (sz1 < itemsize) /* boundary? must write second part */ + bf_write(p->index_BFile, 2+pos/RIDX_CHUNK, 0, itemsize - sz1, + (char*) buf + sz1); } -static void rec_release_blocks (Records p, int sysno) +static void rec_release_blocks(Records p, SYSNO sysno) { struct record_index_entry entry; - int freeblock; - int block_and_ref[2]; + zint freeblock; + char block_and_ref[sizeof(zint) + sizeof(short)]; int dst_type; int first = 1; - logf (LOG_LOG, "release_blocks for sysno=%d", sysno); - if (read_indx (p, sysno, &entry, sizeof(entry), 1) != 1) + if (read_indx(p, sysno, &entry, sizeof(entry), 1) != 1) return ; freeblock = entry.next; - assert (freeblock > 0); - dst_type = freeblock & 7; - assert (dst_type < REC_BLOCK_TYPES); + assert(freeblock > 0); + dst_type = (int) (freeblock & 7); + assert(dst_type < REC_BLOCK_TYPES); freeblock = freeblock / 8; while (freeblock) { - if (bf_read (p->data_BFile[dst_type], freeblock, 0, - sizeof(block_and_ref), block_and_ref) != 1) + if (bf_read(p->data_BFile[dst_type], freeblock, 0, + first ? sizeof(block_and_ref) : sizeof(zint), + block_and_ref) != 1) { - logf (LOG_FATAL|LOG_ERRNO, "read in rec_del_single"); + yaz_log(YLOG_FATAL|YLOG_ERRNO, "read in rec_del_single"); exit (1); } if (first) { - block_and_ref[1]--; - if (block_and_ref[1]) + short ref; + memcpy(&ref, block_and_ref + sizeof(freeblock), sizeof(ref)); + --ref; + memcpy(block_and_ref + sizeof(freeblock), &ref, sizeof(ref)); + if (ref) { - if (bf_write (p->data_BFile[dst_type], freeblock, 0, + if (bf_write(p->data_BFile[dst_type], freeblock, 0, sizeof(block_and_ref), block_and_ref)) { - logf (LOG_FATAL|LOG_ERRNO, "write in rec_del_single"); + yaz_log(YLOG_FATAL|YLOG_ERRNO, "write in rec_del_single"); exit (1); } return; @@ -217,72 +158,38 @@ static void rec_release_blocks (Records p, int sysno) first = 0; } - if (bf_write (p->data_BFile[dst_type], freeblock, 0, sizeof(freeblock), + if (bf_write(p->data_BFile[dst_type], freeblock, 0, sizeof(freeblock), &p->head.block_free[dst_type])) { - logf (LOG_FATAL|LOG_ERRNO, "write in rec_del_single"); - exit (1); + yaz_log(YLOG_FATAL|YLOG_ERRNO, "write in rec_del_single"); + exit(1); } p->head.block_free[dst_type] = freeblock; - freeblock = block_and_ref[0]; + memcpy(&freeblock, block_and_ref, sizeof(freeblock)); p->head.block_used[dst_type]--; } p->head.total_bytes -= entry.size; } -static void rec_delete_single (Records p, Record rec) +static void rec_delete_single(Records p, Record rec) { struct record_index_entry entry; - rec_release_blocks (p, rec->sysno); + rec_release_blocks(p, rec->sysno); entry.next = p->head.index_free; entry.size = 0; p->head.index_free = rec->sysno; - write_indx (p, rec->sysno, &entry, sizeof(entry)); + write_indx(p, rec->sysno, &entry, sizeof(entry)); } -static void rec_write_tmp_buf (Records p, int size, int *sysnos); - -static void rec_write_single (Records p, Record rec) -{ - - int sysnos[2]; - int i, size = 0; - char *cptr; - - logf (LOG_LOG, " rec_write_single !!!!!!!!!!!!!!!!!!!!!!!!!!!!"); - for (i = 0; i < REC_NO_INFO; i++) - if (!rec->info[i]) - size += sizeof(*rec->size); - else - size += sizeof(*rec->size) + rec->size[i]; - - rec_tmp_expand (p, size); - - cptr = p->tmp_buf + sizeof(int); /* a hack! */ - for (i = 0; i < REC_NO_INFO; i++) - { - memcpy (cptr, &rec->size[i], sizeof(*rec->size)); - cptr += sizeof(*rec->size); - if (rec->info[i]) - { - memcpy (cptr, rec->info[i], rec->size[i]); - cptr += rec->size[i]; - } - } - sysnos[0] = rec->sysno; - sysnos[1] = -1; - rec_write_tmp_buf (p, size, sysnos); -} - -static void rec_write_tmp_buf (Records p, int size, int *sysnos) +static void rec_write_tmp_buf(Records p, int size, SYSNO *sysnos) { struct record_index_entry entry; int no_written = 0; char *cptr = p->tmp_buf; - int block_prev = -1, block_free; + zint block_prev = -1, block_free; int dst_type = 0; int i; @@ -294,12 +201,13 @@ static void rec_write_tmp_buf (Records p, int size, int *sysnos) block_free = p->head.block_free[dst_type]; if (block_free) { - if (bf_read (p->data_BFile[dst_type], + if (bf_read(p->data_BFile[dst_type], block_free, 0, sizeof(*p->head.block_free), &p->head.block_free[dst_type]) != 1) { - logf (LOG_FATAL|LOG_ERRNO, "read in %s at free block %d", - p->data_fname[dst_type], block_free); + yaz_log(YLOG_FATAL|YLOG_ERRNO, "read in %s at free block " + ZINT_FORMAT, + p->data_fname[dst_type], block_free); exit (1); } } @@ -318,48 +226,45 @@ static void rec_write_tmp_buf (Records p, int size, int *sysnos) } else { - memcpy (cptr, &block_free, sizeof(int)); - bf_write (p->data_BFile[dst_type], block_prev, 0, 0, cptr); + memcpy(cptr, &block_free, sizeof(block_free)); + bf_write(p->data_BFile[dst_type], block_prev, 0, 0, cptr); cptr = p->tmp_buf + no_written; } block_prev = block_free; - no_written += p->head.block_size[dst_type] - sizeof(int); + no_written += (int)(p->head.block_size[dst_type]) - sizeof(zint); p->head.block_used[dst_type]++; } - assert (block_prev != -1); + assert(block_prev != -1); block_free = 0; - memcpy (cptr, &block_free, sizeof(int)); - bf_write (p->data_BFile[dst_type], block_prev, 0, - sizeof(int) + (p->tmp_buf+size) - cptr, cptr); + memcpy(cptr, &block_free, sizeof(block_free)); + bf_write(p->data_BFile[dst_type], block_prev, 0, + sizeof(block_free) + (p->tmp_buf+size) - cptr, cptr); } -static void rec_update_single (Records p, Record rec) -{ - rec_release_blocks (p, rec->sysno); - rec_write_single (p, rec); -} - -Records rec_open (BFiles bfs, int rw) +Records rec_open(BFiles bfs, int rw, int compression_method) { Records p; int i, r; + int version; p = (Records) xmalloc (sizeof(*p)); + p->compression_method = compression_method; p->rw = rw; p->tmp_size = 1024; p->tmp_buf = (char *) xmalloc (p->tmp_size); p->index_fname = "reci"; - p->index_BFile = bf_open (bfs, p->index_fname, 128, rw); + p->index_BFile = bf_open (bfs, p->index_fname, RIDX_CHUNK, rw); if (p->index_BFile == NULL) { - logf (LOG_FATAL|LOG_ERRNO, "open %s", p->index_fname); + yaz_log(YLOG_FATAL|YLOG_ERRNO, "open %s", p->index_fname); exit (1); } - r = bf_read (p->index_BFile, 0, 0, 0, p->tmp_buf); + r = bf_read(p->index_BFile, 0, 0, 0, p->tmp_buf); switch (r) { case 0: - memcpy (p->head.magic, REC_HEAD_MAGIC, sizeof(p->head.magic)); + memcpy(p->head.magic, REC_HEAD_MAGIC, sizeof(p->head.magic)); + sprintf (p->head.version, "%3d", REC_VERSION); p->head.index_free = 0; p->head.index_last = 1; p->head.no_records = 0; @@ -375,18 +280,25 @@ Records rec_open (BFiles bfs, int rw) for (i = 1; ihead.block_size[i] = p->head.block_size[i-1] * 4; - p->head.block_move[i] = p->head.block_size[i] * 3; + p->head.block_move[i] = p->head.block_size[i] * 24; } if (rw) - rec_write_head (p); + rec_write_head(p); break; case 1: - memcpy (&p->head, p->tmp_buf, sizeof(p->head)); + memcpy(&p->head, p->tmp_buf, sizeof(p->head)); if (memcmp (p->head.magic, REC_HEAD_MAGIC, sizeof(p->head.magic))) { - logf (LOG_FATAL, "read %s. bad header", p->index_fname); + yaz_log(YLOG_FATAL, "file %s has bad format", p->index_fname); exit (1); } + version = atoi (p->head.version); + if (version != REC_VERSION) + { + yaz_log(YLOG_FATAL, "file %s is version %d, but version" + " %d is required", p->index_fname, version, REC_VERSION); + exit (1); + } break; } for (i = 0; idata_fname[i] = (char *) xmalloc (strlen(str)+1); - strcpy (p->data_fname[i], str); + strcpy(p->data_fname[i], str); p->data_BFile[i] = NULL; } for (i = 0; idata_BFile[i] = bf_open (bfs, p->data_fname[i], - p->head.block_size[i], + (int) (p->head.block_size[i]), rw))) { - logf (LOG_FATAL|LOG_ERRNO, "bf_open %s", p->data_fname[i]); + yaz_log(YLOG_FATAL|YLOG_ERRNO, "bf_open %s", p->data_fname[i]); exit (1); } } @@ -411,10 +323,11 @@ Records rec_open (BFiles bfs, int rw) p->cache_cur = 0; p->record_cache = (struct record_cache_entry *) xmalloc (sizeof(*p->record_cache)*p->cache_max); + zebra_mutex_init (&p->mutex); return p; } -static void rec_encode_unsigned (unsigned n, unsigned char *buf, int *len) +static void rec_encode_unsigned(unsigned n, unsigned char *buf, int *len) { (*len) = 0; while (n > 127) @@ -443,9 +356,40 @@ static void rec_decode_unsigned(unsigned *np, unsigned char *buf, int *len) (*len)++; *np = n; } -static void rec_cache_flush_block1 (Records p, Record rec, Record last_rec, - char **out_buf, int *out_size, - int *out_offset) + +static void rec_encode_zint(zint n, unsigned char *buf, int *len) +{ + (*len) = 0; + while (n > 127) + { + buf[*len] = (unsigned) (128 + (n & 127)); + n = n >> 7; + (*len)++; + } + buf[*len] = (unsigned) n; + (*len)++; +} + +static void rec_decode_zint(zint *np, unsigned char *buf, int *len) +{ + zint n = 0; + zint w = 1; + (*len) = 0; + + while (buf[*len] > 127) + { + n += w*(buf[*len] & 127); + w = w << 7; + (*len)++; + } + n += w * buf[*len]; + (*len)++; + *np = n; +} + +static void rec_cache_flush_block1(Records p, Record rec, Record last_rec, + char **out_buf, int *out_size, + int *out_offset) { int i; int len; @@ -457,14 +401,14 @@ static void rec_cache_flush_block1 (Records p, Record rec, Record last_rec, int new_size = *out_offset + rec->size[i] + 65536; char *np = (char *) xmalloc (new_size); if (*out_offset) - memcpy (np, *out_buf, *out_offset); + memcpy(np, *out_buf, *out_offset); xfree (*out_buf); *out_size = new_size; *out_buf = np; } if (i == 0) { - rec_encode_unsigned (rec->sysno, *out_buf + *out_offset, &len); + rec_encode_zint (rec->sysno, *out_buf + *out_offset, &len); (*out_offset) += len; } if (rec->size[i] == 0) @@ -482,22 +426,23 @@ static void rec_cache_flush_block1 (Records p, Record rec, Record last_rec, { rec_encode_unsigned (rec->size[i]+1, *out_buf + *out_offset, &len); (*out_offset) += len; - memcpy (*out_buf + *out_offset, rec->info[i], rec->size[i]); + memcpy(*out_buf + *out_offset, rec->info[i], rec->size[i]); (*out_offset) += rec->size[i]; } } } -static void rec_write_multiple (Records p, int saveCount) +static void rec_write_multiple(Records p, int saveCount) { int i; - int ref_count = 0; + short ref_count = 0; + char compression_method; Record last_rec = 0; int out_size = 1000; int out_offset = 0; char *out_buf = (char *) xmalloc (out_size); - int *sysnos = (int *) xmalloc (sizeof(*sysnos) * (p->cache_cur + 1)); - int *sysnop = sysnos; + SYSNO *sysnos = (SYSNO *) xmalloc (sizeof(*sysnos) * (p->cache_cur + 1)); + SYSNO *sysnop = sysnos; for (i = 0; icache_cur - saveCount; i++) { @@ -521,37 +466,68 @@ static void rec_write_multiple (Records p, int saveCount) e->flag = recordFlagNop; last_rec = e->rec; break; + case recordFlagDelete: + rec_delete_single (p, e->rec); + e->flag = recordFlagNop; + break; default: break; } } + *sysnop = -1; if (ref_count) { - int csize = out_offset + (out_offset >> 6) + 620; - - rec_tmp_expand (p, csize); -#if HAVE_BZLIB_H - i = bzBuffToBuffCompress (p->tmp_buf+2*sizeof(int), &csize, - out_buf, out_offset, 9, 0, 30); - if (i != BZ_OK) + int csize = 0; /* indicate compression "not performed yet" */ + compression_method = p->compression_method; + switch (compression_method) { - logf (LOG_FATAL, "bzBuffToCompress error code=%d", i); - exit (1); - } + case REC_COMPRESS_BZIP2: +#if HAVE_BZLIB_H + csize = out_offset + (out_offset >> 6) + 620; + rec_tmp_expand (p, csize); +#ifdef BZ_CONFIG_ERROR + i = BZ2_bzBuffToBuffCompress #else - memcpy (p->tmp_buf + 2*sizeof(int), out_buf, out_offset); - csize = out_offset; + i = bzBuffToBuffCompress +#endif + (p->tmp_buf+sizeof(zint)+sizeof(short)+ + sizeof(char), + &csize, out_buf, out_offset, 1, 0, 30); + if (i != BZ_OK) + { + yaz_log(YLOG_WARN, "bzBuffToBuffCompress error code=%d", i); + csize = 0; + } + yaz_log(YLOG_LOG, "compress %4d %5d %5d", ref_count, out_offset, + csize); #endif - memcpy (p->tmp_buf + sizeof(int), &ref_count, sizeof(ref_count)); + break; + case REC_COMPRESS_NONE: + break; + } + if (!csize) + { + /* either no compression or compression not supported ... */ + csize = out_offset; + rec_tmp_expand (p, csize); + memcpy(p->tmp_buf + sizeof(zint) + sizeof(short) + sizeof(char), + out_buf, out_offset); + csize = out_offset; + compression_method = REC_COMPRESS_NONE; + } + memcpy(p->tmp_buf + sizeof(zint), &ref_count, sizeof(ref_count)); + memcpy(p->tmp_buf + sizeof(zint)+sizeof(short), + &compression_method, sizeof(compression_method)); + /* -------- compression */ - rec_write_tmp_buf (p, csize + sizeof(int), sysnos); + rec_write_tmp_buf (p, csize + sizeof(short) + sizeof(char), sysnos); } xfree (out_buf); xfree (sysnos); } -static void rec_cache_flush (Records p, int saveCount) +static void rec_cache_flush(Records p, int saveCount) { int i, j; @@ -559,33 +535,21 @@ static void rec_cache_flush (Records p, int saveCount) saveCount = 0; rec_write_multiple (p, saveCount); + for (i = 0; icache_cur - saveCount; i++) { struct record_cache_entry *e = p->record_cache + i; - switch (e->flag) - { - case recordFlagNop: - break; - case recordFlagNew: - rec_write_single (p, e->rec); - break; - case recordFlagWrite: - rec_update_single (p, e->rec); - break; - case recordFlagDelete: - rec_delete_single (p, e->rec); - break; - } - rec_rm (&e->rec); - } + rec_rm(&e->rec); + } + /* i still being used ... */ for (j = 0; jrecord_cache+j, p->record_cache+i, + memcpy(p->record_cache+j, p->record_cache+i, sizeof(*p->record_cache)); p->cache_cur = saveCount; } -static Record *rec_cache_lookup (Records p, int sysno, - enum recordCacheFlag flag) +static Record *rec_cache_lookup(Records p, SYSNO sysno, + enum recordCacheFlag flag) { int i; for (i = 0; icache_cur; i++) @@ -601,7 +565,7 @@ static Record *rec_cache_lookup (Records p, int sysno, return NULL; } -static void rec_cache_insert (Records p, Record rec, enum recordCacheFlag flag) +static void rec_cache_insert(Records p, Record rec, enum recordCacheFlag flag) { struct record_cache_entry *e; @@ -617,28 +581,29 @@ static void rec_cache_insert (Records p, Record rec, enum recordCacheFlag flag) for (j = 0; jsize[j]; } - if (used > 256000) + if (used > 90000) rec_cache_flush (p, 1); } - assert (p->cache_cur < p->cache_max); + assert(p->cache_cur < p->cache_max); e = p->record_cache + (p->cache_cur)++; e->flag = flag; e->rec = rec_cp (rec); } -void rec_close (Records *pp) +void rec_close(Records *pp) { Records p = *pp; int i; - assert (p); + assert(p); + zebra_mutex_destroy(&p->mutex); rec_cache_flush (p, 0); xfree (p->record_cache); if (p->rw) - rec_write_head (p); + rec_write_head(p); if (p->index_BFile) bf_close (p->index_BFile); @@ -654,74 +619,107 @@ void rec_close (Records *pp) *pp = NULL; } - -Record rec_get (Records p, int sysno) +static Record rec_get_int(Records p, SYSNO sysno) { - int i, in_size; + int i, in_size, r; Record rec, *recp; struct record_index_entry entry; - int freeblock, dst_type; + zint freeblock; + int dst_type; char *nptr, *cptr; char *in_buf = 0; + char *bz_buf = 0; +#if HAVE_BZLIB_H + int bz_size; +#endif + char compression_method; - assert (sysno > 0); - assert (p); + assert(sysno > 0); + assert(p); if ((recp = rec_cache_lookup (p, sysno, recordFlagNop))) return rec_cp (*recp); - if (!read_indx (p, sysno, &entry, sizeof(entry), 1)) + if (read_indx (p, sysno, &entry, sizeof(entry), 1) < 1) return NULL; /* record is not there! */ if (!entry.size) return NULL; /* record is deleted */ - dst_type = entry.next & 7; - assert (dst_type < REC_BLOCK_TYPES); + dst_type = (int) (entry.next & 7); + assert(dst_type < REC_BLOCK_TYPES); freeblock = entry.next / 8; - assert (freeblock > 0); + assert(freeblock > 0); - rec = (Record) xmalloc (sizeof(*rec)); rec_tmp_expand (p, entry.size); cptr = p->tmp_buf; - bf_read (p->data_BFile[dst_type], freeblock, 0, 0, cptr); - memcpy (&freeblock, cptr, sizeof(freeblock)); + r = bf_read(p->data_BFile[dst_type], freeblock, 0, 0, cptr); + if (r < 0) + return 0; + memcpy(&freeblock, cptr, sizeof(freeblock)); while (freeblock) { - int tmp; + zint tmp; cptr += p->head.block_size[dst_type] - sizeof(freeblock); - memcpy (&tmp, cptr, sizeof(tmp)); - bf_read (p->data_BFile[dst_type], freeblock, 0, 0, cptr); - memcpy (&freeblock, cptr, sizeof(freeblock)); - memcpy (cptr, &tmp, sizeof(tmp)); + memcpy(&tmp, cptr, sizeof(tmp)); + r = bf_read(p->data_BFile[dst_type], freeblock, 0, 0, cptr); + if (r < 0) + return 0; + memcpy(&freeblock, cptr, sizeof(freeblock)); + memcpy(cptr, &tmp, sizeof(tmp)); } + rec = (Record) xmalloc (sizeof(*rec)); rec->sysno = sysno; -#if HAVE_BZLIB_H - in_size = entry.size * 30+100; - in_buf = (char *) xmalloc (in_size); - i = bzBuffToBuffDecompress (in_buf, &in_size, p->tmp_buf+2*sizeof(int), - entry.size-sizeof(int), 0, 4); - if (i != BZ_OK) + memcpy(&compression_method, p->tmp_buf + sizeof(zint) + sizeof(short), + sizeof(compression_method)); + in_buf = p->tmp_buf + sizeof(zint) + sizeof(short) + sizeof(char); + in_size = entry.size - sizeof(short) - sizeof(char); + switch (compression_method) { - logf (LOG_FATAL, "bzBuffToDecompress error code=%d", i); - exit (1); - } + case REC_COMPRESS_BZIP2: +#if HAVE_BZLIB_H + bz_size = entry.size * 20 + 100; + while (1) + { + bz_buf = (char *) xmalloc (bz_size); +#ifdef BZ_CONFIG_ERROR + i = BZ2_bzBuffToBuffDecompress #else - in_buf = p->tmp_buf + 2*sizeof(int); - in_size = entry.size - sizeof(int); + i = bzBuffToBuffDecompress #endif + (bz_buf, &bz_size, in_buf, in_size, 0, 0); + yaz_log(YLOG_LOG, "decompress %5d %5d", in_size, bz_size); + if (i == BZ_OK) + break; + yaz_log(YLOG_LOG, "failed"); + xfree (bz_buf); + bz_size *= 2; + } + in_buf = bz_buf; + in_size = bz_size; +#else + yaz_log(YLOG_FATAL, "cannot decompress record(s) in BZIP2 format"); + exit (1); +#endif + break; + case REC_COMPRESS_NONE: + break; + } + for (i = 0; iinfo[i] = 0; + nptr = in_buf; /* skip ref count */ while (nptr < in_buf + in_size) { - int this_sysno; + zint this_sysno; int len; - rec_decode_unsigned (&this_sysno, nptr, &len); + rec_decode_zint (&this_sysno, nptr, &len); nptr += len; for (i = 0; i < REC_NO_INFO; i++) @@ -736,8 +734,7 @@ Record rec_get (Records p, int sysno) if (rec->size[i]) { - rec->info[i] = (char *) xmalloc (rec->size[i]); - memcpy (rec->info[i], nptr, rec->size[i]); + rec->info[i] = nptr; nptr += rec->size[i]; } else @@ -746,17 +743,43 @@ Record rec_get (Records p, int sysno) if (this_sysno == sysno) break; } - xfree (in_buf); - rec_cache_insert (p, rec, recordFlagNop); + for (i = 0; iinfo[i] && rec->size[i]) + { + char *np = xmalloc (rec->size[i]+1); + memcpy(np, rec->info[i], rec->size[i]); + np[rec->size[i]] = '\0'; + rec->info[i] = np; + } + else + { + assert(rec->info[i] == 0); + assert(rec->size[i] == 0); + } + } + xfree (bz_buf); + rec_cache_insert(p, rec, recordFlagNop); return rec; } -Record rec_new (Records p) +Record rec_get(Records p, SYSNO sysno) { - int sysno, i; Record rec; + zebra_mutex_lock (&p->mutex); - assert (p); + rec = rec_get_int (p, sysno); + zebra_mutex_unlock (&p->mutex); + return rec; +} + +static Record rec_new_int(Records p) +{ + int i; + SYSNO sysno; + Record rec; + + assert(p); rec = (Record) xmalloc (sizeof(*rec)); if (1 || p->head.index_free == 0) sysno = (p->head.index_last)++; @@ -775,46 +798,60 @@ Record rec_new (Records p) rec->info[i] = NULL; rec->size[i] = 0; } - rec_cache_insert (p, rec, recordFlagNew); + rec_cache_insert(p, rec, recordFlagNew); + return rec; +} + +Record rec_new(Records p) +{ + Record rec; + zebra_mutex_lock (&p->mutex); + + rec = rec_new_int (p); + zebra_mutex_unlock (&p->mutex); return rec; } -void rec_del (Records p, Record *recpp) +void rec_del(Records p, Record *recpp) { Record *recp; + zebra_mutex_lock (&p->mutex); (p->head.no_records)--; if ((recp = rec_cache_lookup (p, (*recpp)->sysno, recordFlagDelete))) { - rec_rm (recp); + rec_rm(recp); *recp = *recpp; } else { - rec_cache_insert (p, *recpp, recordFlagDelete); - rec_rm (recpp); + rec_cache_insert(p, *recpp, recordFlagDelete); + rec_rm(recpp); } + zebra_mutex_unlock (&p->mutex); *recpp = NULL; } -void rec_put (Records p, Record *recpp) +void rec_put(Records p, Record *recpp) { Record *recp; + zebra_mutex_lock (&p->mutex); if ((recp = rec_cache_lookup (p, (*recpp)->sysno, recordFlagWrite))) { - rec_rm (recp); + rec_rm(recp); *recp = *recpp; } else { - rec_cache_insert (p, *recpp, recordFlagWrite); - rec_rm (recpp); + rec_cache_insert(p, *recpp, recordFlagWrite); + rec_rm(recpp); } + zebra_mutex_unlock (&p->mutex); *recpp = NULL; } -void rec_rm (Record *recpp) +void rec_rm(Record *recpp) { int i; @@ -826,7 +863,7 @@ void rec_rm (Record *recpp) *recpp = NULL; } -Record rec_cp (Record rec) +Record rec_cp(Record rec) { Record n; int i; @@ -843,13 +880,13 @@ Record rec_cp (Record rec) { n->size[i] = rec->size[i]; n->info[i] = (char *) xmalloc (rec->size[i]); - memcpy (n->info[i], rec->info[i], rec->size[i]); + memcpy(n->info[i], rec->info[i], rec->size[i]); } return n; } -char *rec_strdup (const char *s, size_t *len) +char *rec_strdup(const char *s, size_t *len) { char *p; @@ -860,7 +897,7 @@ char *rec_strdup (const char *s, size_t *len) } *len = strlen(s)+1; p = (char *) xmalloc (*len); - strcpy (p, s); + strcpy(p, s); return p; }