X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Frecindex.c;h=bc9a1b5596b06408004b56e5379e8e3fcbc42359;hb=47ea1fc957c7b97bb30a26698f072109cae275e4;hp=0244da9c526c4796cfcc00ec339a5b9d1ab23100;hpb=f3425fb457792aae865096cf9acf5cb41798b1d1;p=idzebra-moved-to-github.git diff --git a/index/recindex.c b/index/recindex.c index 0244da9..bc9a1b5 100644 --- a/index/recindex.c +++ b/index/recindex.c @@ -1,109 +1,8 @@ /* - * Copyright (C) 1994-1999, Index Data + * Copyright (C) 1994-2002, Index Data * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss - * - * $Log: recindex.c,v $ - * Revision 1.25 1999-07-06 12:28:04 adam - * Updated record index structure. Format includes version ID. Compression - * algorithm ID is stored for each record block. - * - * Revision 1.24 1999/06/25 13:48:02 adam - * Updated MSVC project files. - * Added BZIP2 record compression (not very well tested). - * - * Revision 1.23 1999/05/26 07:49:13 adam - * C++ compilation. - * - * Revision 1.22 1999/02/18 12:49:34 adam - * Changed file naming scheme for register files as well as record - * store/index files. - * - * Revision 1.21 1999/02/02 14:51:03 adam - * Updated WIN32 code specific sections. Changed header. - * - * Revision 1.20 1998/01/12 15:04:08 adam - * The test option (-s) only uses read-lock (and not write lock). - * - * Revision 1.19 1997/09/17 12:19:16 adam - * Zebra version corresponds to YAZ version 1.4. - * Changed Zebra server so that it doesn't depend on global common_resource. - * - * Revision 1.18 1997/07/15 16:28:42 adam - * Bug fix: storeData didn't work with files with multiple records. - * Bug fix: fixed memory management with records; not really well - * thought through. - * - * Revision 1.17 1997/02/12 20:39:46 adam - * Implemented options -f that limits the log to the first - * records. - * Changed some log messages also. - * - * Revision 1.16 1996/06/04 10:19:00 adam - * Minor changes - removed include of ctype.h. - * - * Revision 1.15 1996/05/13 14:23:06 adam - * Work on compaction of set/use bytes in dictionary. - * - * Revision 1.14 1996/02/01 20:48:15 adam - * The total size of records are always checked in rec_cache_insert to - * reduce memory usage. - * - * Revision 1.13 1995/12/11 09:12:49 adam - * The rec_get function returns NULL if record doesn't exist - will - * happen in the server if the result set records have been deleted since - * the creation of the set (i.e. the search). - * The server saves a result temporarily if it is 'volatile', i.e. the - * set is register dependent. - * - * Revision 1.12 1995/12/07 17:38:47 adam - * Work locking mechanisms for concurrent updates/commit. - * - * Revision 1.11 1995/12/06 13:58:26 adam - * Improved flushing of records - all flushes except the last one - * don't write the last accessed. Also flush takes place if record - * info occupy more than about 256k. - * - * Revision 1.10 1995/12/06 12:41:24 adam - * New command 'stat' for the index program. - * Filenames can be read from stdin by specifying '-'. - * Bug fix/enhancement of the transformation from terms to regular - * expressons in the search engine. - * - * Revision 1.9 1995/11/30 08:34:33 adam - * Started work on commit facility. - * Changed a few malloc/free to xmalloc/xfree. - * - * Revision 1.8 1995/11/28 14:26:21 adam - * Bug fix: recordId with constant wasn't right. - * Bug fix: recordId dictionary entry wasn't deleted when needed. - * - * Revision 1.7 1995/11/28 09:09:43 adam - * Zebra config renamed. - * Use setting 'recordId' to identify record now. - * Bug fix in recindex.c: rec_release_blocks was invokeded even - * though the blocks were already released. - * File traversal properly deletes records when needed. - * - * Revision 1.6 1995/11/25 10:24:06 adam - * More record fields - they are enumerated now. - * New options: flagStoreData flagStoreKey. - * - * Revision 1.5 1995/11/22 17:19:18 adam - * Record management uses the bfile system. - * - * Revision 1.4 1995/11/20 16:59:46 adam - * New update method: the 'old' keys are saved for each records. - * - * Revision 1.3 1995/11/16 15:34:55 adam - * Uses new record management system in both indexer and server. - * - * Revision 1.2 1995/11/15 19:13:08 adam - * Work on record management. - * - * Revision 1.1 1995/11/15 14:46:20 adam - * Started work on better record management system. * + * $Id: recindex.c,v 1.33 2002-07-15 11:50:01 adam Exp $ */ @@ -387,6 +286,7 @@ Records rec_open (BFiles bfs, int rw, int compression_method) p->cache_cur = 0; p->record_cache = (struct record_cache_entry *) xmalloc (sizeof(*p->record_cache)*p->cache_max); + zebra_mutex_init (&p->mutex); return p; } @@ -419,6 +319,7 @@ static void rec_decode_unsigned(unsigned *np, unsigned char *buf, int *len) (*len)++; *np = n; } + static void rec_cache_flush_block1 (Records p, Record rec, Record last_rec, char **out_buf, int *out_size, int *out_offset) @@ -505,8 +406,8 @@ static void rec_write_multiple (Records p, int saveCount) default: break; } - rec_rm (&e->rec); } + *sysnop = -1; if (ref_count) { @@ -518,7 +419,12 @@ static void rec_write_multiple (Records p, int saveCount) #if HAVE_BZLIB_H csize = out_offset + (out_offset >> 6) + 620; rec_tmp_expand (p, csize); - i = bzBuffToBuffCompress (p->tmp_buf+sizeof(int)+sizeof(short)+ +#ifdef BZ_CONFIG_ERROR + i = BZ2_bzBuffToBuffCompress +#else + i = bzBuffToBuffCompress +#endif + (p->tmp_buf+sizeof(int)+sizeof(short)+ sizeof(char), &csize, out_buf, out_offset, 1, 0, 30); if (i != BZ_OK) @@ -562,6 +468,13 @@ static void rec_cache_flush (Records p, int saveCount) saveCount = 0; rec_write_multiple (p, saveCount); + + for (i = 0; icache_cur - saveCount; i++) + { + struct record_cache_entry *e = p->record_cache + i; + rec_rm (&e->rec); + } + /* i still being used ... */ for (j = 0; jrecord_cache+j, p->record_cache+i, sizeof(*p->record_cache)); @@ -618,6 +531,7 @@ void rec_close (Records *pp) assert (p); + zebra_mutex_destroy (&p->mutex); rec_cache_flush (p, 0); xfree (p->record_cache); @@ -638,17 +552,18 @@ void rec_close (Records *pp) *pp = NULL; } - -Record rec_get (Records p, int sysno) +static Record rec_get_int (Records p, int sysno) { - int i, in_size; + int i, in_size, r; Record rec, *recp; struct record_index_entry entry; int freeblock, dst_type; char *nptr, *cptr; char *in_buf = 0; char *bz_buf = 0; +#if HAVE_BZLIB_H int bz_size; +#endif char compression_method; assert (sysno > 0); @@ -657,7 +572,7 @@ Record rec_get (Records p, int sysno) if ((recp = rec_cache_lookup (p, sysno, recordFlagNop))) return rec_cp (*recp); - if (!read_indx (p, sysno, &entry, sizeof(entry), 1)) + if (read_indx (p, sysno, &entry, sizeof(entry), 1) < 1) return NULL; /* record is not there! */ if (!entry.size) @@ -669,11 +584,12 @@ Record rec_get (Records p, int sysno) assert (freeblock > 0); - rec = (Record) xmalloc (sizeof(*rec)); rec_tmp_expand (p, entry.size); cptr = p->tmp_buf; - bf_read (p->data_BFile[dst_type], freeblock, 0, 0, cptr); + r = bf_read (p->data_BFile[dst_type], freeblock, 0, 0, cptr); + if (r < 0) + return 0; memcpy (&freeblock, cptr, sizeof(freeblock)); while (freeblock) @@ -683,11 +599,14 @@ Record rec_get (Records p, int sysno) cptr += p->head.block_size[dst_type] - sizeof(freeblock); memcpy (&tmp, cptr, sizeof(tmp)); - bf_read (p->data_BFile[dst_type], freeblock, 0, 0, cptr); + r = bf_read (p->data_BFile[dst_type], freeblock, 0, 0, cptr); + if (r < 0) + return 0; memcpy (&freeblock, cptr, sizeof(freeblock)); memcpy (cptr, &tmp, sizeof(tmp)); } + rec = (Record) xmalloc (sizeof(*rec)); rec->sysno = sysno; memcpy (&compression_method, p->tmp_buf + sizeof(int) + sizeof(short), sizeof(compression_method)); @@ -697,14 +616,22 @@ Record rec_get (Records p, int sysno) { case REC_COMPRESS_BZIP2: #if HAVE_BZLIB_H - bz_size = entry.size * 30+100; - bz_buf = (char *) xmalloc (bz_size); - i = bzBuffToBuffDecompress (bz_buf, &bz_size, in_buf, in_size, 0, 0); - logf (LOG_LOG, "decompress %5d %5d", in_size, bz_size); - if (i != BZ_OK) + bz_size = entry.size * 20 + 100; + while (1) { - logf (LOG_FATAL, "bzBuffToBuffDecompress error code=%d", i); - exit (1); + bz_buf = (char *) xmalloc (bz_size); +#ifdef BZ_CONFIG_ERROR + i = BZ2_bzBuffToBuffDecompress +#else + i = bzBuffToBuffDecompress +#endif + (bz_buf, &bz_size, in_buf, in_size, 0, 0); + logf (LOG_LOG, "decompress %5d %5d", in_size, bz_size); + if (i == BZ_OK) + break; + logf (LOG_LOG, "failed"); + xfree (bz_buf); + bz_size *= 2; } in_buf = bz_buf; in_size = bz_size; @@ -752,8 +679,9 @@ Record rec_get (Records p, int sysno) { if (rec->info[i] && rec->size[i]) { - char *np = xmalloc (rec->size[i]); + char *np = xmalloc (rec->size[i]+1); memcpy (np, rec->info[i], rec->size[i]); + np[rec->size[i]] = '\0'; rec->info[i] = np; } else @@ -767,7 +695,17 @@ Record rec_get (Records p, int sysno) return rec; } -Record rec_new (Records p) +Record rec_get (Records p, int sysno) +{ + Record rec; + zebra_mutex_lock (&p->mutex); + + rec = rec_get_int (p, sysno); + zebra_mutex_unlock (&p->mutex); + return rec; +} + +static Record rec_new_int (Records p) { int sysno, i; Record rec; @@ -795,10 +733,21 @@ Record rec_new (Records p) return rec; } +Record rec_new (Records p) +{ + Record rec; + zebra_mutex_lock (&p->mutex); + + rec = rec_new_int (p); + zebra_mutex_unlock (&p->mutex); + return rec; +} + void rec_del (Records p, Record *recpp) { Record *recp; + zebra_mutex_lock (&p->mutex); (p->head.no_records)--; if ((recp = rec_cache_lookup (p, (*recpp)->sysno, recordFlagDelete))) { @@ -810,6 +759,7 @@ void rec_del (Records p, Record *recpp) rec_cache_insert (p, *recpp, recordFlagDelete); rec_rm (recpp); } + zebra_mutex_unlock (&p->mutex); *recpp = NULL; } @@ -817,6 +767,7 @@ void rec_put (Records p, Record *recpp) { Record *recp; + zebra_mutex_lock (&p->mutex); if ((recp = rec_cache_lookup (p, (*recpp)->sysno, recordFlagWrite))) { rec_rm (recp); @@ -827,6 +778,7 @@ void rec_put (Records p, Record *recpp) rec_cache_insert (p, *recpp, recordFlagWrite); rec_rm (recpp); } + zebra_mutex_unlock (&p->mutex); *recpp = NULL; }