X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Frecindex.c;h=72e8259ce00f3ae3c4f97096a3857b8d89dc880e;hb=bb96b42dc69e6dfa37ee277333e6de0df85fb070;hp=2fe16b8cdeb9e872cf4248a03f1ad2ba211feabf;hpb=795af4e3c7346eff351ff387228ec548956eada8;p=idzebra-moved-to-github.git diff --git a/index/recindex.c b/index/recindex.c index 2fe16b8..72e8259 100644 --- a/index/recindex.c +++ b/index/recindex.c @@ -4,7 +4,30 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: recindex.c,v $ - * Revision 1.24 1999-06-25 13:48:02 adam + * Revision 1.31 2001-02-26 22:14:59 adam + * Updated for BZIP2 1.0.X. Configure script doesn't enable 64 bit LFS + * on broken glibc on Redhat 7.0. + * + * Revision 1.30 2000/07/13 10:14:20 heikki + * Removed compiler warnings when making zebra + * + * Revision 1.29 2000/04/05 09:49:35 adam + * On Unix, zebra/z'mbol uses automake. + * + * Revision 1.28 1999/12/08 22:44:45 adam + * Zebra/Z'mbol dependencies added. + * + * Revision 1.27 1999/10/29 10:02:33 adam + * Fixed decompression buffer overflow. + * + * Revision 1.26 1999/07/06 13:34:57 adam + * Fixed bug (introduced by previous commit). + * + * Revision 1.25 1999/07/06 12:28:04 adam + * Updated record index structure. Format includes version ID. Compression + * algorithm ID is stored for each record block. + * + * Revision 1.24 1999/06/25 13:48:02 adam * Updated MSVC project files. * Added BZIP2 record compression (not very well tested). * @@ -180,11 +203,10 @@ static void rec_release_blocks (Records p, int sysno) { struct record_index_entry entry; int freeblock; - int block_and_ref[2]; + char block_and_ref[sizeof(short) + sizeof(int)]; int dst_type; int first = 1; - logf (LOG_LOG, "release_blocks for sysno=%d", sysno); if (read_indx (p, sysno, &entry, sizeof(entry), 1) != 1) return ; @@ -203,8 +225,11 @@ static void rec_release_blocks (Records p, int sysno) } if (first) { - block_and_ref[1]--; - if (block_and_ref[1]) + short ref; + memcpy (&ref, block_and_ref + sizeof(int), sizeof(ref)); + --ref; + memcpy (block_and_ref + sizeof(int), &ref, sizeof(ref)); + if (ref) { if (bf_write (p->data_BFile[dst_type], freeblock, 0, sizeof(block_and_ref), block_and_ref)) @@ -224,7 +249,7 @@ static void rec_release_blocks (Records p, int sysno) exit (1); } p->head.block_free[dst_type] = freeblock; - freeblock = block_and_ref[0]; + memcpy (&freeblock, block_and_ref, sizeof(int)); p->head.block_used[dst_type]--; } @@ -243,40 +268,6 @@ static void rec_delete_single (Records p, Record rec) write_indx (p, rec->sysno, &entry, sizeof(entry)); } -static void rec_write_tmp_buf (Records p, int size, int *sysnos); - -static void rec_write_single (Records p, Record rec) -{ - - int sysnos[2]; - int i, size = 0; - char *cptr; - - logf (LOG_LOG, " rec_write_single !!!!!!!!!!!!!!!!!!!!!!!!!!!!"); - for (i = 0; i < REC_NO_INFO; i++) - if (!rec->info[i]) - size += sizeof(*rec->size); - else - size += sizeof(*rec->size) + rec->size[i]; - - rec_tmp_expand (p, size); - - cptr = p->tmp_buf + sizeof(int); /* a hack! */ - for (i = 0; i < REC_NO_INFO; i++) - { - memcpy (cptr, &rec->size[i], sizeof(*rec->size)); - cptr += sizeof(*rec->size); - if (rec->info[i]) - { - memcpy (cptr, rec->info[i], rec->size[i]); - cptr += rec->size[i]; - } - } - sysnos[0] = rec->sysno; - sysnos[1] = -1; - rec_write_tmp_buf (p, size, sysnos); -} - static void rec_write_tmp_buf (Records p, int size, int *sysnos) { struct record_index_entry entry; @@ -333,18 +324,14 @@ static void rec_write_tmp_buf (Records p, int size, int *sysnos) sizeof(int) + (p->tmp_buf+size) - cptr, cptr); } -static void rec_update_single (Records p, Record rec) -{ - rec_release_blocks (p, rec->sysno); - rec_write_single (p, rec); -} - -Records rec_open (BFiles bfs, int rw) +Records rec_open (BFiles bfs, int rw, int compression_method) { Records p; int i, r; + int version; p = (Records) xmalloc (sizeof(*p)); + p->compression_method = compression_method; p->rw = rw; p->tmp_size = 1024; p->tmp_buf = (char *) xmalloc (p->tmp_size); @@ -360,6 +347,7 @@ Records rec_open (BFiles bfs, int rw) { case 0: memcpy (p->head.magic, REC_HEAD_MAGIC, sizeof(p->head.magic)); + sprintf (p->head.version, "%3d", REC_VERSION); p->head.index_free = 0; p->head.index_last = 1; p->head.no_records = 0; @@ -375,7 +363,7 @@ Records rec_open (BFiles bfs, int rw) for (i = 1; ihead.block_size[i] = p->head.block_size[i-1] * 4; - p->head.block_move[i] = p->head.block_size[i] * 3; + p->head.block_move[i] = p->head.block_size[i] * 24; } if (rw) rec_write_head (p); @@ -384,9 +372,16 @@ Records rec_open (BFiles bfs, int rw) memcpy (&p->head, p->tmp_buf, sizeof(p->head)); if (memcmp (p->head.magic, REC_HEAD_MAGIC, sizeof(p->head.magic))) { - logf (LOG_FATAL, "read %s. bad header", p->index_fname); + logf (LOG_FATAL, "file %s has bad format", p->index_fname); exit (1); } + version = atoi (p->head.version); + if (version != REC_VERSION) + { + logf (LOG_FATAL, "file %s is version %d, but version" + " %d is required", p->index_fname, version, REC_VERSION); + exit (1); + } break; } for (i = 0; icache_cur = 0; p->record_cache = (struct record_cache_entry *) xmalloc (sizeof(*p->record_cache)*p->cache_max); + zebra_mutex_init (&p->mutex); return p; } @@ -443,6 +439,7 @@ static void rec_decode_unsigned(unsigned *np, unsigned char *buf, int *len) (*len)++; *np = n; } + static void rec_cache_flush_block1 (Records p, Record rec, Record last_rec, char **out_buf, int *out_size, int *out_offset) @@ -491,7 +488,8 @@ static void rec_cache_flush_block1 (Records p, Record rec, Record last_rec, static void rec_write_multiple (Records p, int saveCount) { int i; - int ref_count = 0; + short ref_count = 0; + char compression_method; Record last_rec = 0; int out_size = 1000; int out_offset = 0; @@ -521,31 +519,62 @@ static void rec_write_multiple (Records p, int saveCount) e->flag = recordFlagNop; last_rec = e->rec; break; + case recordFlagDelete: + rec_delete_single (p, e->rec); + e->flag = recordFlagNop; + break; default: break; } } + *sysnop = -1; if (ref_count) { - int csize = out_offset + (out_offset >> 6) + 620; - - rec_tmp_expand (p, csize); -#if HAVE_BZLIB_H - i = bzBuffToBuffCompress (p->tmp_buf+2*sizeof(int), &csize, - out_buf, out_offset, 9, 0, 30); - if (i != BZ_OK) + int csize = 0; /* indicate compression "not performed yet" */ + compression_method = p->compression_method; + switch (compression_method) { - logf (LOG_FATAL, "bzBuffToCompress error code=%d", i); - exit (1); - } + case REC_COMPRESS_BZIP2: +#if HAVE_BZLIB_H + csize = out_offset + (out_offset >> 6) + 620; + rec_tmp_expand (p, csize); +#ifdef BZ_CONFIG_ERROR + i = BZ2_bzBuffToBuffCompress #else - memcpy (p->tmp_buf + 2*sizeof(int), out_buf, out_offset); - csize = out_offset; + i = bzBuffToBuffCompress #endif + (p->tmp_buf+sizeof(int)+sizeof(short)+ + sizeof(char), + &csize, out_buf, out_offset, 1, 0, 30); + if (i != BZ_OK) + { + logf (LOG_WARN, "bzBuffToBuffCompress error code=%d", i); + csize = 0; + } + logf (LOG_LOG, "compress %4d %5d %5d", ref_count, out_offset, + csize); +#endif + break; + case REC_COMPRESS_NONE: + break; + } + if (!csize) + { + /* either no compression or compression not supported ... */ + csize = out_offset; + rec_tmp_expand (p, csize); + memcpy (p->tmp_buf + sizeof(int) + sizeof(short) + sizeof(char), + out_buf, out_offset); + csize = out_offset; + compression_method = REC_COMPRESS_NONE; + } memcpy (p->tmp_buf + sizeof(int), &ref_count, sizeof(ref_count)); + memcpy (p->tmp_buf + sizeof(int)+sizeof(short), + &compression_method, sizeof(compression_method)); + /* -------- compression */ - rec_write_tmp_buf (p, csize + sizeof(int), sysnos); + rec_write_tmp_buf (p, csize + sizeof(short) + sizeof(char), sysnos); } xfree (out_buf); xfree (sysnos); @@ -559,25 +588,13 @@ static void rec_cache_flush (Records p, int saveCount) saveCount = 0; rec_write_multiple (p, saveCount); + for (i = 0; icache_cur - saveCount; i++) { struct record_cache_entry *e = p->record_cache + i; - switch (e->flag) - { - case recordFlagNop: - break; - case recordFlagNew: - rec_write_single (p, e->rec); - break; - case recordFlagWrite: - rec_update_single (p, e->rec); - break; - case recordFlagDelete: - rec_delete_single (p, e->rec); - break; - } rec_rm (&e->rec); - } + } + /* i still being used ... */ for (j = 0; jrecord_cache+j, p->record_cache+i, sizeof(*p->record_cache)); @@ -617,7 +634,7 @@ static void rec_cache_insert (Records p, Record rec, enum recordCacheFlag flag) for (j = 0; jsize[j]; } - if (used > 256000) + if (used > 90000) rec_cache_flush (p, 1); } assert (p->cache_cur < p->cache_max); @@ -634,6 +651,7 @@ void rec_close (Records *pp) assert (p); + zebra_mutex_destroy (&p->mutex); rec_cache_flush (p, 0); xfree (p->record_cache); @@ -654,15 +672,19 @@ void rec_close (Records *pp) *pp = NULL; } - -Record rec_get (Records p, int sysno) +static Record rec_get_int (Records p, int sysno) { - int i, in_size; + int i, in_size, r; Record rec, *recp; struct record_index_entry entry; int freeblock, dst_type; char *nptr, *cptr; char *in_buf = 0; + char *bz_buf = 0; +#if HAVE_BZLIB_H + int bz_size; +#endif + char compression_method; assert (sysno > 0); assert (p); @@ -670,7 +692,7 @@ Record rec_get (Records p, int sysno) if ((recp = rec_cache_lookup (p, sysno, recordFlagNop))) return rec_cp (*recp); - if (!read_indx (p, sysno, &entry, sizeof(entry), 1)) + if (read_indx (p, sysno, &entry, sizeof(entry), 1) < 1) return NULL; /* record is not there! */ if (!entry.size) @@ -682,11 +704,12 @@ Record rec_get (Records p, int sysno) assert (freeblock > 0); - rec = (Record) xmalloc (sizeof(*rec)); rec_tmp_expand (p, entry.size); cptr = p->tmp_buf; - bf_read (p->data_BFile[dst_type], freeblock, 0, 0, cptr); + r = bf_read (p->data_BFile[dst_type], freeblock, 0, 0, cptr); + if (r < 0) + return 0; memcpy (&freeblock, cptr, sizeof(freeblock)); while (freeblock) @@ -696,26 +719,53 @@ Record rec_get (Records p, int sysno) cptr += p->head.block_size[dst_type] - sizeof(freeblock); memcpy (&tmp, cptr, sizeof(tmp)); - bf_read (p->data_BFile[dst_type], freeblock, 0, 0, cptr); + r = bf_read (p->data_BFile[dst_type], freeblock, 0, 0, cptr); + if (r < 0) + return 0; memcpy (&freeblock, cptr, sizeof(freeblock)); memcpy (cptr, &tmp, sizeof(tmp)); } + rec = (Record) xmalloc (sizeof(*rec)); rec->sysno = sysno; -#if HAVE_BZLIB_H - in_size = entry.size * 30+100; - in_buf = (char *) xmalloc (in_size); - i = bzBuffToBuffDecompress (in_buf, &in_size, p->tmp_buf+2*sizeof(int), - entry.size-sizeof(int), 0, 4); - if (i != BZ_OK) + memcpy (&compression_method, p->tmp_buf + sizeof(int) + sizeof(short), + sizeof(compression_method)); + in_buf = p->tmp_buf + sizeof(int) + sizeof(short) + sizeof(char); + in_size = entry.size - sizeof(short) - sizeof(char); + switch (compression_method) { - logf (LOG_FATAL, "bzBuffToDecompress error code=%d", i); - exit (1); - } + case REC_COMPRESS_BZIP2: +#if HAVE_BZLIB_H + bz_size = entry.size * 20 + 100; + while (1) + { + bz_buf = (char *) xmalloc (bz_size); +#ifdef BZ_CONFIG_ERROR + i = BZ2_bzBuffToBuffDecompress #else - in_buf = p->tmp_buf + 2*sizeof(int); - in_size = entry.size - sizeof(int); + i = bzBuffToBuffDecompress #endif + (bz_buf, &bz_size, in_buf, in_size, 0, 0); + logf (LOG_LOG, "decompress %5d %5d", in_size, bz_size); + if (i == BZ_OK) + break; + logf (LOG_LOG, "failed"); + xfree (bz_buf); + bz_size *= 2; + } + in_buf = bz_buf; + in_size = bz_size; +#else + logf (LOG_FATAL, "cannot decompress record(s) in BZIP2 format"); + exit (1); +#endif + break; + case REC_COMPRESS_NONE: + break; + } + for (i = 0; iinfo[i] = 0; + nptr = in_buf; /* skip ref count */ while (nptr < in_buf + in_size) { @@ -736,8 +786,7 @@ Record rec_get (Records p, int sysno) if (rec->size[i]) { - rec->info[i] = (char *) xmalloc (rec->size[i]); - memcpy (rec->info[i], nptr, rec->size[i]); + rec->info[i] = nptr; nptr += rec->size[i]; } else @@ -746,12 +795,36 @@ Record rec_get (Records p, int sysno) if (this_sysno == sysno) break; } - xfree (in_buf); + for (i = 0; iinfo[i] && rec->size[i]) + { + char *np = xmalloc (rec->size[i]); + memcpy (np, rec->info[i], rec->size[i]); + rec->info[i] = np; + } + else + { + assert (rec->info[i] == 0); + assert (rec->size[i] == 0); + } + } + xfree (bz_buf); rec_cache_insert (p, rec, recordFlagNop); return rec; } -Record rec_new (Records p) +Record rec_get (Records p, int sysno) +{ + Record rec; + zebra_mutex_lock (&p->mutex); + + rec = rec_get_int (p, sysno); + zebra_mutex_unlock (&p->mutex); + return rec; +} + +static Record rec_new_int (Records p) { int sysno, i; Record rec; @@ -768,6 +841,14 @@ Record rec_new (Records p) sysno = p->head.index_free; p->head.index_free = entry.next; } +#if ZMBOL +#else + if (sysno > 100000) + { + logf (LOG_FATAL, "100,000 record limit reached"); + exit (1); + } +#endif (p->head.no_records)++; rec->sysno = sysno; for (i = 0; i < REC_NO_INFO; i++) @@ -779,10 +860,21 @@ Record rec_new (Records p) return rec; } +Record rec_new (Records p) +{ + Record rec; + zebra_mutex_lock (&p->mutex); + + rec = rec_new_int (p); + zebra_mutex_unlock (&p->mutex); + return rec; +} + void rec_del (Records p, Record *recpp) { Record *recp; + zebra_mutex_lock (&p->mutex); (p->head.no_records)--; if ((recp = rec_cache_lookup (p, (*recpp)->sysno, recordFlagDelete))) { @@ -794,6 +886,7 @@ void rec_del (Records p, Record *recpp) rec_cache_insert (p, *recpp, recordFlagDelete); rec_rm (recpp); } + zebra_mutex_unlock (&p->mutex); *recpp = NULL; } @@ -801,6 +894,7 @@ void rec_put (Records p, Record *recpp) { Record *recp; + zebra_mutex_lock (&p->mutex); if ((recp = rec_cache_lookup (p, (*recpp)->sysno, recordFlagWrite))) { rec_rm (recp); @@ -811,6 +905,7 @@ void rec_put (Records p, Record *recpp) rec_cache_insert (p, *recpp, recordFlagWrite); rec_rm (recpp); } + zebra_mutex_unlock (&p->mutex); *recpp = NULL; }