X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Frecindex.c;h=f72a0d06f79077cbd154a365f17a593f4241c836;hb=7c3a0352f0492609a3b6b26b63a72b0b2d207aab;hp=72e8259ce00f3ae3c4f97096a3857b8d89dc880e;hpb=04a991ce4fba7c149df537f3050af0e600c23b28;p=idzebra-moved-to-github.git diff --git a/index/recindex.c b/index/recindex.c index 72e8259..f72a0d0 100644 --- a/index/recindex.c +++ b/index/recindex.c @@ -1,140 +1,35 @@ -/* - * Copyright (C) 1994-1999, Index Data - * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss - * - * $Log: recindex.c,v $ - * Revision 1.31 2001-02-26 22:14:59 adam - * Updated for BZIP2 1.0.X. Configure script doesn't enable 64 bit LFS - * on broken glibc on Redhat 7.0. - * - * Revision 1.30 2000/07/13 10:14:20 heikki - * Removed compiler warnings when making zebra - * - * Revision 1.29 2000/04/05 09:49:35 adam - * On Unix, zebra/z'mbol uses automake. - * - * Revision 1.28 1999/12/08 22:44:45 adam - * Zebra/Z'mbol dependencies added. - * - * Revision 1.27 1999/10/29 10:02:33 adam - * Fixed decompression buffer overflow. - * - * Revision 1.26 1999/07/06 13:34:57 adam - * Fixed bug (introduced by previous commit). - * - * Revision 1.25 1999/07/06 12:28:04 adam - * Updated record index structure. Format includes version ID. Compression - * algorithm ID is stored for each record block. - * - * Revision 1.24 1999/06/25 13:48:02 adam - * Updated MSVC project files. - * Added BZIP2 record compression (not very well tested). - * - * Revision 1.23 1999/05/26 07:49:13 adam - * C++ compilation. - * - * Revision 1.22 1999/02/18 12:49:34 adam - * Changed file naming scheme for register files as well as record - * store/index files. - * - * Revision 1.21 1999/02/02 14:51:03 adam - * Updated WIN32 code specific sections. Changed header. - * - * Revision 1.20 1998/01/12 15:04:08 adam - * The test option (-s) only uses read-lock (and not write lock). - * - * Revision 1.19 1997/09/17 12:19:16 adam - * Zebra version corresponds to YAZ version 1.4. - * Changed Zebra server so that it doesn't depend on global common_resource. - * - * Revision 1.18 1997/07/15 16:28:42 adam - * Bug fix: storeData didn't work with files with multiple records. - * Bug fix: fixed memory management with records; not really well - * thought through. - * - * Revision 1.17 1997/02/12 20:39:46 adam - * Implemented options -f that limits the log to the first - * records. - * Changed some log messages also. - * - * Revision 1.16 1996/06/04 10:19:00 adam - * Minor changes - removed include of ctype.h. - * - * Revision 1.15 1996/05/13 14:23:06 adam - * Work on compaction of set/use bytes in dictionary. - * - * Revision 1.14 1996/02/01 20:48:15 adam - * The total size of records are always checked in rec_cache_insert to - * reduce memory usage. - * - * Revision 1.13 1995/12/11 09:12:49 adam - * The rec_get function returns NULL if record doesn't exist - will - * happen in the server if the result set records have been deleted since - * the creation of the set (i.e. the search). - * The server saves a result temporarily if it is 'volatile', i.e. the - * set is register dependent. - * - * Revision 1.12 1995/12/07 17:38:47 adam - * Work locking mechanisms for concurrent updates/commit. - * - * Revision 1.11 1995/12/06 13:58:26 adam - * Improved flushing of records - all flushes except the last one - * don't write the last accessed. Also flush takes place if record - * info occupy more than about 256k. - * - * Revision 1.10 1995/12/06 12:41:24 adam - * New command 'stat' for the index program. - * Filenames can be read from stdin by specifying '-'. - * Bug fix/enhancement of the transformation from terms to regular - * expressons in the search engine. - * - * Revision 1.9 1995/11/30 08:34:33 adam - * Started work on commit facility. - * Changed a few malloc/free to xmalloc/xfree. - * - * Revision 1.8 1995/11/28 14:26:21 adam - * Bug fix: recordId with constant wasn't right. - * Bug fix: recordId dictionary entry wasn't deleted when needed. - * - * Revision 1.7 1995/11/28 09:09:43 adam - * Zebra config renamed. - * Use setting 'recordId' to identify record now. - * Bug fix in recindex.c: rec_release_blocks was invokeded even - * though the blocks were already released. - * File traversal properly deletes records when needed. - * - * Revision 1.6 1995/11/25 10:24:06 adam - * More record fields - they are enumerated now. - * New options: flagStoreData flagStoreKey. - * - * Revision 1.5 1995/11/22 17:19:18 adam - * Record management uses the bfile system. - * - * Revision 1.4 1995/11/20 16:59:46 adam - * New update method: the 'old' keys are saved for each records. - * - * Revision 1.3 1995/11/16 15:34:55 adam - * Uses new record management system in both indexer and server. - * - * Revision 1.2 1995/11/15 19:13:08 adam - * Work on record management. - * - * Revision 1.1 1995/11/15 14:46:20 adam - * Started work on better record management system. - * - */ +/* $Id: recindex.c,v 1.37 2004-08-18 17:02:05 adam Exp $ + Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 + Index Data Aps + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Zebra; see the file LICENSE.zebra. If not, write to the +Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. +*/ /* * Format of first block - * next (4 bytes) - * ref_count (4 bytes) - * block (504 bytes) + * next (8 bytes) + * ref_count (2 bytes) + * block (500 bytes) * * Format of subsequent blocks - * next (4 bytes) - * block (508 bytes) + * next (8 bytes) + * block (502 bytes) * * Format of each record * sysno @@ -176,13 +71,13 @@ static void rec_tmp_expand (Records p, int size) } } -static int read_indx (Records p, int sysno, void *buf, int itemsize, +static int read_indx (Records p, SYSNO sysno, void *buf, int itemsize, int ignoreError) { int r; - int pos = (sysno-1)*itemsize; + zint pos = (sysno-1)*itemsize; - r = bf_read (p->index_BFile, 1+pos/128, pos%128, itemsize, buf); + r = bf_read (p->index_BFile, 1+pos/128, (int) (pos%128), itemsize, buf); if (r != 1 && !ignoreError) { logf (LOG_FATAL|LOG_ERRNO, "read in %s at pos %ld", @@ -192,18 +87,18 @@ static int read_indx (Records p, int sysno, void *buf, int itemsize, return r; } -static void write_indx (Records p, int sysno, void *buf, int itemsize) +static void write_indx (Records p, SYSNO sysno, void *buf, int itemsize) { - int pos = (sysno-1)*itemsize; + zint pos = (sysno-1)*itemsize; - bf_write (p->index_BFile, 1+pos/128, pos%128, itemsize, buf); + bf_write (p->index_BFile, 1+pos/128, (int) (pos%128), itemsize, buf); } -static void rec_release_blocks (Records p, int sysno) +static void rec_release_blocks (Records p, SYSNO sysno) { struct record_index_entry entry; - int freeblock; - char block_and_ref[sizeof(short) + sizeof(int)]; + zint freeblock; + char block_and_ref[sizeof(zint) + sizeof(short)]; int dst_type; int first = 1; @@ -212,13 +107,14 @@ static void rec_release_blocks (Records p, int sysno) freeblock = entry.next; assert (freeblock > 0); - dst_type = freeblock & 7; + dst_type = (int) (freeblock & 7); assert (dst_type < REC_BLOCK_TYPES); freeblock = freeblock / 8; while (freeblock) { if (bf_read (p->data_BFile[dst_type], freeblock, 0, - sizeof(block_and_ref), block_and_ref) != 1) + first ? sizeof(block_and_ref) : sizeof(zint), + block_and_ref) != 1) { logf (LOG_FATAL|LOG_ERRNO, "read in rec_del_single"); exit (1); @@ -226,9 +122,9 @@ static void rec_release_blocks (Records p, int sysno) if (first) { short ref; - memcpy (&ref, block_and_ref + sizeof(int), sizeof(ref)); + memcpy (&ref, block_and_ref + sizeof(freeblock), sizeof(ref)); --ref; - memcpy (block_and_ref + sizeof(int), &ref, sizeof(ref)); + memcpy (block_and_ref + sizeof(freeblock), &ref, sizeof(ref)); if (ref) { if (bf_write (p->data_BFile[dst_type], freeblock, 0, @@ -249,7 +145,7 @@ static void rec_release_blocks (Records p, int sysno) exit (1); } p->head.block_free[dst_type] = freeblock; - memcpy (&freeblock, block_and_ref, sizeof(int)); + memcpy (&freeblock, block_and_ref, sizeof(freeblock)); p->head.block_used[dst_type]--; } @@ -268,12 +164,12 @@ static void rec_delete_single (Records p, Record rec) write_indx (p, rec->sysno, &entry, sizeof(entry)); } -static void rec_write_tmp_buf (Records p, int size, int *sysnos) +static void rec_write_tmp_buf (Records p, int size, SYSNO *sysnos) { struct record_index_entry entry; int no_written = 0; char *cptr = p->tmp_buf; - int block_prev = -1, block_free; + zint block_prev = -1, block_free; int dst_type = 0; int i; @@ -289,7 +185,7 @@ static void rec_write_tmp_buf (Records p, int size, int *sysnos) block_free, 0, sizeof(*p->head.block_free), &p->head.block_free[dst_type]) != 1) { - logf (LOG_FATAL|LOG_ERRNO, "read in %s at free block %d", + logf (LOG_FATAL|LOG_ERRNO, "read in %s at free block " ZINT_FORMAT, p->data_fname[dst_type], block_free); exit (1); } @@ -309,19 +205,19 @@ static void rec_write_tmp_buf (Records p, int size, int *sysnos) } else { - memcpy (cptr, &block_free, sizeof(int)); + memcpy (cptr, &block_free, sizeof(block_free)); bf_write (p->data_BFile[dst_type], block_prev, 0, 0, cptr); cptr = p->tmp_buf + no_written; } block_prev = block_free; - no_written += p->head.block_size[dst_type] - sizeof(int); + no_written += p->head.block_size[dst_type] - sizeof(zint); p->head.block_used[dst_type]++; } assert (block_prev != -1); block_free = 0; - memcpy (cptr, &block_free, sizeof(int)); + memcpy (cptr, &block_free, sizeof(block_free)); bf_write (p->data_BFile[dst_type], block_prev, 0, - sizeof(int) + (p->tmp_buf+size) - cptr, cptr); + sizeof(block_free) + (p->tmp_buf+size) - cptr, cptr); } Records rec_open (BFiles bfs, int rw, int compression_method) @@ -440,6 +336,36 @@ static void rec_decode_unsigned(unsigned *np, unsigned char *buf, int *len) *np = n; } +static void rec_encode_zint (zint n, unsigned char *buf, int *len) +{ + (*len) = 0; + while (n > 127) + { + buf[*len] = (unsigned) (128 + (n & 127)); + n = n >> 7; + (*len)++; + } + buf[*len] = (unsigned) n; + (*len)++; +} + +static void rec_decode_zint(zint *np, unsigned char *buf, int *len) +{ + zint n = 0; + zint w = 1; + (*len) = 0; + + while (buf[*len] > 127) + { + n += w*(buf[*len] & 127); + w = w << 7; + (*len)++; + } + n += w * buf[*len]; + (*len)++; + *np = n; +} + static void rec_cache_flush_block1 (Records p, Record rec, Record last_rec, char **out_buf, int *out_size, int *out_offset) @@ -461,7 +387,7 @@ static void rec_cache_flush_block1 (Records p, Record rec, Record last_rec, } if (i == 0) { - rec_encode_unsigned (rec->sysno, *out_buf + *out_offset, &len); + rec_encode_zint (rec->sysno, *out_buf + *out_offset, &len); (*out_offset) += len; } if (rec->size[i] == 0) @@ -494,8 +420,8 @@ static void rec_write_multiple (Records p, int saveCount) int out_size = 1000; int out_offset = 0; char *out_buf = (char *) xmalloc (out_size); - int *sysnos = (int *) xmalloc (sizeof(*sysnos) * (p->cache_cur + 1)); - int *sysnop = sysnos; + SYSNO *sysnos = (SYSNO *) xmalloc (sizeof(*sysnos) * (p->cache_cur + 1)); + SYSNO *sysnop = sysnos; for (i = 0; icache_cur - saveCount; i++) { @@ -544,7 +470,7 @@ static void rec_write_multiple (Records p, int saveCount) #else i = bzBuffToBuffCompress #endif - (p->tmp_buf+sizeof(int)+sizeof(short)+ + (p->tmp_buf+sizeof(zint)+sizeof(short)+ sizeof(char), &csize, out_buf, out_offset, 1, 0, 30); if (i != BZ_OK) @@ -564,13 +490,13 @@ static void rec_write_multiple (Records p, int saveCount) /* either no compression or compression not supported ... */ csize = out_offset; rec_tmp_expand (p, csize); - memcpy (p->tmp_buf + sizeof(int) + sizeof(short) + sizeof(char), + memcpy (p->tmp_buf + sizeof(zint) + sizeof(short) + sizeof(char), out_buf, out_offset); csize = out_offset; compression_method = REC_COMPRESS_NONE; } - memcpy (p->tmp_buf + sizeof(int), &ref_count, sizeof(ref_count)); - memcpy (p->tmp_buf + sizeof(int)+sizeof(short), + memcpy (p->tmp_buf + sizeof(zint), &ref_count, sizeof(ref_count)); + memcpy (p->tmp_buf + sizeof(zint)+sizeof(short), &compression_method, sizeof(compression_method)); /* -------- compression */ @@ -601,7 +527,7 @@ static void rec_cache_flush (Records p, int saveCount) p->cache_cur = saveCount; } -static Record *rec_cache_lookup (Records p, int sysno, +static Record *rec_cache_lookup (Records p, SYSNO sysno, enum recordCacheFlag flag) { int i; @@ -672,12 +598,13 @@ void rec_close (Records *pp) *pp = NULL; } -static Record rec_get_int (Records p, int sysno) +static Record rec_get_int (Records p, SYSNO sysno) { int i, in_size, r; Record rec, *recp; struct record_index_entry entry; - int freeblock, dst_type; + zint freeblock; + int dst_type; char *nptr, *cptr; char *in_buf = 0; char *bz_buf = 0; @@ -698,7 +625,7 @@ static Record rec_get_int (Records p, int sysno) if (!entry.size) return NULL; /* record is deleted */ - dst_type = entry.next & 7; + dst_type = (int) (entry.next & 7); assert (dst_type < REC_BLOCK_TYPES); freeblock = entry.next / 8; @@ -714,7 +641,7 @@ static Record rec_get_int (Records p, int sysno) while (freeblock) { - int tmp; + zint tmp; cptr += p->head.block_size[dst_type] - sizeof(freeblock); @@ -728,9 +655,9 @@ static Record rec_get_int (Records p, int sysno) rec = (Record) xmalloc (sizeof(*rec)); rec->sysno = sysno; - memcpy (&compression_method, p->tmp_buf + sizeof(int) + sizeof(short), + memcpy (&compression_method, p->tmp_buf + sizeof(zint) + sizeof(short), sizeof(compression_method)); - in_buf = p->tmp_buf + sizeof(int) + sizeof(short) + sizeof(char); + in_buf = p->tmp_buf + sizeof(zint) + sizeof(short) + sizeof(char); in_size = entry.size - sizeof(short) - sizeof(char); switch (compression_method) { @@ -769,9 +696,9 @@ static Record rec_get_int (Records p, int sysno) nptr = in_buf; /* skip ref count */ while (nptr < in_buf + in_size) { - int this_sysno; + zint this_sysno; int len; - rec_decode_unsigned (&this_sysno, nptr, &len); + rec_decode_zint (&this_sysno, nptr, &len); nptr += len; for (i = 0; i < REC_NO_INFO; i++) @@ -799,8 +726,9 @@ static Record rec_get_int (Records p, int sysno) { if (rec->info[i] && rec->size[i]) { - char *np = xmalloc (rec->size[i]); + char *np = xmalloc (rec->size[i]+1); memcpy (np, rec->info[i], rec->size[i]); + np[rec->size[i]] = '\0'; rec->info[i] = np; } else @@ -814,7 +742,7 @@ static Record rec_get_int (Records p, int sysno) return rec; } -Record rec_get (Records p, int sysno) +Record rec_get (Records p, SYSNO sysno) { Record rec; zebra_mutex_lock (&p->mutex); @@ -826,7 +754,8 @@ Record rec_get (Records p, int sysno) static Record rec_new_int (Records p) { - int sysno, i; + int i; + SYSNO sysno; Record rec; assert (p); @@ -841,14 +770,6 @@ static Record rec_new_int (Records p) sysno = p->head.index_free; p->head.index_free = entry.next; } -#if ZMBOL -#else - if (sysno > 100000) - { - logf (LOG_FATAL, "100,000 record limit reached"); - exit (1); - } -#endif (p->head.no_records)++; rec->sysno = sysno; for (i = 0; i < REC_NO_INFO; i++)