X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Frecindex.c;h=e41c56e8396fc2bb0f01e1526c00bf43ab1bc3c2;hp=4563fc469ec16fce847277ded74acfb0dd7b7305;hb=27bdd6aa26843aeac89f635ed495996088d8e8aa;hpb=81a75ae20b5b250309a70de51bfdf8de0019f882 diff --git a/index/recindex.c b/index/recindex.c index 4563fc4..e41c56e 100644 --- a/index/recindex.c +++ b/index/recindex.c @@ -1,956 +1,358 @@ -/* - * Copyright (C) 1994-1999, Index Data - * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss - * - * $Log: recindex.c,v $ - * Revision 1.32 2002-04-05 08:46:26 adam - * Zebra with full functionality - * - * Revision 1.31 2001/02/26 22:14:59 adam - * Updated for BZIP2 1.0.X. Configure script doesn't enable 64 bit LFS - * on broken glibc on Redhat 7.0. - * - * Revision 1.30 2000/07/13 10:14:20 heikki - * Removed compiler warnings when making zebra - * - * Revision 1.29 2000/04/05 09:49:35 adam - * On Unix, zebra/z'mbol uses automake. - * - * Revision 1.28 1999/12/08 22:44:45 adam - * Zebra/Z'mbol dependencies added. - * - * Revision 1.27 1999/10/29 10:02:33 adam - * Fixed decompression buffer overflow. - * - * Revision 1.26 1999/07/06 13:34:57 adam - * Fixed bug (introduced by previous commit). - * - * Revision 1.25 1999/07/06 12:28:04 adam - * Updated record index structure. Format includes version ID. Compression - * algorithm ID is stored for each record block. - * - * Revision 1.24 1999/06/25 13:48:02 adam - * Updated MSVC project files. - * Added BZIP2 record compression (not very well tested). - * - * Revision 1.23 1999/05/26 07:49:13 adam - * C++ compilation. - * - * Revision 1.22 1999/02/18 12:49:34 adam - * Changed file naming scheme for register files as well as record - * store/index files. - * - * Revision 1.21 1999/02/02 14:51:03 adam - * Updated WIN32 code specific sections. Changed header. - * - * Revision 1.20 1998/01/12 15:04:08 adam - * The test option (-s) only uses read-lock (and not write lock). - * - * Revision 1.19 1997/09/17 12:19:16 adam - * Zebra version corresponds to YAZ version 1.4. - * Changed Zebra server so that it doesn't depend on global common_resource. - * - * Revision 1.18 1997/07/15 16:28:42 adam - * Bug fix: storeData didn't work with files with multiple records. - * Bug fix: fixed memory management with records; not really well - * thought through. - * - * Revision 1.17 1997/02/12 20:39:46 adam - * Implemented options -f that limits the log to the first - * records. - * Changed some log messages also. - * - * Revision 1.16 1996/06/04 10:19:00 adam - * Minor changes - removed include of ctype.h. - * - * Revision 1.15 1996/05/13 14:23:06 adam - * Work on compaction of set/use bytes in dictionary. - * - * Revision 1.14 1996/02/01 20:48:15 adam - * The total size of records are always checked in rec_cache_insert to - * reduce memory usage. - * - * Revision 1.13 1995/12/11 09:12:49 adam - * The rec_get function returns NULL if record doesn't exist - will - * happen in the server if the result set records have been deleted since - * the creation of the set (i.e. the search). - * The server saves a result temporarily if it is 'volatile', i.e. the - * set is register dependent. - * - * Revision 1.12 1995/12/07 17:38:47 adam - * Work locking mechanisms for concurrent updates/commit. - * - * Revision 1.11 1995/12/06 13:58:26 adam - * Improved flushing of records - all flushes except the last one - * don't write the last accessed. Also flush takes place if record - * info occupy more than about 256k. - * - * Revision 1.10 1995/12/06 12:41:24 adam - * New command 'stat' for the index program. - * Filenames can be read from stdin by specifying '-'. - * Bug fix/enhancement of the transformation from terms to regular - * expressons in the search engine. - * - * Revision 1.9 1995/11/30 08:34:33 adam - * Started work on commit facility. - * Changed a few malloc/free to xmalloc/xfree. - * - * Revision 1.8 1995/11/28 14:26:21 adam - * Bug fix: recordId with constant wasn't right. - * Bug fix: recordId dictionary entry wasn't deleted when needed. - * - * Revision 1.7 1995/11/28 09:09:43 adam - * Zebra config renamed. - * Use setting 'recordId' to identify record now. - * Bug fix in recindex.c: rec_release_blocks was invokeded even - * though the blocks were already released. - * File traversal properly deletes records when needed. - * - * Revision 1.6 1995/11/25 10:24:06 adam - * More record fields - they are enumerated now. - * New options: flagStoreData flagStoreKey. - * - * Revision 1.5 1995/11/22 17:19:18 adam - * Record management uses the bfile system. - * - * Revision 1.4 1995/11/20 16:59:46 adam - * New update method: the 'old' keys are saved for each records. - * - * Revision 1.3 1995/11/16 15:34:55 adam - * Uses new record management system in both indexer and server. - * - * Revision 1.2 1995/11/15 19:13:08 adam - * Work on record management. - * - * Revision 1.1 1995/11/15 14:46:20 adam - * Started work on better record management system. - * - */ +/* This file is part of the Zebra server. + Copyright (C) 2004-2013 Index Data +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. -/* - * Format of first block - * next (4 bytes) - * ref_count (4 bytes) - * block (504 bytes) - * - * Format of subsequent blocks - * next (4 bytes) - * block (508 bytes) - * - * Format of each record - * sysno - * (length, data) - pairs - * length = 0 if same as previous - */ +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*/ + +#if HAVE_CONFIG_H +#include +#endif #include +#include #include #include -#include "recindxp.h" +#include +#include +#include "recindex.h" -#if HAVE_BZLIB_H -#include -#endif -static void rec_write_head (Records p) -{ - int r; +#define RIDX_CHUNK 128 - assert (p); - assert (p->index_BFile); - r = bf_write (p->index_BFile, 0, 0, sizeof(p->head), &p->head); - if (r) - { - logf (LOG_FATAL|LOG_ERRNO, "write head of %s", p->index_fname); - exit (1); - } -} +struct recindex { + char *index_fname; + BFile index_BFile; + ISAMB isamb; + ISAM_P isam_p; +}; -static void rec_tmp_expand (Records p, int size) -{ - if (p->tmp_size < size + 2048 || - p->tmp_size < p->head.block_size[REC_BLOCK_TYPES-1]*2) - { - xfree (p->tmp_buf); - p->tmp_size = size + p->head.block_size[REC_BLOCK_TYPES-1]*2 + 2048; - p->tmp_buf = (char *) xmalloc (p->tmp_size); - } -} +struct record_index_entry { + zint next; /* first block of record info / next free entry */ + int size; /* size of record or 0 if free entry */ +} ent; -static int read_indx (Records p, int sysno, void *buf, int itemsize, - int ignoreError) + +static void rect_log_item(int level, const void *b, const char *txt) { - int r; - int pos = (sysno-1)*itemsize; + zint sys; + int len; + + + memcpy(&sys, b, sizeof(sys)); + len = ((const char *) b)[sizeof(sys)]; - r = bf_read (p->index_BFile, 1+pos/128, pos%128, itemsize, buf); - if (r != 1 && !ignoreError) + if (len == sizeof(struct record_index_entry)) { - logf (LOG_FATAL|LOG_ERRNO, "read in %s at pos %ld", - p->index_fname, (long) pos); - exit (1); + memcpy(&ent, (const char *)b + sizeof(sys) + 1, len); + yaz_log(YLOG_LOG, "%s " ZINT_FORMAT " next=" ZINT_FORMAT " sz=%d", txt, sys, + ent.next, ent.size); + } - return r; + else + yaz_log(YLOG_LOG, "%s " ZINT_FORMAT, txt, sys); } -static void write_indx (Records p, int sysno, void *buf, int itemsize) +int rect_compare(const void *a, const void *b) { - int pos = (sysno-1)*itemsize; + zint s_a, s_b; - bf_write (p->index_BFile, 1+pos/128, pos%128, itemsize, buf); + memcpy(&s_a, a, sizeof(s_a)); + memcpy(&s_b, b, sizeof(s_b)); + + if (s_a > s_b) + return 1; + else if (s_a < s_b) + return -1; + return 0; } -static void rec_release_blocks (Records p, int sysno) +void *rect_code_start(void) { - struct record_index_entry entry; - int freeblock; - char block_and_ref[sizeof(short) + sizeof(int)]; - int dst_type; - int first = 1; - - if (read_indx (p, sysno, &entry, sizeof(entry), 1) != 1) - return ; - - freeblock = entry.next; - assert (freeblock > 0); - dst_type = freeblock & 7; - assert (dst_type < REC_BLOCK_TYPES); - freeblock = freeblock / 8; - while (freeblock) - { - if (bf_read (p->data_BFile[dst_type], freeblock, 0, - sizeof(block_and_ref), block_and_ref) != 1) - { - logf (LOG_FATAL|LOG_ERRNO, "read in rec_del_single"); - exit (1); - } - if (first) - { - short ref; - memcpy (&ref, block_and_ref + sizeof(int), sizeof(ref)); - --ref; - memcpy (block_and_ref + sizeof(int), &ref, sizeof(ref)); - if (ref) - { - if (bf_write (p->data_BFile[dst_type], freeblock, 0, - sizeof(block_and_ref), block_and_ref)) - { - logf (LOG_FATAL|LOG_ERRNO, "write in rec_del_single"); - exit (1); - } - return; - } - first = 0; - } - - if (bf_write (p->data_BFile[dst_type], freeblock, 0, sizeof(freeblock), - &p->head.block_free[dst_type])) - { - logf (LOG_FATAL|LOG_ERRNO, "write in rec_del_single"); - exit (1); - } - p->head.block_free[dst_type] = freeblock; - memcpy (&freeblock, block_and_ref, sizeof(int)); - - p->head.block_used[dst_type]--; - } - p->head.total_bytes -= entry.size; + return 0; } -static void rec_delete_single (Records p, Record rec) +void rect_encode(void *p, char **dst, const char **src) { - struct record_index_entry entry; + zint sys; + int len; - rec_release_blocks (p, rec->sysno); + memcpy(&sys, *src, sizeof(sys)); + zebra_zint_encode(dst, sys); + (*src) += sizeof(sys); - entry.next = p->head.index_free; - entry.size = 0; - p->head.index_free = rec->sysno; - write_indx (p, rec->sysno, &entry, sizeof(entry)); -} + len = **src; + **dst = len; + (*src)++; + (*dst)++; -static void rec_write_tmp_buf (Records p, int size, int *sysnos) -{ - struct record_index_entry entry; - int no_written = 0; - char *cptr = p->tmp_buf; - int block_prev = -1, block_free; - int dst_type = 0; - int i; - - for (i = 1; i= p->head.block_move[i]) - dst_type = i; - while (no_written < size) - { - block_free = p->head.block_free[dst_type]; - if (block_free) - { - if (bf_read (p->data_BFile[dst_type], - block_free, 0, sizeof(*p->head.block_free), - &p->head.block_free[dst_type]) != 1) - { - logf (LOG_FATAL|LOG_ERRNO, "read in %s at free block %d", - p->data_fname[dst_type], block_free); - exit (1); - } - } - else - block_free = p->head.block_last[dst_type]++; - if (block_prev == -1) - { - entry.next = block_free*8 + dst_type; - entry.size = size; - p->head.total_bytes += size; - while (*sysnos > 0) - { - write_indx (p, *sysnos, &entry, sizeof(entry)); - sysnos++; - } - } - else - { - memcpy (cptr, &block_free, sizeof(int)); - bf_write (p->data_BFile[dst_type], block_prev, 0, 0, cptr); - cptr = p->tmp_buf + no_written; - } - block_prev = block_free; - no_written += p->head.block_size[dst_type] - sizeof(int); - p->head.block_used[dst_type]++; - } - assert (block_prev != -1); - block_free = 0; - memcpy (cptr, &block_free, sizeof(int)); - bf_write (p->data_BFile[dst_type], block_prev, 0, - sizeof(int) + (p->tmp_buf+size) - cptr, cptr); + memcpy(*dst, *src, len); + *dst += len; + *src += len; } -Records rec_open (BFiles bfs, int rw, int compression_method) +void rect_decode(void *p, char **dst, const char **src) { - Records p; - int i, r; - int version; - - p = (Records) xmalloc (sizeof(*p)); - p->compression_method = compression_method; - p->rw = rw; - p->tmp_size = 1024; - p->tmp_buf = (char *) xmalloc (p->tmp_size); - p->index_fname = "reci"; - p->index_BFile = bf_open (bfs, p->index_fname, 128, rw); - if (p->index_BFile == NULL) - { - logf (LOG_FATAL|LOG_ERRNO, "open %s", p->index_fname); - exit (1); - } - r = bf_read (p->index_BFile, 0, 0, 0, p->tmp_buf); - switch (r) - { - case 0: - memcpy (p->head.magic, REC_HEAD_MAGIC, sizeof(p->head.magic)); - sprintf (p->head.version, "%3d", REC_VERSION); - p->head.index_free = 0; - p->head.index_last = 1; - p->head.no_records = 0; - p->head.total_bytes = 0; - for (i = 0; ihead.block_free[i] = 0; - p->head.block_last[i] = 1; - p->head.block_used[i] = 0; - } - p->head.block_size[0] = 128; - p->head.block_move[0] = 0; - for (i = 1; ihead.block_size[i] = p->head.block_size[i-1] * 4; - p->head.block_move[i] = p->head.block_size[i] * 24; - } - if (rw) - rec_write_head (p); - break; - case 1: - memcpy (&p->head, p->tmp_buf, sizeof(p->head)); - if (memcmp (p->head.magic, REC_HEAD_MAGIC, sizeof(p->head.magic))) - { - logf (LOG_FATAL, "file %s has bad format", p->index_fname); - exit (1); - } - version = atoi (p->head.version); - if (version != REC_VERSION) - { - logf (LOG_FATAL, "file %s is version %d, but version" - " %d is required", p->index_fname, version, REC_VERSION); - exit (1); - } - break; - } - for (i = 0; idata_fname[i] = (char *) xmalloc (strlen(str)+1); - strcpy (p->data_fname[i], str); - p->data_BFile[i] = NULL; - } - for (i = 0; idata_BFile[i] = bf_open (bfs, p->data_fname[i], - p->head.block_size[i], - rw))) - { - logf (LOG_FATAL|LOG_ERRNO, "bf_open %s", p->data_fname[i]); - exit (1); - } - } - p->cache_max = 400; - p->cache_cur = 0; - p->record_cache = (struct record_cache_entry *) - xmalloc (sizeof(*p->record_cache)*p->cache_max); - zebra_mutex_init (&p->mutex); - return p; + zint sys; + int len; + + zebra_zint_decode(src, &sys); + memcpy(*dst, &sys, sizeof(sys)); + *dst += sizeof(sys); + + len = **src; + **dst = len; + (*src)++; + (*dst)++; + + memcpy(*dst, *src, len); + *dst += len; + *src += len; } -static void rec_encode_unsigned (unsigned n, unsigned char *buf, int *len) +void rect_code_reset(void *p) { - (*len) = 0; - while (n > 127) - { - buf[*len] = 128 + (n & 127); - n = n >> 7; - (*len)++; - } - buf[*len] = n; - (*len)++; } -static void rec_decode_unsigned(unsigned *np, unsigned char *buf, int *len) +void rect_code_stop(void *p) { - unsigned n = 0; - unsigned w = 1; - (*len) = 0; - - while (buf[*len] > 127) - { - n += w*(buf[*len] & 127); - w = w << 7; - (*len)++; - } - n += w * buf[*len]; - (*len)++; - *np = n; } -static void rec_cache_flush_block1 (Records p, Record rec, Record last_rec, - char **out_buf, int *out_size, - int *out_offset) + +recindex_t recindex_open(BFiles bfs, int rw, int use_isamb) { - int i; - int len; + recindex_t p = xmalloc(sizeof(*p)); + p->index_BFile = 0; + p->isamb = 0; - for (i = 0; iindex_fname = "reci"; + p->index_BFile = bf_open(bfs, p->index_fname, RIDX_CHUNK, rw); + if (p->index_BFile == NULL) { - if (*out_offset + (int) rec->size[i] + 20 > *out_size) - { - int new_size = *out_offset + rec->size[i] + 65536; - char *np = (char *) xmalloc (new_size); - if (*out_offset) - memcpy (np, *out_buf, *out_offset); - xfree (*out_buf); - *out_size = new_size; - *out_buf = np; - } - if (i == 0) - { - rec_encode_unsigned (rec->sysno, *out_buf + *out_offset, &len); - (*out_offset) += len; - } - if (rec->size[i] == 0) - { - rec_encode_unsigned (1, *out_buf + *out_offset, &len); - (*out_offset) += len; - } - else if (last_rec && rec->size[i] == last_rec->size[i] && - !memcmp (rec->info[i], last_rec->info[i], rec->size[i])) - { - rec_encode_unsigned (0, *out_buf + *out_offset, &len); - (*out_offset) += len; - } - else - { - rec_encode_unsigned (rec->size[i]+1, *out_buf + *out_offset, &len); - (*out_offset) += len; - memcpy (*out_buf + *out_offset, rec->info[i], rec->size[i]); - (*out_offset) += rec->size[i]; - } + yaz_log(YLOG_FATAL|YLOG_ERRNO, "open %s", p->index_fname); + xfree(p); + return 0; } -} -static void rec_write_multiple (Records p, int saveCount) -{ - int i; - short ref_count = 0; - char compression_method; - Record last_rec = 0; - int out_size = 1000; - int out_offset = 0; - char *out_buf = (char *) xmalloc (out_size); - int *sysnos = (int *) xmalloc (sizeof(*sysnos) * (p->cache_cur + 1)); - int *sysnop = sysnos; - - for (i = 0; icache_cur - saveCount; i++) + if (use_isamb) { - struct record_cache_entry *e = p->record_cache + i; - switch (e->flag) - { - case recordFlagNew: - rec_cache_flush_block1 (p, e->rec, last_rec, &out_buf, - &out_size, &out_offset); - *sysnop++ = e->rec->sysno; - ref_count++; - e->flag = recordFlagNop; - last_rec = e->rec; - break; - case recordFlagWrite: - rec_release_blocks (p, e->rec->sysno); - rec_cache_flush_block1 (p, e->rec, last_rec, &out_buf, - &out_size, &out_offset); - *sysnop++ = e->rec->sysno; - ref_count++; - e->flag = recordFlagNop; - last_rec = e->rec; - break; - case recordFlagDelete: - rec_delete_single (p, e->rec); - e->flag = recordFlagNop; - break; - default: - break; - } - } + int isam_block_size = 4096; + ISAMC_M method; + + method.compare_item = rect_compare; + method.log_item = rect_log_item; + method.codec.start = rect_code_start; + method.codec.encode = rect_encode; + method.codec.decode = rect_decode; + method.codec.reset = rect_code_reset; + method.codec.stop = rect_code_stop; + + p->index_fname = "rect"; + p->isamb = isamb_open2(bfs, p->index_fname, rw, &method, + /* cache */ 0, + /* no_cat */ 1, &isam_block_size, + /* use_root_ptr */ 1); + + p->isam_p = 0; + if (p->isamb) + p->isam_p = isamb_get_root_ptr(p->isamb); - *sysnop = -1; - if (ref_count) - { - int csize = 0; /* indicate compression "not performed yet" */ - compression_method = p->compression_method; - switch (compression_method) - { - case REC_COMPRESS_BZIP2: -#if HAVE_BZLIB_H - csize = out_offset + (out_offset >> 6) + 620; - rec_tmp_expand (p, csize); -#ifdef BZ_CONFIG_ERROR - i = BZ2_bzBuffToBuffCompress -#else - i = bzBuffToBuffCompress -#endif - (p->tmp_buf+sizeof(int)+sizeof(short)+ - sizeof(char), - &csize, out_buf, out_offset, 1, 0, 30); - if (i != BZ_OK) - { - logf (LOG_WARN, "bzBuffToBuffCompress error code=%d", i); - csize = 0; - } - logf (LOG_LOG, "compress %4d %5d %5d", ref_count, out_offset, - csize); -#endif - break; - case REC_COMPRESS_NONE: - break; - } - if (!csize) - { - /* either no compression or compression not supported ... */ - csize = out_offset; - rec_tmp_expand (p, csize); - memcpy (p->tmp_buf + sizeof(int) + sizeof(short) + sizeof(char), - out_buf, out_offset); - csize = out_offset; - compression_method = REC_COMPRESS_NONE; - } - memcpy (p->tmp_buf + sizeof(int), &ref_count, sizeof(ref_count)); - memcpy (p->tmp_buf + sizeof(int)+sizeof(short), - &compression_method, sizeof(compression_method)); - - /* -------- compression */ - rec_write_tmp_buf (p, csize + sizeof(short) + sizeof(char), sysnos); } - xfree (out_buf); - xfree (sysnos); + return p; } -static void rec_cache_flush (Records p, int saveCount) +static void log_pr(const char *txt) { - int i, j; - - if (saveCount >= p->cache_cur) - saveCount = 0; - - rec_write_multiple (p, saveCount); - - for (i = 0; icache_cur - saveCount; i++) - { - struct record_cache_entry *e = p->record_cache + i; - rec_rm (&e->rec); - } - /* i still being used ... */ - for (j = 0; jrecord_cache+j, p->record_cache+i, - sizeof(*p->record_cache)); - p->cache_cur = saveCount; + yaz_log(YLOG_LOG, "%s", txt); } -static Record *rec_cache_lookup (Records p, int sysno, - enum recordCacheFlag flag) + +void recindex_close(recindex_t p) { - int i; - for (i = 0; icache_cur; i++) + if (p) { - struct record_cache_entry *e = p->record_cache + i; - if (e->rec->sysno == sysno) + if (p->index_BFile) + bf_close(p->index_BFile); + if (p->isamb) { - if (flag != recordFlagNop && e->flag == recordFlagNop) - e->flag = flag; - return &e->rec; + isamb_set_root_ptr(p->isamb, p->isam_p); + isamb_dump(p->isamb, p->isam_p, log_pr); + isamb_close(p->isamb); } + xfree(p); } - return NULL; } -static void rec_cache_insert (Records p, Record rec, enum recordCacheFlag flag) +int recindex_read_head(recindex_t p, void *buf) { - struct record_cache_entry *e; - - if (p->cache_cur == p->cache_max) - rec_cache_flush (p, 1); - else if (p->cache_cur > 0) - { - int i, j; - int used = 0; - for (i = 0; icache_cur; i++) - { - Record r = (p->record_cache + i)->rec; - for (j = 0; jsize[j]; - } - if (used > 90000) - rec_cache_flush (p, 1); - } - assert (p->cache_cur < p->cache_max); - - e = p->record_cache + (p->cache_cur)++; - e->flag = flag; - e->rec = rec_cp (rec); + return bf_read(p->index_BFile, 0, 0, 0, buf); } -void rec_close (Records *pp) +const char *recindex_get_fname(recindex_t p) { - Records p = *pp; - int i; - - assert (p); + return p->index_fname; +} - zebra_mutex_destroy (&p->mutex); - rec_cache_flush (p, 0); - xfree (p->record_cache); +ZEBRA_RES recindex_write_head(recindex_t p, const void *buf, size_t len) +{ + int r; - if (p->rw) - rec_write_head (p); + assert(p); - if (p->index_BFile) - bf_close (p->index_BFile); + assert(p->index_BFile); - for (i = 0; iindex_BFile, 0, 0, len, buf); + if (r) { - if (p->data_BFile[i]) - bf_close (p->data_BFile[i]); - xfree (p->data_fname[i]); + yaz_log(YLOG_FATAL|YLOG_ERRNO, "write head of %s", p->index_fname); + return ZEBRA_FAIL; } - xfree (p->tmp_buf); - xfree (p); - *pp = NULL; + return ZEBRA_OK; } -static Record rec_get_int (Records p, int sysno) +int recindex_read_indx(recindex_t p, zint sysno, void *buf, int itemsize, + int ignoreError) { - int i, in_size, r; - Record rec, *recp; - struct record_index_entry entry; - int freeblock, dst_type; - char *nptr, *cptr; - char *in_buf = 0; - char *bz_buf = 0; -#if HAVE_BZLIB_H - int bz_size; -#endif - char compression_method; - - assert (sysno > 0); - assert (p); - - if ((recp = rec_cache_lookup (p, sysno, recordFlagNop))) - return rec_cp (*recp); - - if (read_indx (p, sysno, &entry, sizeof(entry), 1) < 1) - return NULL; /* record is not there! */ - - if (!entry.size) - return NULL; /* record is deleted */ - - dst_type = entry.next & 7; - assert (dst_type < REC_BLOCK_TYPES); - freeblock = entry.next / 8; + int r = 0; + if (p->isamb) + { + if (p->isam_p) + { + char item[256]; + char *st = item; + char untilbuf[sizeof(zint) + 1]; - assert (freeblock > 0); - - rec_tmp_expand (p, entry.size); + ISAMB_PP isam_pp = isamb_pp_open(p->isamb, p->isam_p, 1); - cptr = p->tmp_buf; - r = bf_read (p->data_BFile[dst_type], freeblock, 0, 0, cptr); - if (r < 0) - return 0; - memcpy (&freeblock, cptr, sizeof(freeblock)); + memcpy(untilbuf, &sysno, sizeof(sysno)); + untilbuf[sizeof(sysno)] = 0; + r = isamb_pp_forward(isam_pp, st, untilbuf); - while (freeblock) - { - int tmp; - - cptr += p->head.block_size[dst_type] - sizeof(freeblock); - - memcpy (&tmp, cptr, sizeof(tmp)); - r = bf_read (p->data_BFile[dst_type], freeblock, 0, 0, cptr); - if (r < 0) - return 0; - memcpy (&freeblock, cptr, sizeof(freeblock)); - memcpy (cptr, &tmp, sizeof(tmp)); - } + isamb_pp_close(isam_pp); + if (!r) + return 0; - rec = (Record) xmalloc (sizeof(*rec)); - rec->sysno = sysno; - memcpy (&compression_method, p->tmp_buf + sizeof(int) + sizeof(short), - sizeof(compression_method)); - in_buf = p->tmp_buf + sizeof(int) + sizeof(short) + sizeof(char); - in_size = entry.size - sizeof(short) - sizeof(char); - switch (compression_method) - { - case REC_COMPRESS_BZIP2: -#if HAVE_BZLIB_H - bz_size = entry.size * 20 + 100; - while (1) - { - bz_buf = (char *) xmalloc (bz_size); -#ifdef BZ_CONFIG_ERROR - i = BZ2_bzBuffToBuffDecompress -#else - i = bzBuffToBuffDecompress -#endif - (bz_buf, &bz_size, in_buf, in_size, 0, 0); - logf (LOG_LOG, "decompress %5d %5d", in_size, bz_size); - if (i == BZ_OK) - break; - logf (LOG_LOG, "failed"); - xfree (bz_buf); - bz_size *= 2; - } - in_buf = bz_buf; - in_size = bz_size; -#else - logf (LOG_FATAL, "cannot decompress record(s) in BZIP2 format"); - exit (1); -#endif - break; - case REC_COMPRESS_NONE: - break; + if (item[sizeof(sysno)] != itemsize) + { + yaz_log(YLOG_WARN, "unexpected entry size %d != %d", + item[sizeof(sysno)], itemsize); + return 0; + } + memcpy(buf, item + sizeof(sysno) + 1, itemsize); + } } - for (i = 0; iinfo[i] = 0; - - nptr = in_buf; /* skip ref count */ - while (nptr < in_buf + in_size) + else { - int this_sysno; - int len; - rec_decode_unsigned (&this_sysno, nptr, &len); - nptr += len; - - for (i = 0; i < REC_NO_INFO; i++) - { - int this_size; - rec_decode_unsigned (&this_size, nptr, &len); - nptr += len; - - if (this_size == 0) - continue; - rec->size[i] = this_size-1; - - if (rec->size[i]) - { - rec->info[i] = nptr; - nptr += rec->size[i]; - } - else - rec->info[i] = NULL; - } - if (this_sysno == sysno) - break; + zint pos = (sysno-1)*itemsize; + int off = CAST_ZINT_TO_INT(pos%RIDX_CHUNK); + int sz1 = RIDX_CHUNK - off; /* sz1 is size of buffer to read.. */ + + if (sz1 > itemsize) + sz1 = itemsize; /* no more than itemsize bytes */ + + r = bf_read(p->index_BFile, 1+pos/RIDX_CHUNK, off, sz1, buf); + if (r == 1 && sz1 < itemsize) /* boundary? - must read second part */ + r = bf_read(p->index_BFile, 2+pos/RIDX_CHUNK, 0, itemsize - sz1, + (char*) buf + sz1); + if (r != 1 && !ignoreError) + { + yaz_log(YLOG_FATAL|YLOG_ERRNO, "read in %s at pos %ld", + p->index_fname, (long) pos); + } } - for (i = 0; iinfo[i] && rec->size[i]) - { - char *np = xmalloc (rec->size[i]); - memcpy (np, rec->info[i], rec->size[i]); - rec->info[i] = np; - } - else - { - assert (rec->info[i] == 0); - assert (rec->size[i] == 0); - } + struct record_index_entry *ep = buf; + yaz_log(YLOG_LOG, "read r=%d sysno=" ZINT_FORMAT " next=" ZINT_FORMAT + " sz=%d", r, sysno, ep->next, ep->size); } - xfree (bz_buf); - rec_cache_insert (p, rec, recordFlagNop); - return rec; +#endif + return r; } -Record rec_get (Records p, int sysno) -{ - Record rec; - zebra_mutex_lock (&p->mutex); - - rec = rec_get_int (p, sysno); - zebra_mutex_unlock (&p->mutex); - return rec; -} +struct code_read_data { + int no; + zint sysno; + void *buf; + int itemsize; + int insert_flag; +}; -static Record rec_new_int (Records p) +int bt_code_read(void *vp, char **dst, int *insertMode) { - int sysno, i; - Record rec; + struct code_read_data *s = (struct code_read_data *) vp; - assert (p); - rec = (Record) xmalloc (sizeof(*rec)); - if (1 || p->head.index_free == 0) - sysno = (p->head.index_last)++; - else - { - struct record_index_entry entry; + if (s->no == 0) + return 0; - read_indx (p, p->head.index_free, &entry, sizeof(entry), 0); - sysno = p->head.index_free; - p->head.index_free = entry.next; - } - (p->head.no_records)++; - rec->sysno = sysno; - for (i = 0; i < REC_NO_INFO; i++) - { - rec->info[i] = NULL; - rec->size[i] = 0; - } - rec_cache_insert (p, rec, recordFlagNew); - return rec; -} + (s->no)--; -Record rec_new (Records p) -{ - Record rec; - zebra_mutex_lock (&p->mutex); - - rec = rec_new_int (p); - zebra_mutex_unlock (&p->mutex); - return rec; + memcpy(*dst, &s->sysno, sizeof(zint)); + *dst += sizeof(zint); + **dst = s->itemsize; + (*dst)++; + memcpy(*dst, s->buf, s->itemsize); + *dst += s->itemsize; + *insertMode = s->insert_flag; + return 1; } -void rec_del (Records p, Record *recpp) +void recindex_write_indx(recindex_t p, zint sysno, void *buf, int itemsize) { - Record *recp; - - zebra_mutex_lock (&p->mutex); - (p->head.no_records)--; - if ((recp = rec_cache_lookup (p, (*recpp)->sysno, recordFlagDelete))) - { - rec_rm (recp); - *recp = *recpp; - } - else +#if 0 + yaz_log(YLOG_LOG, "write_indx sysno=" ZINT_FORMAT, sysno); +#endif + if (p->isamb) { - rec_cache_insert (p, *recpp, recordFlagDelete); - rec_rm (recpp); - } - zebra_mutex_unlock (&p->mutex); - *recpp = NULL; -} + struct code_read_data input; + ISAMC_I isamc_i; -void rec_put (Records p, Record *recpp) -{ - Record *recp; + input.sysno = sysno; + input.buf = buf; + input.itemsize = itemsize; - zebra_mutex_lock (&p->mutex); - if ((recp = rec_cache_lookup (p, (*recpp)->sysno, recordFlagWrite))) - { - rec_rm (recp); - *recp = *recpp; + isamc_i.clientData = &input; + isamc_i.read_item = bt_code_read; + + input.no = 1; + input.insert_flag = 2; + isamb_merge(p->isamb, &p->isam_p, &isamc_i); } else { - rec_cache_insert (p, *recpp, recordFlagWrite); - rec_rm (recpp); - } - zebra_mutex_unlock (&p->mutex); - *recpp = NULL; -} - -void rec_rm (Record *recpp) -{ - int i; - - if (!*recpp) - return ; - for (i = 0; i < REC_NO_INFO; i++) - xfree ((*recpp)->info[i]); - xfree (*recpp); - *recpp = NULL; -} + zint pos = (sysno-1)*itemsize; + int off = CAST_ZINT_TO_INT(pos%RIDX_CHUNK); + int sz1 = RIDX_CHUNK - off; /* sz1 is size of buffer to read.. */ -Record rec_cp (Record rec) -{ - Record n; - int i; + if (sz1 > itemsize) + sz1 = itemsize; /* no more than itemsize bytes */ - n = (Record) xmalloc (sizeof(*n)); - n->sysno = rec->sysno; - for (i = 0; i < REC_NO_INFO; i++) - if (!rec->info[i]) - { - n->info[i] = NULL; - n->size[i] = 0; - } - else - { - n->size[i] = rec->size[i]; - n->info[i] = (char *) xmalloc (rec->size[i]); - memcpy (n->info[i], rec->info[i], rec->size[i]); - } - return n; + bf_write(p->index_BFile, 1+pos/RIDX_CHUNK, off, sz1, buf); + if (sz1 < itemsize) /* boundary? must write second part */ + bf_write(p->index_BFile, 2+pos/RIDX_CHUNK, 0, itemsize - sz1, + (char*) buf + sz1); + } } -char *rec_strdup (const char *s, size_t *len) -{ - char *p; - - if (!s) - { - *len = 0; - return NULL; - } - *len = strlen(s)+1; - p = (char *) xmalloc (*len); - strcpy (p, s); - return p; -} +/* + * Local variables: + * c-basic-offset: 4 + * c-file-style: "Stroustrup" + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */