X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Frecindex.c;h=73ef6339d94b3e9eadfc238d6a9d99472b23998a;hb=0824b8e4170a9bba07a0097d1af18f81c75729bd;hp=90efa6349b8667c1e068f83f82195d3c7bffd65f;hpb=e80772c51b80b8e5c9c3cfb037c988308a4014b9;p=idzebra-moved-to-github.git diff --git a/index/recindex.c b/index/recindex.c index 90efa63..73ef633 100644 --- a/index/recindex.c +++ b/index/recindex.c @@ -1,10 +1,58 @@ /* - * Copyright (C) 1994-1995, Index Data I/S + * Copyright (C) 1994-1998, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: recindex.c,v $ - * Revision 1.9 1995-11-30 08:34:33 adam + * Revision 1.20 1998-01-12 15:04:08 adam + * The test option (-s) only uses read-lock (and not write lock). + * + * Revision 1.19 1997/09/17 12:19:16 adam + * Zebra version corresponds to YAZ version 1.4. + * Changed Zebra server so that it doesn't depend on global common_resource. + * + * Revision 1.18 1997/07/15 16:28:42 adam + * Bug fix: storeData didn't work with files with multiple records. + * Bug fix: fixed memory management with records; not really well + * thought through. + * + * Revision 1.17 1997/02/12 20:39:46 adam + * Implemented options -f that limits the log to the first + * records. + * Changed some log messages also. + * + * Revision 1.16 1996/06/04 10:19:00 adam + * Minor changes - removed include of ctype.h. + * + * Revision 1.15 1996/05/13 14:23:06 adam + * Work on compaction of set/use bytes in dictionary. + * + * Revision 1.14 1996/02/01 20:48:15 adam + * The total size of records are always checked in rec_cache_insert to + * reduce memory usage. + * + * Revision 1.13 1995/12/11 09:12:49 adam + * The rec_get function returns NULL if record doesn't exist - will + * happen in the server if the result set records have been deleted since + * the creation of the set (i.e. the search). + * The server saves a result temporarily if it is 'volatile', i.e. the + * set is register dependent. 
+ * + * Revision 1.12 1995/12/07 17:38:47 adam + * Work on locking mechanisms for concurrent updates/commit. + * + * Revision 1.11 1995/12/06 13:58:26 adam + * Improved flushing of records - all flushes except the last one + * don't write the last accessed. Also flush takes place if record + * info occupy more than about 256k. + * + * Revision 1.10 1995/12/06 12:41:24 adam + * New command 'stat' for the index program. + * Filenames can be read from stdin by specifying '-'. + * Bug fix/enhancement of the transformation from terms to regular + * expressions in the search engine. + * + * Revision 1.9 1995/11/30 08:34:33 adam * Started work on commit facility. * Changed a few malloc/free to xmalloc/xfree. * @@ -42,74 +90,8 @@ #include #include #include -#include -#include -#include -#include - -#include "recindex.h" - -#define USE_BF 1 - -#if USE_BF -#include - -#define REC_BLOCK_TYPES 2 -#define REC_HEAD_MAGIC "recindx" - -struct records_info { - int rw; - - char *index_fname; - BFile index_BFile; - - - char *data_fname[REC_BLOCK_TYPES]; - BFile data_BFile[REC_BLOCK_TYPES]; - - char *tmp_buf; - int tmp_size; - - struct record_cache_entry *record_cache; - int cache_size; - int cache_cur; - int cache_max; - - struct records_head { - char magic[8]; - int block_size[REC_BLOCK_TYPES]; - int block_free[REC_BLOCK_TYPES]; - int block_last[REC_BLOCK_TYPES]; - int block_used[REC_BLOCK_TYPES]; - int block_move[REC_BLOCK_TYPES]; - - int index_last; - int index_free; - int no_records; - - } head; -}; - -enum recordCacheFlag { recordFlagNop, recordFlagWrite, recordFlagNew, - recordFlagDelete }; - -struct record_cache_entry { - Record rec; - enum recordCacheFlag flag; -}; - -struct record_index_entry { - union { - struct { - int next; - int size; - } used; - struct { - int next; - } free; - } u; -}; +#include "recindxp.h" static void rec_write_head (Records p) { @@ -128,11 +110,11 @@ static void rec_write_head (Records p) static void rec_tmp_expand (Records p, int size, int dst_type) {
- if (p->tmp_size < size + 256 || + if (p->tmp_size < size + 2048 || p->tmp_size < p->head.block_size[dst_type]*2) { xfree (p->tmp_buf); - p->tmp_size = size + p->head.block_size[dst_type]*2 + 256; + p->tmp_size = size + p->head.block_size[dst_type]*2 + 2048; p->tmp_buf = xmalloc (p->tmp_size); } } @@ -148,7 +130,6 @@ static int read_indx (Records p, int sysno, void *buf, int itemsize, { logf (LOG_FATAL|LOG_ERRNO, "read in %s at pos %ld", p->index_fname, (long) pos); - abort (); exit (1); } return r; @@ -169,7 +150,8 @@ static void rec_release_blocks (Records p, int sysno) if (read_indx (p, sysno, &entry, sizeof(entry), 1) != 1) return ; - freeblock = entry.u.used.next; + p->head.total_bytes -= entry.size; + freeblock = entry.next; assert (freeblock > 0); dst_type = freeblock & 7; assert (dst_type < REC_BLOCK_TYPES); @@ -200,7 +182,8 @@ static void rec_delete_single (Records p, Record rec) rec_release_blocks (p, rec->sysno); - entry.u.free.next = p->head.index_free; + entry.next = p->head.index_free; + entry.size = 0; p->head.index_free = rec->sysno; write_indx (p, rec->sysno, &entry, sizeof(entry)); } @@ -250,15 +233,16 @@ static void rec_write_single (Records p, Record rec) { logf (LOG_FATAL|LOG_ERRNO, "read in %s at free block %d", p->data_fname[dst_type], block_free); + exit (1); } } else block_free = p->head.block_last[dst_type]++; if (block_prev == -1) { - entry.u.used.next = block_free*8 + dst_type; - entry.u.used.size = size; - + entry.next = block_free*8 + dst_type; + entry.size = size; + p->head.total_bytes += size; write_indx (p, rec->sysno, &entry, sizeof(entry)); } else @@ -284,7 +268,7 @@ static void rec_update_single (Records p, Record rec) rec_write_single (p, rec); } -Records rec_open (int rw) +Records rec_open (BFiles bfs, int rw) { Records p; int i, r; @@ -294,7 +278,7 @@ Records rec_open (int rw) p->tmp_size = 1024; p->tmp_buf = xmalloc (p->tmp_size); p->index_fname = "recindex"; - p->index_BFile = bf_open (p->index_fname, 128, rw); + 
p->index_BFile = bf_open (bfs, p->index_fname, 128, rw); if (p->index_BFile == NULL) { logf (LOG_FATAL|LOG_ERRNO, "open %s", p->index_fname); @@ -308,6 +292,7 @@ Records rec_open (int rw) p->head.index_free = 0; p->head.index_last = 1; p->head.no_records = 0; + p->head.total_bytes = 0; for (i = 0; ihead.block_free[i] = 0; @@ -343,7 +328,7 @@ Records rec_open (int rw) } for (i = 0; idata_BFile[i] = bf_open (p->data_fname[i], + if (!(p->data_BFile[i] = bf_open (bfs, p->data_fname[i], p->head.block_size[i], rw))) { @@ -357,10 +342,13 @@ Records rec_open (int rw) return p; } -static void rec_cache_flush (Records p) +static void rec_cache_flush (Records p, int saveCount) { - int i; - for (i = 0; icache_cur; i++) + int i, j; + + if (saveCount >= p->cache_cur) + saveCount = 0; + for (i = 0; icache_cur - saveCount; i++) { struct record_cache_entry *e = p->record_cache + i; switch (e->flag) @@ -379,7 +367,10 @@ static void rec_cache_flush (Records p) } rec_rm (&e->rec); } - p->cache_cur = 0; + for (j = 0; jrecord_cache+j, p->record_cache+i, + sizeof(*p->record_cache)); + p->cache_cur = saveCount; } static Record *rec_cache_lookup (Records p, int sysno, @@ -404,7 +395,20 @@ static void rec_cache_insert (Records p, Record rec, enum recordCacheFlag flag) struct record_cache_entry *e; if (p->cache_cur == p->cache_max) - rec_cache_flush (p); + rec_cache_flush (p, 1); + else if (p->cache_cur > 0) + { + int i, j; + int used = 0; + for (i = 0; icache_cur; i++) + { + Record r = (p->record_cache + i)->rec; + for (j = 0; jsize[j]; + } + if (used > 256000) + rec_cache_flush (p, 1); + } assert (p->cache_cur < p->cache_max); e = p->record_cache + (p->cache_cur)++; @@ -419,7 +423,7 @@ void rec_close (Records *pp) assert (p); - rec_cache_flush (p); + rec_cache_flush (p, 0); xfree (p->record_cache); if (p->rw) @@ -454,16 +458,20 @@ Record rec_get (Records p, int sysno) if ((recp = rec_cache_lookup (p, sysno, recordFlagNop))) return rec_cp (*recp); - read_indx (p, sysno, &entry, 
sizeof(entry), 0); + if (!read_indx (p, sysno, &entry, sizeof(entry), 1)) + return NULL; /* record is not there! */ - dst_type = entry.u.used.next & 7; + if (!entry.size) + return NULL; /* record is deleted */ + + dst_type = entry.next & 7; assert (dst_type < REC_BLOCK_TYPES); - freeblock = entry.u.used.next / 8; + freeblock = entry.next / 8; assert (freeblock > 0); rec = xmalloc (sizeof(*rec)); - rec_tmp_expand (p, entry.u.used.size, dst_type); + rec_tmp_expand (p, entry.size, dst_type); cptr = p->tmp_buf; bf_read (p->data_BFile[dst_type], freeblock, 0, 0, cptr); @@ -507,7 +515,7 @@ Record rec_new (Records p) assert (p); rec = xmalloc (sizeof(*rec)); - if (p->head.index_free == 0) + if (1 || p->head.index_free == 0) sysno = (p->head.index_last)++; else { @@ -515,7 +523,7 @@ Record rec_new (Records p) read_indx (p, p->head.index_free, &entry, sizeof(entry), 0); sysno = p->head.index_free; - p->head.index_free = entry.u.free.next; + p->head.index_free = entry.next; } (p->head.no_records)++; rec->sysno = sysno; @@ -532,6 +540,7 @@ void rec_del (Records p, Record *recpp) { Record *recp; + (p->head.no_records)--; if ((recp = rec_cache_lookup (p, (*recpp)->sysno, recordFlagDelete))) { rec_rm (recp); @@ -565,428 +574,9 @@ void rec_put (Records p, Record *recpp) void rec_rm (Record *recpp) { int i; - for (i = 0; i < REC_NO_INFO; i++) - xfree ((*recpp)->info[i]); - xfree (*recpp); - *recpp = NULL; -} - -Record rec_cp (Record rec) -{ - Record n; - int i; - - n = xmalloc (sizeof(*n)); - n->sysno = rec->sysno; - for (i = 0; i < REC_NO_INFO; i++) - if (!rec->info[i]) - { - n->info[i] = NULL; - n->size[i] = 0; - } - else - { - n->size[i] = rec->size[i]; - n->info[i] = xmalloc (rec->size[i]); - memcpy (n->info[i], rec->info[i], rec->size[i]); - } - return n; -} - -/* no BF --------------------------------------------------- */ -#else - -struct records_info { - int rw; - int index_fd; - char *index_fname; - int data_fd; - char *data_fname; - struct records_head { - char magic[8]; 
- int no_records; - int index_free; - int index_last; - int data_size; - int data_slack; - int data_used; - } head; - char *tmp_buf; - int tmp_size; - int cache_size; - int cache_cur; - int cache_max; - struct record_cache_entry *record_cache; -}; - -struct record_cache_entry { - Record rec; - int dirty; -}; -struct record_index_entry { - union { - struct { - int offset; - int size; - } used; - struct { - int next; - } free; - } u; -}; - -#define REC_HEAD_MAGIC "rechead" - -static void rec_write_head (Records p) -{ - int r; - - assert (p); - assert (p->index_fd != -1); - if (lseek (p->index_fd, (off_t) 0, SEEK_SET) == -1) - { - logf (LOG_FATAL|LOG_ERRNO, "lseek to 0 in %s", p->index_fname); - exit (1); - } - r = write (p->index_fd, &p->head, sizeof(p->head)); - switch (r) - { - case -1: - logf (LOG_FATAL|LOG_ERRNO, "write head of %s", p->index_fname); - exit (1); - case sizeof(p->head): - break; - default: - logf (LOG_FATAL, "write head of %s. wrote %d", p->index_fname, r); - exit (1); - } -} - -Records rec_open (int rw) -{ - Records p; - int r; - - p = xmalloc (sizeof(*p)); - p->rw = rw; - p->tmp_buf = NULL; - p->tmp_size = 0; - p->data_fname = "recdata"; - p->data_fd = -1; - p->index_fname = "recindex"; - p->index_fd = open (p->index_fname, - rw ? (O_RDWR|O_CREAT) : O_RDONLY, 0666); - if (p->index_fd == -1) - { - logf (LOG_FATAL|LOG_ERRNO, "open %s", p->index_fname); - exit (1); - } - r = read (p->index_fd, &p->head, sizeof(p->head)); - switch (r) - { - case -1: - logf (LOG_FATAL|LOG_ERRNO, "read %s", p->index_fname); - exit (1); - case 0: - memcpy (p->head.magic, REC_HEAD_MAGIC, sizeof(p->head.magic)); - p->head.index_free = 0; - p->head.index_last = 1; - p->head.no_records = 0; - p->head.data_size = 0; - p->head.data_slack = 0; - p->head.data_used = 0; - if (rw) - rec_write_head (p); - break; - case sizeof(p->head): - if (memcmp (p->head.magic, REC_HEAD_MAGIC, sizeof(p->head.magic))) - { - logf (LOG_FATAL, "read %s. 
bad header", p->index_fname); - exit (1); - } - break; - default: - logf (LOG_FATAL, "read head of %s. expected %d. got %d", - p->index_fname, sizeof(p->head), r); - exit (1); - } - p->data_fd = open (p->data_fname, - rw ? (O_RDWR|O_CREAT) : O_RDONLY, 0666); - if (p->data_fd == -1) - { - logf (LOG_FATAL|LOG_ERRNO, "open %s", p->data_fname); - exit (1); - } - p->cache_max = 10; - p->cache_cur = 0; - p->record_cache = xmalloc (sizeof(*p->record_cache)*p->cache_max)); - return p; -} - -static void read_indx (Records p, int sysno, void *buf, int itemsize) -{ - int r; - off_t pos = (sysno-1)*itemsize + sizeof(p->head); - - if (lseek (p->index_fd, pos, SEEK_SET) == (pos) -1) - { - logf (LOG_FATAL|LOG_ERRNO, "seek in %s to pos %ld", - p->index_fname, (long) pos); - exit (1); - } - r = read (p->index_fd, buf, itemsize); - if (r != itemsize) - { - if (r == -1) - logf (LOG_FATAL|LOG_ERRNO, "read in %s at pos %ld", - p->index_fname, (long) pos); - else - logf (LOG_FATAL, "read in %s at pos %ld", - p->index_fname, (long) pos); - exit (1); - } -} - -static void rec_write_single (Records p, Record rec) -{ - struct record_index_entry entry; - int r, i, size = 0, got; - char *cptr; - off_t pos = (rec->sysno-1)*sizeof(entry) + sizeof(p->head); - - for (i = 0; i < REC_NO_INFO; i++) - if (!rec->info[i]) - size += sizeof(*rec->size); - else - size += sizeof(*rec->size) + rec->size[i]; - - entry.u.used.offset = p->head.data_size; - entry.u.used.size = size; - p->head.data_size += size; - p->head.data_used += size; - - if (lseek (p->index_fd, pos, SEEK_SET) == (pos) -1) - { - logf (LOG_FATAL|LOG_ERRNO, "seek in %s to pos %ld", - p->index_fname, (long) pos); - exit (1); - } - r = write (p->index_fd, &entry, sizeof(entry)); - if (r != sizeof(entry)) - { - if (r == -1) - logf (LOG_FATAL|LOG_ERRNO, "write of %s at pos %ld", - p->index_fname, (long) pos); - else - logf (LOG_FATAL, "write of %s at pos %ld", - p->index_fname, (long) pos); - exit (1); - } - if (lseek (p->data_fd, 
entry.u.used.offset, SEEK_SET) == -1) - { - logf (LOG_FATAL|LOG_ERRNO, "lseek in %s to pos %ld", - p->data_fname, entry.u.used.offset); - exit (1); - } - if (p->tmp_size < entry.u.used.size) - { - xfree (p->tmp_buf); - p->tmp_size = entry.u.used.size + 16384; - p->tmp_buf = xmalloc (p->tmp_size)); - } - cptr = p->tmp_buf; - for (i = 0; i < REC_NO_INFO; i++) - { - memcpy (cptr, &rec->size[i], sizeof(*rec->size)); - cptr += sizeof(*rec->size); - if (rec->info[i]) - { - memcpy (cptr, rec->info[i], rec->size[i]); - cptr += rec->size[i]; - } - } - for (got = 0; got < entry.u.used.size; got += r) - { - r = write (p->data_fd, p->tmp_buf + got, entry.u.used.size - got); - if (r <= 0) - { - logf (LOG_FATAL|LOG_ERRNO, "write of %s", p->data_fname); - exit (1); - } - } -} - -static void rec_cache_flush (Records p) -{ - int i; - for (i = 0; icache_cur; i++) - { - struct record_cache_entry *e = p->record_cache + i; - if (e->dirty) - rec_write_single (p, e->rec); - rec_rm (&e->rec); - } - p->cache_cur = 0; -} - -static Record *rec_cache_lookup (Records p, int sysno, int dirty) -{ - int i; - for (i = 0; icache_cur; i++) - { - struct record_cache_entry *e = p->record_cache + i; - if (e->rec->sysno == sysno) - { - if (dirty) - e->dirty = 1; - return &e->rec; - } - } - return NULL; -} - -static void rec_cache_insert (Records p, Record rec, int dirty) -{ - struct record_cache_entry *e; - - if (p->cache_cur == p->cache_max) - rec_cache_flush (p); - assert (p->cache_cur < p->cache_max); - - e = p->record_cache + (p->cache_cur)++; - e->dirty = dirty; - e->rec = rec_cp (rec); -} - -void rec_close (Records *p) -{ - assert (*p); - - rec_cache_flush (*p); - xfree ((*p)->record_cache); - - if ((*p)->rw) - rec_write_head (*p); - - if ((*p)->index_fd != -1) - close ((*p)->index_fd); - - if ((*p)->data_fd != -1) - close ((*p)->data_fd); - - xfree ((*p)->tmp_buf); - - xfree (*p); - *p = NULL; -} - -Record rec_get (Records p, int sysno) -{ - int i; - Record rec, *recp; - struct record_index_entry 
entry; - int r, got; - char *nptr; - - assert (sysno > 0); - assert (p); - - if ((recp = rec_cache_lookup (p, sysno, 0))) - return rec_cp (*recp); - - read_indx (p, sysno, &entry, sizeof(entry)); - - rec = xmalloc (sizeof(*rec)); - if (lseek (p->data_fd, entry.u.used.offset, SEEK_SET) == -1) - { - logf (LOG_FATAL|LOG_ERRNO, "lseek in %s to pos %ld", - p->data_fname, entry.u.used.offset); - exit (1); - } - if (p->tmp_size < entry.u.used.size) - { - xfree (p->tmp_buf); - p->tmp_size = entry.u.used.size + 16384; - p->tmp_buf = xmalloc (p->tmp_size)); - } - for (got = 0; got < entry.u.used.size; got += r) - { - r = read (p->data_fd, p->tmp_buf + got, entry.u.used.size - got); - if (r <= 0) - { - logf (LOG_FATAL|LOG_ERRNO, "read of %s", p->data_fname); - exit (1); - } - } - rec->sysno = sysno; - - nptr = p->tmp_buf; - for (i = 0; i < REC_NO_INFO; i++) - { - memcpy (&rec->size[i], nptr, sizeof(*rec->size)); - nptr += sizeof(*rec->size); - if (rec->size[i]) - { - rec->info[i] = xmalloc (rec->size[i]); - memcpy (rec->info[i], nptr, rec->size[i]); - nptr += rec->size[i]; - } - else - rec->info[i] = NULL; - } - rec_cache_insert (p, rec, 0); - return rec; -} - -Record rec_new (Records p) -{ - int sysno, i; - Record rec; - - assert (p); - rec = xmalloc (sizeof(*rec)); - if (p->head.index_free == 0) - sysno = (p->head.index_last)++; - else - { - struct record_index_entry entry; - - read_indx (p, p->head.index_free, &entry, sizeof(entry)); - sysno = p->head.index_free; - p->head.index_free = entry.u.free.next; - } - (p->head.no_records)++; - rec->sysno = sysno; - for (i = 0; i < REC_NO_INFO; i++) - { - rec->info[i] = NULL; - rec->size[i] = 0; - } - rec_cache_insert (p, rec, 1); - return rec; -} - -void rec_put (Records p, Record *recpp) -{ - Record *recp; - - if ((recp = rec_cache_lookup (p, (*recpp)->sysno, 1))) - { - rec_rm (recp); - *recp = *recpp; - } - else - { - rec_cache_insert (p, *recpp, 1); - rec_rm (recpp); - } - *recpp = NULL; -} - -void rec_rm (Record *recpp) -{ - 
int i; + if (!*recpp) + return ; for (i = 0; i < REC_NO_INFO; i++) xfree ((*recpp)->info[i]); xfree (*recpp); @@ -1015,13 +605,6 @@ Record rec_cp (Record rec) return n; } -void rec_del (Records p, Record *recpp) -{ - assert (0); -} - - -#endif char *rec_strdup (const char *s, size_t *len) {