X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Frecindex.c;h=e41c56e8396fc2bb0f01e1526c00bf43ab1bc3c2;hp=bdd134cbe02f0eb9ac7334db1bff0cd3604de577;hb=27bdd6aa26843aeac89f635ed495996088d8e8aa;hpb=ea01436a957572aaaa878b59469a4cedce7e5e21 diff --git a/index/recindex.c b/index/recindex.c index bdd134c..e41c56e 100644 --- a/index/recindex.c +++ b/index/recindex.c @@ -1,499 +1,358 @@ -/* - * Copyright (C) 1994-1995, Index Data I/S - * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss - * - * $Log: recindex.c,v $ - * Revision 1.4 1995-11-20 16:59:46 adam - * New update method: the 'old' keys are saved for each records. - * - * Revision 1.3 1995/11/16 15:34:55 adam - * Uses new record management system in both indexer and server. - * - * Revision 1.2 1995/11/15 19:13:08 adam - * Work on record management. - * - * Revision 1.1 1995/11/15 14:46:20 adam - * Started work on better record management system. - * - */ +/* This file is part of the Zebra server. + Copyright (C) 2004-2013 Index Data + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*/ + +#if HAVE_CONFIG_H +#include +#endif #include +#include #include #include -#include -#include -#include -#include +#include +#include #include "recindex.h" -struct records_info { - int rw; - int index_fd; - char *index_fname; - int data_fd; - char *data_fname; - struct records_head { - char magic[8]; - int no_records; - int index_free; - int index_last; - int data_size; - int data_slack; - int data_used; - } head; - char *tmp_buf; - int tmp_size; - int cache_size; - int cache_cur; - int cache_max; - struct record_cache_entry *record_cache; -}; +#define RIDX_CHUNK 128 + -struct record_cache_entry { - Record rec; - int dirty; +struct recindex { + char *index_fname; + BFile index_BFile; + ISAMB isamb; + ISAM_P isam_p; }; struct record_index_entry { - union { - struct { - int offset; - int size; - } used; - struct { - int next; - } free; - } u; -}; + zint next; /* first block of record info / next free entry */ + int size; /* size of record or 0 if free entry */ +} ent; -#define REC_HEAD_MAGIC "rechead" -char *rec_strdup (const char *s, size_t *len) +static void rect_log_item(int level, const void *b, const char *txt) { - char *p; + zint sys; + int len; - if (!s) - { - *len = 0; - return NULL; - } - *len = strlen(s)+1; - p = malloc (*len); - if (!p) + + memcpy(&sys, b, sizeof(sys)); + len = ((const char *) b)[sizeof(sys)]; + + if (len == sizeof(struct record_index_entry)) { - logf (LOG_FATAL|LOG_ERRNO, "malloc"); - exit (1); + memcpy(&ent, (const char *)b + sizeof(sys) + 1, len); + yaz_log(YLOG_LOG, "%s " ZINT_FORMAT " next=" ZINT_FORMAT " sz=%d", txt, sys, + ent.next, ent.size); + } - strcpy (p, s); - return p; + else + yaz_log(YLOG_LOG, "%s " ZINT_FORMAT, txt, sys); } -static void rec_write_head (Records p) +int rect_compare(const void *a, const void *b) { - int r; + zint s_a, s_b; - assert (p); - assert (p->index_fd != -1); - if (lseek (p->index_fd, (off_t) 0, SEEK_SET) == -1) - { - logf (LOG_FATAL|LOG_ERRNO, "lseek to 0 in %s", p->index_fname); - exit (1); - } - r = write (p->index_fd, &p->head, sizeof(p->head)); - switch (r) - { - case -1: - logf (LOG_FATAL|LOG_ERRNO, "write head of %s", p->index_fname); - exit (1); - case sizeof(p->head): - break; - default: - logf (LOG_FATAL, "write head of %s. wrote %d", p->index_fname, r); - exit (1); - } + memcpy(&s_a, a, sizeof(s_a)); + memcpy(&s_b, b, sizeof(s_b)); + + if (s_a > s_b) + return 1; + else if (s_a < s_b) + return -1; + return 0; } -Records rec_open (int rw) +void *rect_code_start(void) { - Records p; - int r; + return 0; +} - if (!(p = malloc (sizeof(*p)))) - { - logf (LOG_FATAL|LOG_ERRNO, "malloc"); - exit (1); - } - p->rw = rw; - p->tmp_buf = NULL; - p->tmp_size = 0; - p->data_fname = "recdata"; - p->data_fd = -1; - p->index_fname = "recindex"; - p->index_fd = open (p->index_fname, - rw ? (O_RDWR|O_CREAT) : O_RDONLY, 0666); - if (p->index_fd == -1) - { - logf (LOG_FATAL|LOG_ERRNO, "open %s", p->index_fname); - exit (1); - } - r = read (p->index_fd, &p->head, sizeof(p->head)); - switch (r) - { - case -1: - logf (LOG_FATAL|LOG_ERRNO, "read %s", p->index_fname); - exit (1); - case 0: - memcpy (p->head.magic, REC_HEAD_MAGIC, sizeof(p->head.magic)); - p->head.index_free = 0; - p->head.index_last = 1; - p->head.no_records = 0; - p->head.data_size = 0; - p->head.data_slack = 0; - p->head.data_used = 0; - if (rw) - rec_write_head (p); - break; - case sizeof(p->head): - if (memcmp (p->head.magic, REC_HEAD_MAGIC, sizeof(p->head.magic))) - { - logf (LOG_FATAL, "read %s. bad header", p->index_fname); - exit (1); - } - break; - default: - logf (LOG_FATAL, "read head of %s. expected %d. got %d", - p->index_fname, sizeof(p->head), r); - exit (1); - } - p->data_fd = open (p->data_fname, - rw ? (O_RDWR|O_CREAT) : O_RDONLY, 0666); - if (p->data_fd == -1) - { - logf (LOG_FATAL|LOG_ERRNO, "open %s", p->data_fname); - exit (1); - } - p->cache_max = 10; - p->cache_cur = 0; - if (!(p->record_cache = malloc (sizeof(*p->record_cache)*p->cache_max))) - { - logf (LOG_FATAL|LOG_ERRNO, "malloc"); - exit (1); - } - return p; +void rect_encode(void *p, char **dst, const char **src) +{ + zint sys; + int len; + + memcpy(&sys, *src, sizeof(sys)); + zebra_zint_encode(dst, sys); + (*src) += sizeof(sys); + + len = **src; + **dst = len; + (*src)++; + (*dst)++; + + memcpy(*dst, *src, len); + *dst += len; + *src += len; } -static void read_indx (Records p, int sysno, void *buf, int itemsize) +void rect_decode(void *p, char **dst, const char **src) { - int r; - off_t pos = (sysno-1)*itemsize + sizeof(p->head); + zint sys; + int len; - if (lseek (p->index_fd, pos, SEEK_SET) == (pos) -1) - { - logf (LOG_FATAL|LOG_ERRNO, "seek in %s to pos %ld", - p->index_fname, (long) pos); - exit (1); - } - r = read (p->index_fd, buf, itemsize); - if (r != itemsize) - { - if (r == -1) - logf (LOG_FATAL|LOG_ERRNO, "read in %s at pos %ld", - p->index_fname, (long) pos); - else - logf (LOG_FATAL, "read in %s at pos %ld", - p->index_fname, (long) pos); - exit (1); - } + zebra_zint_decode(src, &sys); + memcpy(*dst, &sys, sizeof(sys)); + *dst += sizeof(sys); + + len = **src; + **dst = len; + (*src)++; + (*dst)++; + + memcpy(*dst, *src, len); + *dst += len; + *src += len; } -static void rec_write_single (Records p, Record rec) +void rect_code_reset(void *p) { - struct record_index_entry entry; - int r, i, size = 0, got; - char *cptr; - off_t pos = (rec->sysno-1)*sizeof(entry) + sizeof(p->head); - - for (i = 0; i < REC_NO_INFO; i++) - if (!rec->info[i]) - size += sizeof(*rec->size); - else - size += sizeof(*rec->size) + rec->size[i]; - - entry.u.used.offset = p->head.data_size; - entry.u.used.size = size; - p->head.data_size += size; - p->head.data_used += size; - - if (lseek (p->index_fd, pos, SEEK_SET) == (pos) -1) - { - logf (LOG_FATAL|LOG_ERRNO, "seek in %s to pos %ld", - p->index_fname, (long) pos); - exit (1); - } - r = write (p->index_fd, &entry, sizeof(entry)); - if (r != sizeof(entry)) - { - if (r == -1) - logf (LOG_FATAL|LOG_ERRNO, "write of %s at pos %ld", - p->index_fname, (long) pos); - else - logf (LOG_FATAL, "write of %s at pos %ld", - p->index_fname, (long) pos); - exit (1); - } - if (lseek (p->data_fd, entry.u.used.offset, SEEK_SET) == -1) - { - logf (LOG_FATAL|LOG_ERRNO, "lseek in %s to pos %ld", - p->data_fname, entry.u.used.offset); - exit (1); - } - if (p->tmp_size < entry.u.used.size) - { - free (p->tmp_buf); - p->tmp_size = entry.u.used.size + 16384; - if (!(p->tmp_buf = malloc (p->tmp_size))) - { - logf (LOG_FATAL|LOG_ERRNO, "malloc"); - exit (1); - } - } - cptr = p->tmp_buf; - for (i = 0; i < REC_NO_INFO; i++) +} + +void rect_code_stop(void *p) +{ +} + + +recindex_t recindex_open(BFiles bfs, int rw, int use_isamb) +{ + recindex_t p = xmalloc(sizeof(*p)); + p->index_BFile = 0; + p->isamb = 0; + + p->index_fname = "reci"; + p->index_BFile = bf_open(bfs, p->index_fname, RIDX_CHUNK, rw); + if (p->index_BFile == NULL) { - memcpy (cptr, &rec->size[i], sizeof(*rec->size)); - cptr += sizeof(*rec->size); - if (rec->info[i]) - { - memcpy (cptr, rec->info[i], rec->size[i]); - cptr += rec->size[i]; - } + yaz_log(YLOG_FATAL|YLOG_ERRNO, "open %s", p->index_fname); + xfree(p); + return 0; } - for (got = 0; got < entry.u.used.size; got += r) + + if (use_isamb) { - r = write (p->data_fd, p->tmp_buf + got, entry.u.used.size - got); - if (r <= 0) - { - logf (LOG_FATAL|LOG_ERRNO, "write of %s", p->data_fname); - exit (1); - } + int isam_block_size = 4096; + ISAMC_M method; + + method.compare_item = rect_compare; + method.log_item = rect_log_item; + method.codec.start = rect_code_start; + method.codec.encode = rect_encode; + method.codec.decode = rect_decode; + method.codec.reset = rect_code_reset; + method.codec.stop = rect_code_stop; + + p->index_fname = "rect"; + p->isamb = isamb_open2(bfs, p->index_fname, rw, &method, + /* cache */ 0, + /* no_cat */ 1, &isam_block_size, + /* use_root_ptr */ 1); + + p->isam_p = 0; + if (p->isamb) + p->isam_p = isamb_get_root_ptr(p->isamb); + } + return p; } -static void rec_cache_flush (Records p) +static void log_pr(const char *txt) { - int i; - for (i = 0; icache_cur; i++) - { - struct record_cache_entry *e = p->record_cache + i; - if (e->dirty) - rec_write_single (p, e->rec); - rec_rm (&e->rec); - } - p->cache_cur = 0; + yaz_log(YLOG_LOG, "%s", txt); } -static Record *rec_cache_lookup (Records p, int sysno, int dirty) + +void recindex_close(recindex_t p) { - int i; - for (i = 0; icache_cur; i++) + if (p) { - struct record_cache_entry *e = p->record_cache + i; - if (e->rec->sysno == sysno) + if (p->index_BFile) + bf_close(p->index_BFile); + if (p->isamb) { - if (dirty) - e->dirty = 1; - return &e->rec; + isamb_set_root_ptr(p->isamb, p->isam_p); + isamb_dump(p->isamb, p->isam_p, log_pr); + isamb_close(p->isamb); } + xfree(p); } - return NULL; } -static void rec_cache_insert (Records p, Record rec, int dirty) +int recindex_read_head(recindex_t p, void *buf) { - struct record_cache_entry *e; - - if (p->cache_cur == p->cache_max) - rec_cache_flush (p); - assert (p->cache_cur < p->cache_max); - - e = p->record_cache + (p->cache_cur)++; - e->dirty = dirty; - e->rec = rec_cp (rec); + return bf_read(p->index_BFile, 0, 0, 0, buf); } -void rec_close (Records *p) +const char *recindex_get_fname(recindex_t p) { - assert (*p); - - rec_cache_flush (*p); - free ((*p)->record_cache); - - if ((*p)->rw) - rec_write_head (*p); - - if ((*p)->index_fd != -1) - close ((*p)->index_fd); - - if ((*p)->data_fd != -1) - close ((*p)->data_fd); - - free ((*p)->tmp_buf); - - free (*p); - *p = NULL; + return p->index_fname; } -Record rec_get (Records p, int sysno) +ZEBRA_RES recindex_write_head(recindex_t p, const void *buf, size_t len) { - int i; - Record rec, *recp; - struct record_index_entry entry; - int r, got; - char *nptr; + int r; - assert (sysno > 0); - assert (p); + assert(p); - if ((recp = rec_cache_lookup (p, sysno, 0))) - return rec_cp (*recp); + assert(p->index_BFile); - read_indx (p, sysno, &entry, sizeof(entry)); - - if (!(rec = malloc (sizeof(*rec)))) + r = bf_write(p->index_BFile, 0, 0, len, buf); + if (r) { - logf (LOG_FATAL|LOG_ERRNO, "malloc"); - exit (1); + yaz_log(YLOG_FATAL|YLOG_ERRNO, "write head of %s", p->index_fname); + return ZEBRA_FAIL; } - if (lseek (p->data_fd, entry.u.used.offset, SEEK_SET) == -1) - { - logf (LOG_FATAL|LOG_ERRNO, "lseek in %s to pos %ld", - p->data_fname, entry.u.used.offset); - exit (1); - } - if (p->tmp_size < entry.u.used.size) + return ZEBRA_OK; +} + +int recindex_read_indx(recindex_t p, zint sysno, void *buf, int itemsize, + int ignoreError) +{ + int r = 0; + if (p->isamb) { - free (p->tmp_buf); - p->tmp_size = entry.u.used.size + 16384; - if (!(p->tmp_buf = malloc (p->tmp_size))) + if (p->isam_p) { - logf (LOG_FATAL|LOG_ERRNO, "malloc"); - exit (1); + char item[256]; + char *st = item; + char untilbuf[sizeof(zint) + 1]; + + ISAMB_PP isam_pp = isamb_pp_open(p->isamb, p->isam_p, 1); + + memcpy(untilbuf, &sysno, sizeof(sysno)); + untilbuf[sizeof(sysno)] = 0; + r = isamb_pp_forward(isam_pp, st, untilbuf); + + isamb_pp_close(isam_pp); + if (!r) + return 0; + + if (item[sizeof(sysno)] != itemsize) + { + yaz_log(YLOG_WARN, "unexpected entry size %d != %d", + item[sizeof(sysno)], itemsize); + return 0; + } + memcpy(buf, item + sizeof(sysno) + 1, itemsize); } } - for (got = 0; got < entry.u.used.size; got += r) + else { - r = read (p->data_fd, p->tmp_buf + got, entry.u.used.size - got); - if (r <= 0) + zint pos = (sysno-1)*itemsize; + int off = CAST_ZINT_TO_INT(pos%RIDX_CHUNK); + int sz1 = RIDX_CHUNK - off; /* sz1 is size of buffer to read.. */ + + if (sz1 > itemsize) + sz1 = itemsize; /* no more than itemsize bytes */ + + r = bf_read(p->index_BFile, 1+pos/RIDX_CHUNK, off, sz1, buf); + if (r == 1 && sz1 < itemsize) /* boundary? - must read second part */ + r = bf_read(p->index_BFile, 2+pos/RIDX_CHUNK, 0, itemsize - sz1, + (char*) buf + sz1); + if (r != 1 && !ignoreError) { - logf (LOG_FATAL|LOG_ERRNO, "read of %s", p->data_fname); - exit (1); + yaz_log(YLOG_FATAL|YLOG_ERRNO, "read in %s at pos %ld", + p->index_fname, (long) pos); } } - rec->sysno = sysno; - - nptr = p->tmp_buf; - for (i = 0; i < REC_NO_INFO; i++) +#if 0 { - memcpy (&rec->size[i], nptr, sizeof(*rec->size)); - nptr += sizeof(*rec->size); - if (rec->size[i]) - { - rec->info[i] = malloc (rec->size[i]); - memcpy (rec->info[i], nptr, rec->size[i]); - nptr += rec->size[i]; - } - else - rec->info[i] = NULL; + struct record_index_entry *ep = buf; + yaz_log(YLOG_LOG, "read r=%d sysno=" ZINT_FORMAT " next=" ZINT_FORMAT + " sz=%d", r, sysno, ep->next, ep->size); } - rec_cache_insert (p, rec, 0); - return rec; +#endif + return r; } -Record rec_new (Records p) +struct code_read_data { + int no; + zint sysno; + void *buf; + int itemsize; + int insert_flag; +}; + +int bt_code_read(void *vp, char **dst, int *insertMode) { - int sysno, i; - Record rec; + struct code_read_data *s = (struct code_read_data *) vp; - assert (p); - if (!(rec = malloc (sizeof(*rec)))) - { - logf (LOG_FATAL|LOG_ERRNO, "malloc"); - exit (1); - } - if (p->head.index_free == 0) - sysno = (p->head.index_last)++; - else - { - struct record_index_entry entry; + if (s->no == 0) + return 0; - read_indx (p, p->head.index_free, &entry, sizeof(entry)); - sysno = p->head.index_free; - p->head.index_free = entry.u.free.next; - } - (p->head.no_records)++; - rec->sysno = sysno; - for (i = 0; i < REC_NO_INFO; i++) - { - rec->info[i] = NULL; - rec->size[i] = 0; - } - rec_cache_insert (p, rec, 1); - return rec; + (s->no)--; + + memcpy(*dst, &s->sysno, sizeof(zint)); + *dst += sizeof(zint); + **dst = s->itemsize; + (*dst)++; + memcpy(*dst, s->buf, s->itemsize); + *dst += s->itemsize; + *insertMode = s->insert_flag; + return 1; } -void rec_put (Records p, Record *recpp) +void recindex_write_indx(recindex_t p, zint sysno, void *buf, int itemsize) { - Record *recp; - - if ((recp = rec_cache_lookup (p, (*recpp)->sysno, 1))) +#if 0 + yaz_log(YLOG_LOG, "write_indx sysno=" ZINT_FORMAT, sysno); +#endif + if (p->isamb) { - rec_rm (recp); - *recp = *recpp; + struct code_read_data input; + ISAMC_I isamc_i; + + input.sysno = sysno; + input.buf = buf; + input.itemsize = itemsize; + + isamc_i.clientData = &input; + isamc_i.read_item = bt_code_read; + + input.no = 1; + input.insert_flag = 2; + isamb_merge(p->isamb, &p->isam_p, &isamc_i); } else { - rec_cache_insert (p, *recpp, 1); - rec_rm (recpp); + zint pos = (sysno-1)*itemsize; + int off = CAST_ZINT_TO_INT(pos%RIDX_CHUNK); + int sz1 = RIDX_CHUNK - off; /* sz1 is size of buffer to read.. */ + + if (sz1 > itemsize) + sz1 = itemsize; /* no more than itemsize bytes */ + + bf_write(p->index_BFile, 1+pos/RIDX_CHUNK, off, sz1, buf); + if (sz1 < itemsize) /* boundary? must write second part */ + bf_write(p->index_BFile, 2+pos/RIDX_CHUNK, 0, itemsize - sz1, + (char*) buf + sz1); } - *recpp = NULL; } -void rec_rm (Record *recpp) -{ - int i; - for (i = 0; i < REC_NO_INFO; i++) - free ((*recpp)->info[i]); - free (*recpp); - *recpp = NULL; -} -Record rec_cp (Record rec) -{ - Record n; - int i; +/* + * Local variables: + * c-basic-offset: 4 + * c-file-style: "Stroustrup" + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ - if (!(n = malloc (sizeof(*n)))) - { - logf (LOG_FATAL|LOG_ERRNO, "malloc"); - exit (1); - } - n->sysno = rec->sysno; - for (i = 0; i < REC_NO_INFO; i++) - if (!rec->info[i]) - { - n->info[i] = NULL; - n->size[i] = 0; - } - else - { - n->size[i] = rec->size[i]; - if (!(n->info[i] = malloc (rec->size[i]))) - { - logf (LOG_FATAL|LOG_ERRNO, "malloc. rec_cp"); - exit (1); - } - memcpy (n->info[i], rec->info[i], rec->size[i]); - } - return n; -}