X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=isam%2Fisam.c;h=0d61f240c1ddb737a1ef7ba9de921f9c699a5f33;hb=bad06dda453543b8dd716ee54e9b7cb2b015b6e5;hp=2f58aa175bbcdbe511485f4eedc88efd9dca79d8;hpb=5bcaf79fcb3264b26d896eb2bdda907cf9fb9f5f;p=idzebra-moved-to-github.git diff --git a/isam/isam.c b/isam/isam.c index 2f58aa1..0d61f24 100644 --- a/isam/isam.c +++ b/isam/isam.c @@ -1,34 +1,78 @@ -/* - * Copyright (C) 1994, Index Data I/S - * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss - * - * $Log: isam.c,v $ - * Revision 1.4 1994-09-26 17:11:29 quinn - * Trivial - * - * Revision 1.3 1994/09/26 17:06:35 quinn - * Back again... - * - * Revision 1.1 1994/09/12 08:02:13 quinn - * Not functional yet - * - */ +/* $Id: isam.c,v 1.27 2002-08-02 19:26:56 adam Exp $ + Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 + Index Data Aps + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Zebra; see the file LICENSE.zebra. If not, write to the +Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. +*/ + + +#include #include #include #include -#include +#include #include #include -#include #include "isutil.h" #include "rootblk.h" -#include "memory.h" -#include "physical.h" #include "keyops.h" +static ispt_struct *ispt_freelist = 0; + +static struct +{ + int total_merge_operations; + int total_items; + int dub_items_removed; + int new_items; + int failed_deletes; + int skipped_inserts; + int delete_insert_noop; + int delete_replace; + int deletes; + int remaps; + int block_jumps; + int tab_deletes; + int new_tables; +} statistics; + +static ISPT ispt_alloc() +{ + ISPT p; + + if (ispt_freelist) + { + p = ispt_freelist; + ispt_freelist = ispt_freelist->next; + } + else + p = (ISPT) xmalloc(sizeof(ispt_struct)); + return p; +} + +static void ispt_free(ISPT pt) +{ + pt->next = ispt_freelist; + ispt_freelist = pt; +} + static int splitargs(const char *s, char *bf[], int max) { int ct = 0; @@ -42,7 +86,7 @@ static int splitargs(const char *s, char *bf[], int max) ct++; if (ct > max) { - log(LOG_WARN, "Ignoring extra args to is resource"); + logf (LOG_WARN, "Ignoring extra args to is resource"); bf[ct] = '\0'; return(ct - 1); } @@ -55,32 +99,53 @@ static int splitargs(const char *s, char *bf[], int max) * Open isam file. * Process resources. */ -ISAM is_open(const char *name, int writeflag) +ISAM is_open(BFiles bfs, const char *name, + int (*cmp)(const void *p1, const void *p2), + int writeflag, int keysize, Res res) { - ISAM new; + ISAM inew; char *nm, *r, *pp[IS_MAX_BLOCKTYPES+1], m[2]; int num, size, rs, tmp, i; is_type_header th; - log(LOG_DEBUG, "is_open(%s, %s)", name, writeflag ? "RW" : "RDONLY"); - new = xmalloc(sizeof(*new)); - new->writeflag = writeflag; + logf (LOG_DEBUG, "is_open(%s, %s)", name, writeflag ? "RW" : "RDONLY"); + if (writeflag) + { + statistics.total_merge_operations = 0; + statistics.total_items = 0; + statistics.dub_items_removed = 0; + statistics.new_items = 0; + statistics.failed_deletes = 0; + statistics.skipped_inserts = 0; + statistics.delete_insert_noop = 0; + statistics.delete_replace = 0; + statistics.deletes = 0; + statistics.remaps = 0; + statistics.new_tables = 0; + statistics.block_jumps = 0; + statistics.tab_deletes = 0; + } + + inew = (ISAM) xmalloc(sizeof(*inew)); + inew->writeflag = writeflag; for (i = 0; i < IS_MAX_BLOCKTYPES; i++) - new->types[i].index = 0; /* dummy */ + inew->types[i].index = 0; /* dummy */ /* determine number and size of blocktypes */ - if (!(r = res_get(common_resource, nm = strconcat(name, ".", - "blocktypes", 0))) || !(num = splitargs(r, pp, IS_MAX_BLOCKTYPES))) + if (!(r = res_get_def(res, + nm = strconcat(name, ".", + "blocktypes", 0), "64 512 4K 32K")) || + !(num = splitargs(r, pp, IS_MAX_BLOCKTYPES))) { - log(LOG_FATAL, "Failed to locate resource %s", nm); + logf (LOG_FATAL, "Failed to locate resource %s", nm); return 0; } - new->num_types = num; + inew->num_types = num; for (i = 0; i < num; i++) { if ((rs = sscanf(pp[i], "%d%1[bBkKmM]", &size, m)) < 1) { - log(LOG_FATAL, "Error in resource %s: %s", r, pp[i]); + logf (LOG_FATAL, "Error in resource %s: %s", r, pp[i]); return 0; } if (rs == 1) @@ -88,136 +153,145 @@ ISAM is_open(const char *name, int writeflag) switch (*m) { case 'b': case 'B': - new->types[i].blocksize = size; break; + inew->types[i].blocksize = size; break; case 'k': case 'K': - new->types[i].blocksize = size * 1024; break; + inew->types[i].blocksize = size * 1024; break; case 'm': case 'M': - new->types[i].blocksize = size * 1048576; break; + inew->types[i].blocksize = size * 1048576; break; default: - log(LOG_FATAL, "Illegal size suffix: %c", *m); + logf (LOG_FATAL, "Illegal size suffix: %c", *m); return 0; } - new->types[i].dbuf = xmalloc(new->types[i].blocksize); + inew->types[i].dbuf = (char *) xmalloc(inew->types[i].blocksize); m[0] = 'A' + i; m[1] = '\0'; - if (!(new->types[i].bf = bf_open(strconcat(name, m, 0), - new->types[i].blocksize, writeflag))) + if (!(inew->types[i].bf = bf_open(bfs, strconcat(name, m, 0), + inew->types[i].blocksize, writeflag))) { - log(LOG_FATAL, "bf_open failed"); + logf (LOG_FATAL, "bf_open failed"); return 0; } - if ((rs = is_rb_read(&new->types[i], &th)) > 0) + if ((rs = is_rb_read(&inew->types[i], &th)) > 0) { - if (th.blocksize != new->types[i].blocksize) + if (th.blocksize != inew->types[i].blocksize) { - log(LOG_FATAL, "File blocksize mismatch in %s", name); + logf (LOG_FATAL, "File blocksize mismatch in %s", name); exit(1); } - new->types[i].freelist = th.freelist; - new->types[i].top = th.top; + inew->types[i].freelist = th.freelist; + inew->types[i].top = th.top; } else if (writeflag) /* write dummy superblock to determine top */ { - if ((rs = is_rb_write(&new->types[i], &th)) <=0) /* dummy */ + if ((rs = is_rb_write(&inew->types[i], &th)) <=0) /* dummy */ { - log(LOG_FATAL, "Failed to write initial superblock."); + logf (LOG_FATAL, "Failed to write initial superblock."); exit(1); } - new->types[i].freelist = -1; - new->types[i].top = rs; + inew->types[i].freelist = -1; + inew->types[i].top = rs; } /* ELSE: this is an empty file opened in read-only mode. */ } - if (!(r = res_get_def(common_resource, nm = strconcat(name, ".", "keysize", - 0), "4"))) - { - log(LOG_FATAL, "Failed to locate resource %s", nm); - return 0; - } - if ((new->keysize = atoi(r)) <= 0) + if (keysize > 0) + inew->keysize = keysize; + else { - log(LOG_FATAL, "Must specify positive keysize."); - return 0; + if (!(r = res_get_def(res, nm = strconcat(name, ".", + "keysize", 0), "4"))) + { + logf (LOG_FATAL, "Failed to locate resource %s", nm); + return 0; + } + if ((inew->keysize = atoi(r)) <= 0) + { + logf (LOG_FATAL, "Must specify positive keysize."); + return 0; + } } /* determine repack percent */ - if (!(r = res_get_def(common_resource, nm = strconcat(name, ".", "repack", - 0), IS_DEF_REPACK_PERCENT))) + if (!(r = res_get_def(res, nm = strconcat(name, ".", "repack", + 0), IS_DEF_REPACK_PERCENT))) { - log(LOG_FATAL, "Failed to locate resource %s", nm); + logf (LOG_FATAL, "Failed to locate resource %s", nm); return 0; } - new->repack = atoi(r); + inew->repack = atoi(r); /* determine max keys/blocksize */ - if (!(r = res_get(common_resource, nm = strconcat(name, ".", - "maxkeys", 0))) || !(num = splitargs(r, pp, IS_MAX_BLOCKTYPES))) + if (!(r = res_get_def(res, + nm = strconcat(name, ".", + "maxkeys", 0), "50 640 10000")) || + !(num = splitargs(r, pp, IS_MAX_BLOCKTYPES))) { - log(LOG_FATAL, "Failed to locate resource %s", nm); + logf (LOG_FATAL, "Failed to locate resource %s", nm); return 0; } - if (num < new->num_types -1) + if (num < inew->num_types -1) { - log(LOG_FATAL, "Not enough elements in %s", nm); + logf (LOG_FATAL, "Not enough elements in %s", nm); return 0; } for (i = 0; i < num; i++) { if ((rs = sscanf(pp[i], "%d", &tmp)) < 1) { - log(LOG_FATAL, "Error in resource %s: %s", r, pp[i]); + logf (LOG_FATAL, "Error in resource %s: %s", r, pp[i]); return 0; } - new->types[i].max_keys = tmp; + inew->types[i].max_keys = tmp; } /* determine max keys/block */ - for (i = 0; i < new->num_types; i++) + for (i = 0; i < inew->num_types; i++) { - if (!new->types[i].index) + if (!inew->types[i].index) { - new->types[i].max_keys_block = (new->types[i].blocksize - 2 * - sizeof(int)) / new->keysize; - new->types[i].max_keys_block0 = (new->types[i].blocksize - 3 * - sizeof(int)) / new->keysize; + inew->types[i].max_keys_block = (inew->types[i].blocksize - 2 * + sizeof(int)) / inew->keysize; + inew->types[i].max_keys_block0 = (inew->types[i].blocksize - 3 * + sizeof(int)) / inew->keysize; } else - new->types[i].max_keys_block = new->types[i].max_keys_block0 / - new->keysize; - if (new->types[i].max_keys_block0 < 1) + inew->types[i].max_keys_block = inew->types[i].max_keys_block0 / + inew->keysize; + if (inew->types[i].max_keys_block0 < 1) { - log(LOG_FATAL, "Blocksize too small in %s", name); + logf (LOG_FATAL, "Blocksize too small in %s", name); exit(1); } } /* determine nice fill rates */ - if (!(r = res_get(common_resource, nm = strconcat(name, ".", - "nicefill", 0))) || !(num = splitargs(r, pp, IS_MAX_BLOCKTYPES))) + if (!(r = res_get_def(res, + nm = strconcat(name, ".", + "nicefill", 0), "90 90 90 95")) || + !(num = splitargs(r, pp, IS_MAX_BLOCKTYPES))) { - log(LOG_FATAL, "Failed to locate resource %s", nm); + logf (LOG_FATAL, "Failed to locate resource %s", nm); return 0; } - if (num < new->num_types) + if (num < inew->num_types) { - log(LOG_FATAL, "Not enough elements in %s", nm); + logf (LOG_FATAL, "Not enough elements in %s", nm); return 0; } for (i = 0; i < num; i++) { if ((rs = sscanf(pp[i], "%d", &tmp)) < 1) { - log(LOG_FATAL, "Error in resource %s: %s", r, pp[i]); + logf (LOG_FATAL, "Error in resource %s: %s", r, pp[i]); return 0; } - new->types[i].nice_keys_block = (new->types[i].max_keys_block0 * tmp) / + inew->types[i].nice_keys_block = (inew->types[i].max_keys_block0 * tmp) / 100; - if (new->types[i].nice_keys_block < 1) - new->types[i].nice_keys_block = 1; + if (inew->types[i].nice_keys_block < 1) + inew->types[i].nice_keys_block = 1; } - new->cmp = is_default_cmp; - return new; + inew->cmp = cmp ? cmp : is_default_cmp; + return inew; } /* @@ -228,7 +302,7 @@ int is_close(ISAM is) int i; is_type_header th; - log(LOG_DEBUG, "is_close()"); + logf (LOG_DEBUG, "is_close()"); for (i = 0; i < is->num_types; i++) { if (is->types[i].bf) @@ -241,13 +315,38 @@ int is_close(ISAM is) th.top = is->types[i].top; if (is_rb_write(&is->types[i], &th) < 0) { - log(LOG_FATAL, "Failed to write headerblock"); + logf (LOG_FATAL, "Failed to write headerblock"); exit(1); } } bf_close(is->types[i].bf); } } + for (i = 0; i < is->num_types; i++) + xfree (is->types[i].dbuf); + + if (is->writeflag) + { + logf(LOG_LOG, "ISAM statistics:"); + logf(LOG_LOG, "total_merge_operations %d", + statistics.total_merge_operations); + logf(LOG_LOG, "total_items %d", statistics.total_items); + logf(LOG_LOG, "dub_items_removed %d", + statistics.dub_items_removed); + logf(LOG_LOG, "new_items %d", statistics.new_items); + logf(LOG_LOG, "failed_deletes %d", + statistics.failed_deletes); + logf(LOG_LOG, "skipped_inserts %d", + statistics.skipped_inserts); + logf(LOG_LOG, "delete_insert_noop %d", + statistics.delete_insert_noop); + logf(LOG_LOG, "delete_replace %d", + statistics.delete_replace); + logf(LOG_LOG, "delete %d", statistics.deletes); + logf(LOG_LOG, "remaps %d", statistics.remaps); + logf(LOG_LOG, "block_jumps %d", statistics.block_jumps); + logf(LOG_LOG, "tab_deletes %d", statistics.tab_deletes); + } xfree(is); return 0; } @@ -261,20 +360,24 @@ static ISAM_P is_address(int type, int pos) return r; } -ISAM_P is_merge(ISAM is, ISAM_P pos, int num, const char *data) +ISAM_P is_merge(ISAM is, ISAM_P pos, int num, char *data) { is_mtable tab; int res; char keybuf[IS_MAX_RECORD]; - int oldnum, oldtype; + int oldnum, oldtype, i; char operation, *record; + statistics.total_merge_operations++; + statistics.total_items += num; + if (!pos) + statistics.new_tables++; + is_m_establish_tab(is, &tab, pos); - /* TODO: do something to aquire oldnum at this point */ if (pos) if (is_m_read_full(&tab, tab.data) < 0) { - log(LOG_FATAL, "read_full failed"); + logf (LOG_FATAL, "read_full failed"); exit(1); } oldnum = tab.num_records; @@ -282,29 +385,35 @@ ISAM_P is_merge(ISAM is, ISAM_P pos, int num, const char *data) while (num) { operation = *(data)++; - record = (char*)data; + record = (char*) data; data += is_keysize(is); num--; - while (num && !memcmp(record, data, is_keysize(tab.is) + 1)) + while (num && !memcmp(record - 1, data, is_keysize(tab.is) + 1)) { data += 1 + is_keysize(is); num--; + statistics.dub_items_removed++; } if ((res = is_m_seek_record(&tab, record)) > 0) /* no match */ { if (operation == KEYOP_INSERT) { - log(LOG_DEBUG, "XXInserting new record."); + logf (LOG_DEBUG, "XXInserting new record."); is_m_write_record(&tab, record); + statistics.new_items++; } else - log(LOG_DEBUG, "XXDeletion failed to find match."); + { + logf (LOG_DEBUG, "XXDeletion failed to find match."); + statistics.failed_deletes++; + } } else /* match found */ { if (operation == KEYOP_INSERT) { - log(LOG_DEBUG, "XXSkipping insertion - match found."); + logf (LOG_DEBUG, "XXSkipping insertion - match found."); + statistics.skipped_inserts++; continue; } else if (operation == KEYOP_DELETE) @@ -315,47 +424,117 @@ ISAM_P is_merge(ISAM is, ISAM_P pos, int num, const char *data) /* next key is identical insert? - NOOP - skip it */ if (!memcmp(record, data + 1, is_keysize(is))) { - log(LOG_DEBUG, "XXNoop delete. skipping."); + logf (LOG_DEBUG, "XXNoop delete. skipping."); data += 1 + is_keysize(is); num--; + while (num && !memcmp(data, data + is_keysize(tab.is) + + 1, is_keysize(tab.is) + 1)) + { + data += 1 + is_keysize(is); + num--; + statistics.dub_items_removed++; + } + statistics.delete_insert_noop++; continue; } /* else check if next key can fit in this position */ - is_m_peek_record(&tab, keybuf); - res = (*is->cmp)(data + 1, keybuf); - if (res < 0) + if (is_m_peek_record(&tab, keybuf) && + (*is->cmp)(data + 1, keybuf) < 0) { - log(LOG_DEBUG, "XXReplacing record."); + logf (LOG_DEBUG, "XXReplacing record."); is_m_replace_record(&tab, data + 1); data += 1 + is_keysize(is); num--; + while (num && !memcmp(data, data + is_keysize(tab.is) + + 1, is_keysize(tab.is) + 1)) + { + data += 1 + is_keysize(is); + num--; + statistics.dub_items_removed++; + } + statistics.delete_replace++; continue; } } - log(LOG_DEBUG, "Deleting record."); + logf (LOG_DEBUG, "Deleting record."); is_m_delete_record(&tab); + statistics.deletes++; } } } - while (tab.pos_type < tab.is->num_types - 1 && tab.num_records > - tab.is->types[tab.pos_type].max_keys) - tab.pos_type++; + i = tab.pos_type; + while (i < tab.is->num_types - 1 && tab.num_records > + tab.is->types[i].max_keys) + i++; + if (i != tab.pos_type) + { + /* read remaining blocks */ + for (; tab.cur_mblock; tab.cur_mblock = tab.cur_mblock->next) + if (tab.cur_mblock->state < IS_MBSTATE_CLEAN) + is_m_read_full(&tab, tab.cur_mblock); + is_p_unmap(&tab); + tab.pos_type = i; + if (pos) + statistics.block_jumps++; + } if (!oldnum || tab.pos_type != oldtype || (abs(oldnum - tab.num_records) * 100) / oldnum > tab.is->repack) + { is_p_remap(&tab); + statistics.remaps++; + } else is_p_align(&tab); - is_p_sync(&tab); - return is_address(tab.pos_type, tab.data->diskpos); + if (tab.data) + { + is_p_sync(&tab); + pos = is_address(tab.pos_type, tab.data->diskpos); + } + else + { + pos = 0; + statistics.tab_deletes++; + } + is_m_release_tab(&tab); + return pos; } /* * Locate a table of keys in an isam file. The ISPT is an individual * position marker for that table. */ -ISPT is_position(ISAM is, ISAM_P pos); +ISPT is_position(ISAM is, ISAM_P pos) +{ + ispt_struct *p; + + p = ispt_alloc(); + is_m_establish_tab(is, &p->tab, pos); + return p; +} /* * Release ISPT. */ -void is_pt_free(ISPT ip); +void is_pt_free(ISPT ip) +{ + is_m_release_tab(&ip->tab); + ispt_free(ip); +} + +/* + * Read a key from a table. + */ +int is_readkey(ISPT ip, void *buf) +{ + return is_m_read_record(&ip->tab, buf, 0); +} + +int is_numkeys(ISPT ip) +{ + return is_m_num_records(&ip->tab); +} + +void is_rewind(ISPT ip) +{ + is_m_rewind(&ip->tab); +}