From 4c6c27a5098e5fd9425051ef97f98e898c098a20 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Tue, 14 May 1996 14:04:33 +0000 Subject: [PATCH] In zebraidx, the 'stat' command is improved. Statistics about ISAM/DICT is collected. --- index/Makefile | 4 +- index/index.h | 7 ++- index/invstat.c | 130 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ index/main.c | 9 +++- index/recstat.c | 9 ++-- 5 files changed, 151 insertions(+), 8 deletions(-) create mode 100644 index/invstat.c diff --git a/index/Makefile b/index/Makefile index 63bccac..d0204ec 100644 --- a/index/Makefile +++ b/index/Makefile @@ -1,7 +1,7 @@ # Copyright (C) 1995-1996, Index Data I/S # All rights reserved. # Sebastian Hammer, Adam Dickmeiss -# $Id: Makefile,v 1.36 1996-05-14 11:34:00 adam Exp $ +# $Id: Makefile,v 1.37 1996-05-14 14:04:33 adam Exp $ SHELL=/bin/sh RANLIB=ranlib @@ -20,7 +20,7 @@ TPROG3=zebrasrv DEFS=$(INCLUDE) O1 = main.o dir.o dirs.o trav.o extract.o kinput.o kcompare.o \ symtab.o text.o recctrl.o structrec.o recindex.o regxread.o recstat.o \ - lockutil.o lockidx.o zinfo.o + lockutil.o lockidx.o zinfo.o invstat.o O2 = kdump.o O3 = zserver.o kcompare.o zrpn.o zsets.o text.o recctrl.o structrec.o \ attribute.o recindex.o zlogs.o regxread.o lockutil.o locksrv.o zinfo.o diff --git a/index/index.h b/index/index.h index 5491d85..694b36d 100644 --- a/index/index.h +++ b/index/index.h @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: index.h,v $ - * Revision 1.38 1996-04-12 07:02:23 adam + * Revision 1.39 1996-05-14 14:04:33 adam + * In zebraidx, the 'stat' command is improved. Statistics about ISAM/DICT + * is collected. + * + * Revision 1.38 1996/04/12 07:02:23 adam * File update of single files. 
* * Revision 1.37 1996/03/26 16:01:13 adam @@ -203,6 +207,7 @@ void key_write (int cmd, struct it_key *k, const char *str); int key_compare (const void *p1, const void *p2); int key_qsort_compare (const void *p1, const void *p2); void key_logdump (int mask, const void *p); +void inv_prstat (const char *dict_fname, const char *isam_fname); void key_input (const char *dict_fname, const char *isam_fname, int nkeys, int cache); int merge_sort (char **buf, int from, int to); diff --git a/index/invstat.c b/index/invstat.c new file mode 100644 index 0000000..6cfeab9 --- /dev/null +++ b/index/invstat.c @@ -0,0 +1,130 @@ +/* + * Copyright (C) 1994-1996, Index Data I/S + * All rights reserved. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: invstat.c,v $ + * Revision 1.1 1996-05-14 14:04:34 adam + * In zebraidx, the 'stat' command is improved. Statistics about ISAM/DICT + * is collected. + * + */ +#include +#include +#include +#include + /* NOTE(review): the four #include directives above carry no header name in this copy of the patch; restore them from the original commit before applying. */ +#include "index.h" +#include "recindex.h" + /* Accumulator shared by inv_prstat and the inv_stat_handle callback: the open ISAM handle, the number of dictionary entries visited, the summed strlen of their names, the bucket upper bounds (a 0 entry ends the list) and one counter per bucket. tmp is not referenced anywhere in this file. */ +struct inv_stat_info { + ISAM isam; + int no_dict_entries; + int no_dict_bytes; + int isam_bounds[20]; + int isam_occurrences[20]; + char tmp[128]; +}; + /* Per-entry callback that inv_prstat hands to dict_scan. client is the struct inv_stat_info to update; pos is not used. Counts the entry, adds strlen(name) to the byte total, takes occur from is_numkeys() on the position returned by is_position(), and increments one histogram bucket. Always returns 0. */ +static int inv_stat_handle (char *name, const char *info, int pos, + void *client) +{ + int occur; + int i = 0; + struct inv_stat_info *stat_info = (struct inv_stat_info*) client; + ISPT ispt; + ISAM_P isam_p; + + stat_info->no_dict_entries++; + stat_info->no_dict_bytes += strlen(name); + /* info must start with a byte equal to sizeof(ISAM_P); the ISAM_P value is copied from the bytes that follow it. */ + assert (*info == sizeof(ISAM_P)); + memcpy (&isam_p, info+1, sizeof(ISAM_P)); + + ispt = is_position (stat_info->isam, isam_p); + + occur = is_numkeys (ispt); + + is_pt_free (ispt); + /* Advance to the first bucket whose bound is not exceeded; a bound of 0 stops the walk, so larger counts land in the bucket at the end marker. */ + while (occur > stat_info->isam_bounds[i] && stat_info->isam_bounds[i]) + i++; + ++(stat_info->isam_occurrences[i]); + + return 0; +} + /* Print statistics for the dictionary file dict_fname and the ISAM file isam_fname to stderr: the number of dictionary entries, the total strlen of their names, and a histogram of the is_numkeys() value found for each entry. Logs LOG_FATAL and calls exit(1) if either file cannot be opened. */ +void inv_prstat (const char *dict_fname, const char *isam_fname) +{ + Dict dict; + ISAM isam; + Records records; + int i, prev; + int before = 0; + int after = 1000000000; + struct inv_stat_info stat_info; + char
term_dict[2*IT_MAX_WORD+2]; + /* Term handed to dict_scan as its starting point: the two bytes 1, 0. */ + term_dict[0] = 1; + term_dict[1] = 0; + + dict = dict_open (dict_fname, 100, 0); + if (!dict) + { + logf (LOG_FATAL, "dict_open fail of `%s'", dict_fname); + exit (1); + } + isam = is_open (isam_fname, key_compare, 0, sizeof(struct it_key)); + if (!isam) + { + logf (LOG_FATAL, "is_open fail of `%s'", isam_fname); + exit (1); + } + records = rec_open (0); + /* records is only handed to rec_close further down. Reset the totals and define the bucket upper bounds: index 18 holds the terminating 0 and index 19 is never assigned. NOTE(review): the bounds go from 200 straight to 5000 - confirm that 500, 1000 and 2000 were left out on purpose. */ + stat_info.no_dict_entries = 0; + stat_info.no_dict_bytes = 0; + stat_info.isam = isam; + stat_info.isam_bounds[0] = 1; + stat_info.isam_bounds[1] = 2; + stat_info.isam_bounds[2] = 3; + stat_info.isam_bounds[3] = 5; + stat_info.isam_bounds[4] = 10; + stat_info.isam_bounds[5] = 20; + stat_info.isam_bounds[6] = 30; + stat_info.isam_bounds[7] = 50; + stat_info.isam_bounds[8] = 100; + stat_info.isam_bounds[9] = 200; + stat_info.isam_bounds[10] = 5000; + stat_info.isam_bounds[11] = 10000; + stat_info.isam_bounds[12] = 20000; + stat_info.isam_bounds[13] = 50000; + stat_info.isam_bounds[14] = 100000; + stat_info.isam_bounds[15] = 200000; + stat_info.isam_bounds[16] = 500000; + stat_info.isam_bounds[17] = 1000000; + stat_info.isam_bounds[18] = 0; + + for (i = 0; i<20; i++) + stat_info.isam_occurrences[i] = 0; + /* dict_scan is given &stat_info together with inv_stat_handle, which casts its client argument back to struct inv_stat_info; before and after are passed as 0 and 1000000000. */ + dict_scan (dict, term_dict, &before, &after, &stat_info, inv_stat_handle); + + rec_close (&records); + dict_close (dict); + is_close (isam); + /* Report on stderr: the totals first, then one line per bucket giving its low and high bound and its count; the final line covers everything above the last bound. */ + fprintf (stderr, "%d dictionary entries. 
%d bytes for strings\n", + stat_info.no_dict_entries, stat_info.no_dict_bytes); + fprintf (stderr, " size occurrences\n"); + prev = 1; + for (i = 0; stat_info.isam_bounds[i]; i++) + { + int here = stat_info.isam_bounds[i]; + fprintf (stderr, "%7d-%-7d %7d\n", + prev, here, stat_info.isam_occurrences[i]); + prev = here+1; + } + fprintf (stderr, "%7d- %7d\n", + prev, stat_info.isam_occurrences[i]); +} diff --git a/index/main.c b/index/main.c index bde872e..a3cbd58 100644 --- a/index/main.c +++ b/index/main.c @@ -1,10 +1,14 @@ /* - * Copyright (C) 1994-1995, Index Data I/S + * Copyright (C) 1994-1996, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: main.c,v $ - * Revision 1.40 1996-04-26 10:00:23 adam + * Revision 1.41 1996-05-14 14:04:34 adam + * In zebraidx, the 'stat' command is improved. Statistics about ISAM/DICT + * is collected. + * + * Revision 1.40 1996/04/26 10:00:23 adam * Added option -V to zebraidx to display version information. * Removed stupid warnings from file update. * @@ -290,6 +294,7 @@ int main (int argc, char **argv) zebraIndexLockMsg ("r"); } rec_prstat (); + inv_prstat (FNAME_WORD_DICT, FNAME_WORD_ISAM); } else { diff --git a/index/recstat.c b/index/recstat.c index aa1ff65..c27ce8f 100644 --- a/index/recstat.c +++ b/index/recstat.c @@ -1,10 +1,14 @@ /* - * Copyright (C) 1994-1995, Index Data I/S + * Copyright (C) 1994-1996, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: recstat.c,v $ - * Revision 1.1 1995-12-06 12:41:26 adam + * Revision 1.2 1996-05-14 14:04:34 adam + * In zebraidx, the 'stat' command is improved. Statistics about ISAM/DICT + * is collected. + * + * Revision 1.1 1995/12/06 12:41:26 adam * New command 'stat' for the index program. * Filenames can be read from stdin by specifying '-'. * Bug fix/enhancement of the transformation from terms to regular @@ -15,7 +19,6 @@ #include #include #include -#include #include #include -- 1.7.10.4