From 349ddf2c6256dc5d0fbabb693b688883610ae5fa Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Mon, 29 Apr 2002 18:03:45 +0000 Subject: [PATCH] More isamb statistics --- include/isamb.h | 7 ++++++- index/invstat.c | 49 +++++++++++++++++++++++++++++++++++++++++++++---- isamb/isamb.c | 39 ++++++++++++++++++++++++++++++++++++--- 3 files changed, 87 insertions(+), 8 deletions(-) diff --git a/include/isamb.h b/include/isamb.h index e89e8cc..9fb5f71 100644 --- a/include/isamb.h +++ b/include/isamb.h @@ -1,7 +1,7 @@ /* * Copyright (C) 2000-2002, Index Data * All rights reserved. - * $Id: isamb.h,v 1.2 2002-04-16 22:31:42 adam Exp $ + * $Id: isamb.h,v 1.3 2002-04-29 18:03:45 adam Exp $ */ #ifndef ISAMB_H @@ -27,4 +27,9 @@ void isamb_pp_close (ISAMB_PP pp); int isamb_pp_num (ISAMB_PP pp); +ISAMB_PP isamb_pp_open_x (ISAMB isamb, ISAMB_P pos, int *level); +void isamb_pp_close_x (ISAMB_PP pp, int *size, int *blocks); + +int isamb_block_info (ISAMB isamb, int cat); + #endif diff --git a/index/invstat.c b/index/invstat.c index f4b548c..7310b29 100644 --- a/index/invstat.c +++ b/index/invstat.c @@ -20,6 +20,10 @@ struct inv_stat_info { int isam_bounds[20]; int isam_occurrences[20]; char tmp[128]; + int isamb_levels[10]; + int isamb_sizes[10]; + int isamb_blocks[10]; + int isamb_no[10]; }; #define SINGLETON_TYPE 8 /* the type to use for singletons that */ @@ -116,11 +120,19 @@ static int inv_stat_handle (char *name, const char *info, int pos, { ISAMB_PP pp; struct it_key key; - - pp = isamb_pp_open(stat_info->zh->reg->isamb, isam_p); + int cat = isam_p & 3; + int level; + int size; + int blocks; + + pp = isamb_pp_open_x(stat_info->zh->reg->isamb, isam_p, &level); while (isamb_pp_read(pp, &key)) occur++; - isamb_pp_close (pp); + isamb_pp_close_x (pp, &size, &blocks); + stat_info->isamb_no[cat]++; + stat_info->isamb_levels[cat] += level; + stat_info->isamb_blocks[cat] += blocks; + stat_info->isamb_sizes[cat] += size; } while (occur > stat_info->isam_bounds[i] && stat_info->isam_bounds[i]) @@ -175,6 +187,14 @@ void zebra_register_statistics (ZebraHandle zh) for (i = 0; i<20; i++) stat_info.isam_occurrences[i] = 0; + for (i = 0; i<10; i++) + { + stat_info.isamb_levels[i] = 0; + stat_info.isamb_sizes[i] = 0; + stat_info.isamb_blocks[i] = 0; + stat_info.isamb_no[i] = 0; + } + dict_scan (zh->reg->dict, term_dict, &before, &after, &stat_info, inv_stat_handle); @@ -230,6 +250,24 @@ void zebra_register_statistics (ZebraHandle zh) if ( (zh->reg->isamd) && (zh->reg->isamd->method->debug>0)) fprintf (stderr, "\n%d words using %d bytes\n", stat_info.no_dict_entries, stat_info.no_dict_bytes); + + if (zh->reg->isamb) + { + for (i = 0; i<4; i++) + { + int bsize = isamb_block_info(zh->reg->isamb, i); + if (bsize < 0) + break; + fprintf (stderr, "Category %d\n", i); + fprintf (stderr, "Block size %d\n", bsize); + fprintf (stderr, "Lists: %d\n", stat_info.isamb_no[i]); + fprintf (stderr, "Blocks: %d\n", stat_info.isamb_blocks[i]); + fprintf (stderr, "Size: %d\n", stat_info.isamb_sizes[i]); + fprintf (stderr, "Total %d\n", stat_info.isamb_blocks[i]* + bsize); + } + } + fprintf (stderr, " Occurrences Words\n"); prev = 1; for (i = 0; stat_info.isam_bounds[i]; i++) @@ -249,7 +287,10 @@ void zebra_register_statistics (ZebraHandle zh) /* * * $Log: invstat.c,v $ - * Revision 1.25 2002-04-26 08:44:47 adam + * Revision 1.26 2002-04-29 18:03:46 adam + * More isamb statistics + * + * Revision 1.25 2002/04/26 08:44:47 adam * Index statistics working again * * Revision 1.24 2002/04/05 08:46:26 adam diff --git a/isamb/isamb.c b/isamb/isamb.c index 5627af4..8578f96 100644 --- a/isamb/isamb.c +++ b/isamb/isamb.c @@ -2,7 +2,7 @@ * Copyright (c) 2000-2002, Index Data. * See the file LICENSE for details. * - * $Id: isamb.c,v 1.10 2002-04-26 08:44:47 adam Exp $ + * $Id: isamb.c,v 1.11 2002-04-29 18:03:46 adam Exp $ */ #include #include @@ -53,6 +53,8 @@ struct ISAMB_block { struct ISAMB_PP_s { ISAMB isamb; int level; + int total_size; + int no_blocks; struct ISAMB_block **block; }; @@ -558,7 +560,7 @@ int isamb_merge (ISAMB b, ISAMC_P pos, ISAMC_I stream) return pos; } -ISAMB_PP isamb_pp_open (ISAMB isamb, ISAMB_P pos) +ISAMB_PP isamb_pp_open_x (ISAMB isamb, ISAMB_P pos, int *level) { ISAMB_PP pp = xmalloc (sizeof(*pp)); @@ -566,12 +568,17 @@ ISAMB_PP isamb_pp_open (ISAMB isamb, ISAMB_P pos) pp->block = xmalloc (10 * sizeof(*pp->block)); pp->level = 0; + pp->total_size = 0; + pp->no_blocks = 0; while (1) { struct ISAMB_block *p = open_block (isamb, pos); char *src = p->bytes + p->offset; pp->block[pp->level] = p; + pp->total_size += p->size; + pp->no_blocks++; + if (p->bytes[0]) /* leaf */ break; @@ -580,20 +587,43 @@ ISAMB_PP isamb_pp_open (ISAMB isamb, ISAMB_P pos) pp->level++; } pp->block[pp->level+1] = 0; + if (level) + *level = pp->level; return pp; } -void isamb_pp_close (ISAMB_PP pp) +ISAMB_PP isamb_pp_open (ISAMB isamb, ISAMB_P pos) +{ + return isamb_pp_open_x (isamb, pos, 0); +} + +void isamb_pp_close_x (ISAMB_PP pp, int *size, int *blocks) { int i; if (!pp) return; + if (size) + *size = pp->total_size; + if (blocks) + *blocks = pp->no_blocks; for (i = 0; i <= pp->level; i++) close_block (pp->isamb, pp->block[i]); xfree (pp->block); xfree (pp); } +int isamb_block_info (ISAMB isamb, int cat) +{ + if (cat >= 0 && cat < isamb->no_cat) + return isamb->file[cat].head.block_size; + return -1; +} + +void isamb_pp_close (ISAMB_PP pp) +{ + return isamb_pp_close_x (pp, 0, 0); +} + int isamb_pp_read (ISAMB_PP pp, void *buf) { char *dst = buf; @@ -628,6 +658,9 @@ int isamb_pp_read (ISAMB_PP pp, void *buf) while (1) { pp->block[pp->level] = p = open_block (pp->isamb, pos); + + pp->total_size += p->size; + pp->no_blocks++; if (p->leaf) /* leaf */ { -- 1.7.10.4