X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=isamb%2Fbenchindex1.c;h=54cf08cff7802576006fea121319f10c742fc073;hb=07bf59c106e8d2492c5743019c2932a001e48181;hp=543523abbebc901e0a927ef49d20bc6cb05d236d;hpb=3cc9c2670e9230e76665ef3fa60eab3bfb523c6d;p=idzebra-moved-to-github.git diff --git a/isamb/benchindex1.c b/isamb/benchindex1.c index 543523a..54cf08c 100644 --- a/isamb/benchindex1.c +++ b/isamb/benchindex1.c @@ -1,8 +1,5 @@ -/* $Id: benchindex1.c,v 1.4 2006-12-11 15:08:55 adam Exp $ - Copyright (C) 1995-2006 - Index Data ApS - -This file is part of the Zebra server. +/* This file is part of the Zebra server. + Copyright (C) 2004-2013 Index Data Zebra is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -20,6 +17,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#if HAVE_CONFIG_H +#include +#endif #include #include #include @@ -28,6 +28,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include #include +#include #include #include #include @@ -100,10 +101,10 @@ int code_read(void *vp, char **dst, int *insertMode) if (b->current_entry >= b->no_entries) return 0; - + t = b->ar[b->current_entry]; b->current_entry++; - + key.len = 3; key.mem[0] = t->word_id; key.mem[1] = t->docid; @@ -130,8 +131,12 @@ void index_block_flush(struct index_block *b, ISAMB isb, Dict dict, int no_words = 0, no_new_words = 0; const char *dict_info = 0; ISAM_P isamc_p = 0; - zebra_timing_t tim = zebra_timing_create(); - + yaz_timing_t tim_dict = 0; + yaz_timing_t tim_isamb = 0; + zint number_of_int_splits = isamb_get_int_splits(isb); + zint number_of_leaf_splits = isamb_get_leaf_splits(isb); + zint number_of_dict_splits = dict_get_no_split(dict); + b->ar = xmalloc(sizeof(*b->ar) * b->no_entries); for (i = 0; i < b->no_entries; i++, t = t->next) { @@ -139,8 +144,9 @@ void index_block_flush(struct index_block *b, ISAMB isb, Dict dict, b->ar[i] = t; } assert(!t); - + qsort(b->ar, b->no_entries, sizeof(*b->ar), cmp_ar); + tim_dict = yaz_timing_create(); #if 0 for (i = 0; i < b->no_entries; i++) { @@ -185,12 +191,15 @@ void index_block_flush(struct index_block *b, ISAMB isb, Dict dict, } dict_insert(dict, "_w", sizeof(word_id_seq), &word_id_seq); + yaz_timing_stop(tim_dict); + tim_isamb = yaz_timing_create(); + b->current_entry = 0; if (b->no_entries) { ISAMC_I isamc_i; - + isamc_i.clientData = b; isamc_i.read_item = code_read; @@ -200,24 +209,47 @@ void index_block_flush(struct index_block *b, ISAMB isb, Dict dict, dict_insert(dict, "_i", sizeof(isamc_p), &isamc_p); } - yaz_log(YLOG_LOG, "Flushed %d postings, %d/%d words, %d records", - b->no_entries, no_words, no_new_words, no_docs); + yaz_timing_stop(tim_isamb); + + number_of_int_splits = isamb_get_int_splits(isb) - number_of_int_splits; + number_of_leaf_splits = isamb_get_leaf_splits(isb) - number_of_leaf_splits; + number_of_dict_splits = dict_get_no_split(dict) - number_of_dict_splits; + + if (b->round == 0) + { + printf("# run total dict-real user sys isam-real user sys " + " intsp leafsp docs postings words new d-spl\n"); + } + b->round++; + printf("%5d %9.6f %9.6f %5.2f %5.2f %9.6f %5.2f %5.2f " + "%6" ZINT_FORMAT0 " %6" ZINT_FORMAT0 + " %8d %8d %6d %6d" " %5" ZINT_FORMAT0 "\n", + b->round, + yaz_timing_get_real(tim_dict) + yaz_timing_get_real(tim_isamb), + yaz_timing_get_real(tim_dict), + yaz_timing_get_user(tim_dict), + yaz_timing_get_sys(tim_dict), + yaz_timing_get_real(tim_isamb), + yaz_timing_get_user(tim_isamb), + yaz_timing_get_sys(tim_isamb), + number_of_int_splits, + number_of_leaf_splits, + no_docs, + b->no_entries, + no_words, + no_new_words, + number_of_dict_splits + ); + fflush(stdout); + xfree(b->ar); b->ar = 0; nmem_reset(b->nmem); b->no_entries = 0; b->terms = 0; - b->round++; - - zebra_timing_stop(tim); - printf("%3d %8.6f %5.2f %5.2f\n", - b->round, - zebra_timing_get_real(tim), - zebra_timing_get_user(tim), - zebra_timing_get_sys(tim)); - fflush(stdout); - zebra_timing_destroy(&tim); + yaz_timing_destroy(&tim_isamb); + yaz_timing_destroy(&tim_dict); } void index_block_check_flush(struct index_block *b, ISAMB isb, Dict dict, @@ -227,7 +259,6 @@ void index_block_check_flush(struct index_block *b, ISAMB isb, Dict dict, int max = b->current_max; if (total > max) { - yaz_log(YLOG_LOG, "flush to disk total=%d max=%d", total, max); index_block_flush(b, isb, dict, no_docs); } } @@ -244,12 +275,6 @@ void index_block_add(struct index_block *b, b->no_entries++; } -void exit_usage(void) -{ - fprintf(stderr, "benchindex1 [-t type] [-m mem] [-i] [inputfile]\n"); - exit(1); -} - void index_term(struct index_block *b, const char *term, zint docid, zint *seqno) { @@ -284,7 +309,7 @@ void index_wrbuf(struct index_block *b, WRBUF wrbuf, zint docid, { /* continuation line */ for (i = 0; i<4 && *cp; i++, cp++) ; - } + } } nl = 0; if (*cp == '\n') @@ -325,7 +350,7 @@ void index_wrbuf(struct index_block *b, WRBUF wrbuf, zint docid, sz++; } cp++; - } + } } if (sz) index_term(b, term, docid, &seqno); @@ -389,7 +414,7 @@ void index_marc_line_records(ISAMB isb, } (*docid_seq)++; no_docs++; - index_block_check_flush(b, isb, dict, no_docs); + index_block_flush(b, isb, dict, no_docs); index_block_destroy(&b); } @@ -425,7 +450,7 @@ void index_marc_from_file(ISAMB isb, long off = ftell(inf) - 5; if (verbose || print_offset) printf("\n", + "%ld (0x%lx) -->\n", *buf & 0xff, *buf & 0xff, off, off); for (i = 0; i<4; i++) @@ -453,7 +478,7 @@ void index_marc_from_file(ISAMB isb, if (r < rlen) break; yaz_marc_read_iso2709(mt, buf, len); - + if (yaz_marc_write_line(mt, wrbuf)) break; @@ -465,29 +490,38 @@ void index_marc_from_file(ISAMB isb, index_block_check_flush(b, isb, dict, no_docs); } index_block_flush(b, isb, dict, no_docs); - wrbuf_free(wrbuf, 1); + wrbuf_destroy(wrbuf); yaz_marc_destroy(mt); index_block_destroy(&b); } +void exit_usage(void) +{ + fprintf(stderr, "benchindex1 [-t type] [-c d:i] [-m mem] [-i] [inputfile]\n"); + exit(1); +} + int main(int argc, char **argv) { BFiles bfs; - ISAMB isb; - ISAMC_M method; + ISAMB isb_postings; + ISAMC_M method_postings; Dict dict; int ret; int reset = 0; char *arg; int memory = 5; + int isam_cache_size = 40; + int dict_cache_size = 50; const char *fname = 0; FILE *inf = stdin; - zebra_timing_t tim = 0; + yaz_timing_t tim = 0; zint docid_seq = 1; const char *dict_info; const char *type = "iso2709"; + int int_count_enable = 1; - while ((ret = options("im:t:", argv, argc, &arg)) != -2) + while ((ret = options("im:t:c:N", argv, argc, &arg)) != -2) { switch(ret) { @@ -508,15 +542,26 @@ int main(int argc, char **argv) exit_usage(); } break; + case 'c': + if (sscanf(arg, "%d:%d", &dict_cache_size, &isam_cache_size) + != 2) + { + fprintf(stderr, "bad cache sizes for -c\n"); + exit_usage(); + } + break; case 0: fname = arg; break; + case 'N': + int_count_enable = 0; + break; default: fprintf(stderr, "bad option.\n"); exit_usage(); } } - + if (fname) { inf = fopen(fname, "rb"); @@ -526,17 +571,23 @@ int main(int argc, char **argv) exit(1); } } - /* setup method (attributes) */ - method.compare_item = key_compare; - method.log_item = key_logdump_txt; + printf("# benchindex1 %s %s\n", __DATE__, __TIME__); + printf("# isam_cache_size = %d\n", isam_cache_size); + printf("# dict_cache_size = %d\n", dict_cache_size); + printf("# int_count_enable = %d\n", int_count_enable); + printf("# memory = %d\n", memory); - method.codec.start = iscz1_start; - method.codec.decode = iscz1_decode; - method.codec.encode = iscz1_encode; - method.codec.stop = iscz1_stop; - method.codec.reset = iscz1_reset; + /* setup postings isamb attributes */ + method_postings.compare_item = key_compare; + method_postings.log_item = key_logdump_txt; - method.debug = 0; + method_postings.codec.start = iscz1_start; + method_postings.codec.decode = iscz1_decode; + method_postings.codec.encode = iscz1_encode; + method_postings.codec.stop = iscz1_stop; + method_postings.codec.reset = iscz1_reset; + + method_postings.debug = 0; /* create block system */ bfs = bfs_create(0, 0); @@ -549,15 +600,18 @@ int main(int argc, char **argv) if (reset) bf_reset(bfs); - tim = zebra_timing_create(); + tim = yaz_timing_create(); /* create isam handle */ - isb = isamb_open (bfs, "isamb", 1, &method, 0); - if (!isb) + isb_postings = isamb_open (bfs, "isamb", isam_cache_size ? 1 : 0, + &method_postings, 0); + if (!isb_postings) { yaz_log(YLOG_WARN, "isamb_open failed"); exit(2); } - dict = dict_open(bfs, "dict", 50, 1, 0, 4096); + isamb_set_cache_size(isb_postings, isam_cache_size); + isamb_set_int_count(isb_postings, int_count_enable); + dict = dict_open(bfs, "dict", dict_cache_size, 1, 0, 4096); dict_info = dict_lookup(dict, "_s"); if (dict_info) @@ -567,29 +621,29 @@ int main(int argc, char **argv) } if (!strcmp(type, "iso2709")) - index_marc_from_file(isb, dict, &docid_seq, inf, memory, + index_marc_from_file(isb_postings, dict, &docid_seq, inf, memory, 0 /* verbose */ , 0 /* print_offset */); else if (!strcmp(type, "line")) - index_marc_line_records(isb, dict, &docid_seq, inf, memory); + index_marc_line_records(isb_postings, dict, &docid_seq, inf, memory); - yaz_log(YLOG_LOG, "Total " ZINT_FORMAT " documents", docid_seq); + printf("# Total " ZINT_FORMAT " documents\n", docid_seq); dict_insert(dict, "_s", sizeof(docid_seq), &docid_seq); dict_close(dict); - isamb_close(isb); + isamb_close(isb_postings); if (fname) fclose(inf); /* exit block system */ bfs_destroy(bfs); - zebra_timing_stop(tim); + yaz_timing_stop(tim); + + printf("# Total timings real=%8.6f user=%3.2f system=%3.2f\n", + yaz_timing_get_real(tim), + yaz_timing_get_user(tim), + yaz_timing_get_sys(tim)); - yaz_log(YLOG_LOG, "Total %8.6f %5.2f %5.2f\n", - zebra_timing_get_real(tim), - zebra_timing_get_user(tim), - zebra_timing_get_sys(tim)); - - zebra_timing_destroy(&tim); + yaz_timing_destroy(&tim); exit(0); return 0; @@ -597,6 +651,7 @@ int main(int argc, char **argv) /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab