X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=dict%2Fdicttest.c;h=dbebc8d0c00545196666a856788911af693a780f;hp=86a733d36fdc0af0050dffd17030df16487c374d;hb=ecb3935e78cd9bcfdebafdee0834cfb1060d7b5e;hpb=3c5ad6ec79e41c91b818e9953b08c6217795693d diff --git a/dict/dicttest.c b/dict/dicttest.c index 86a733d..dbebc8d 100644 --- a/dict/dicttest.c +++ b/dict/dicttest.c @@ -1,51 +1,133 @@ -/* - * Copyright (C) 1994, Index Data I/S - * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss - * - * $Log: dicttest.c,v $ - * Revision 1.1 1994-08-16 16:26:47 adam - * Added dict. - * - */ +/* $Id: dicttest.c,v 1.36 2006-05-10 08:13:18 adam Exp $ + Copyright (C) 1995-2005 + Index Data ApS + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Zebra; see the file LICENSE.zebra. If not, write to the +Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. +*/ #include #include #include +#include -#include -#include +#include +#include +#include +#include char *prog; -Dict dict; +static Dict dict; + +static int look_hits; + +static int grep_handler (char *name, const char *info, void *client) +{ + look_hits++; + printf ("%s\n", name); + return 0; +} + +static int scan_handler (char *name, const char *info, int pos, void *client) +{ + printf ("%s\n", name); + return 0; +} int main (int argc, char **argv) { + Res my_resource = 0; + BFiles bfs; const char *name = NULL; const char *inputfile = NULL; + const char *config = NULL; + const char *delete_term = NULL; + int scan_the_thing = 0; + int do_delete = 0; + int range = -1; + int srange = 0; int rw = 0; + int infosize = 4; int cache = 10; int ret; - int verbose = 0; + int unique = 0; + char *grep_pattern = NULL; char *arg; + int no_of_iterations = 0; + int no_of_new = 0, no_of_same = 0, no_of_change = 0; + int no_of_hits = 0, no_of_misses = 0, no_not_found = 0, no_of_deleted = 0; + int max_pos; prog = argv[0]; if (argc < 2) { - fprintf (stderr, "usage:\n" - " %s [-v n] [-i f] [-w] [-c n] file\n", prog); + fprintf (stderr, "usage:\n " + " %s [-d] [-D t] [-S] [-r n] [-p n] [-u] [-g pat] [-s n] " + "[-v n] [-i f] [-w] [-c n] config file\n\n", + prog); + fprintf (stderr, " -d delete instead of insert\n"); + fprintf (stderr, " -D t delete subtree instead of insert\n"); + fprintf (stderr, " -r n set regular match range\n"); + fprintf (stderr, " -p n set regular match start range\n"); + fprintf (stderr, " -u report if keys change during insert\n"); + fprintf (stderr, " -g p try pattern n (see -r)\n"); + fprintf (stderr, " -s n set info size to n (instead of 4)\n"); + fprintf (stderr, " -v n set logging level\n"); + fprintf (stderr, " -i f read file with words\n"); + fprintf (stderr, " -w insert/delete instead of lookup\n"); + fprintf (stderr, " -c n cache size (number of pages)\n"); + fprintf (stderr, " -S scan the dictionary\n"); exit (1); } - while ((ret = options ("v:i:wc:", argv, argc, &arg)) != -2) + while ((ret = options ("D:Sdr:p:ug:s:v:i:wc:", argv, argc, &arg)) != -2) { if (ret == 0) { - if (name) + if (!config) + config = arg; + else if (!name) + name = arg; + else { - fprintf (stderr, "%s: too many files specified\n", prog); + yaz_log (YLOG_FATAL, "too many files specified\n"); exit (1); } - name = arg; + } + else if (ret == 'D') + { + delete_term = arg; + } + else if (ret == 'd') + do_delete = 1; + else if (ret == 'g') + { + grep_pattern = arg; + } + else if (ret == 'r') + { + range = atoi (arg); + } + else if (ret == 'p') + { + srange = atoi (arg); + } + else if (ret == 'u') + { + unique = 1; } else if (ret == 'c') { @@ -56,56 +138,192 @@ int main (int argc, char **argv) else if (ret == 'w') rw = 1; else if (ret == 'i') - { inputfile = arg; - rw = 1; + else if (ret == 'S') + scan_the_thing = 1; + else if (ret == 's') + { + infosize = atoi(arg); } else if (ret == 'v') - verbose = atoi(arg); + { + yaz_log_init (yaz_log_mask_str(arg), prog, NULL); + } else { - fprintf (stderr, "%s: unknown option\n", prog); + yaz_log (YLOG_FATAL, "Unknown option '-%s'", arg); exit (1); } } - if (!name) + if (!config || !name) { - fprintf (stderr, "%s: no dictionary file given\n", prog); + yaz_log (YLOG_FATAL, "no config and/or dictionary specified"); exit (1); } - dict = dict_open (name, cache, rw); + my_resource = res_open(0, 0); + if (!my_resource) + { + yaz_log (YLOG_FATAL, "cannot open resource `%s'", config); + exit (1); + } + res_read_file(my_resource, config); + + bfs = bfs_create (res_get(my_resource, "register"), 0); + if (!bfs) + { + yaz_log (YLOG_FATAL, "bfs_create fail"); + exit (1); + } + dict = dict_open (bfs, name, cache, rw, 0, 4096); if (!dict) { - fprintf (stderr, "%s: dict_open fail\n", prog); + yaz_log (YLOG_FATAL, "dict_open fail of `%s'", name); exit (1); } if (inputfile) { FILE *ipf; - char ipf_buf[256]; - char word[256]; - int i, line = 1; + char ipf_buf[1024]; + int line = 1; + char infobytes[120]; + memset (infobytes, 0, 120); if (!(ipf = fopen(inputfile, "r"))) { - fprintf (stderr, "%s: cannot open %s\n", prog, inputfile); + yaz_log (YLOG_FATAL|YLOG_ERRNO, "cannot open %s", inputfile); exit (1); } - while (fgets (ipf_buf, 255, ipf)) + while (fgets (ipf_buf, 1023, ipf)) { - for (i=0; i<255; i++) - if (ipf_buf[i] > ' ') - word[i] = ipf_buf[i]; - else - break; - word[i] = 0; - if (i) - dict_insert (dict, word, &line); + char *ipf_ptr = ipf_buf; + sprintf (infobytes, "%d", line); + for (;*ipf_ptr && *ipf_ptr != '\n';ipf_ptr++) + { + if (isalpha(*ipf_ptr) || *ipf_ptr == '_') + { + int i = 1; + while (ipf_ptr[i] && (isalnum(ipf_ptr[i]) || + ipf_ptr[i] == '_')) + i++; + if (ipf_ptr[i]) + ipf_ptr[i++] = '\0'; + if (rw) + { + if (do_delete) + switch (dict_delete (dict, ipf_ptr)) + { + case 0: + no_not_found++; + break; + case 1: + no_of_deleted++; + } + else + switch(dict_insert (dict, ipf_ptr, + infosize, infobytes)) + { + case 0: + no_of_new++; + break; + case 1: + no_of_change++; + if (unique) + yaz_log (YLOG_LOG, "%s change\n", ipf_ptr); + break; + case 2: + if (unique) + yaz_log (YLOG_LOG, "%s duplicate\n", ipf_ptr); + no_of_same++; + break; + } + } + else if(range < 0) + { + char *cp; + + cp = dict_lookup (dict, ipf_ptr); + if (cp && *cp) + no_of_hits++; + else + no_of_misses++; + } + else + { + look_hits = 0; + dict_lookup_grep (dict, ipf_ptr, range, NULL, + &max_pos, srange, grep_handler); + if (look_hits) + no_of_hits++; + else + no_of_misses++; + } + ++no_of_iterations; + if ((no_of_iterations % 10000) == 0) + { + printf ("."); fflush(stdout); + } + ipf_ptr += (i-1); + } + } ++line; } fclose (ipf); } + if (rw && delete_term) + { + yaz_log (YLOG_LOG, "dict_delete_subtree %s", delete_term); + dict_delete_subtree (dict, delete_term, 0, 0); + } + if (grep_pattern) + { + if (range < 0) + range = 0; + yaz_log (YLOG_LOG, "Grepping '%s'", grep_pattern); + dict_lookup_grep (dict, grep_pattern, range, NULL, &max_pos, + srange, grep_handler); + } + if (rw) + { + yaz_log (YLOG_LOG, "Iterations.... %d", no_of_iterations); + if (do_delete) + { + yaz_log (YLOG_LOG, "No of deleted. %d", no_of_deleted); + yaz_log (YLOG_LOG, "No not found.. %d", no_not_found); + } + else + { + yaz_log (YLOG_LOG, "No of new..... %d", no_of_new); + yaz_log (YLOG_LOG, "No of change.. %d", no_of_change); + } + } + else + { + yaz_log (YLOG_LOG, "Lookups....... %d", no_of_iterations); + yaz_log (YLOG_LOG, "No of hits.... %d", no_of_hits); + yaz_log (YLOG_LOG, "No of misses.. %d", no_of_misses); + } + if (scan_the_thing) + { + char term_dict[1024]; + + int before = 1000000; + int after = 1000000; + yaz_log (YLOG_LOG, "dict_scan"); + term_dict[0] = 1; + term_dict[1] = 0; + dict_scan (dict, term_dict, &before, &after, 0, scan_handler); + } dict_close (dict); + bfs_destroy (bfs); + res_close (my_resource); return 0; } +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ +