-/*
- * Copyright (C) 1994, Index Data I/S
- * All rights reserved.
- * Sebastian Hammer, Adam Dickmeiss
- *
- * $Log: dicttest.c,v $
- * Revision 1.7 1994-09-19 16:34:26 adam
- * Depend rule change. Minor changes in dicttest.c
- *
- * Revision 1.6 1994/09/16 15:39:12 adam
- * Initial code of lookup - not tested yet.
- *
- * Revision 1.5 1994/09/06 13:05:14 adam
- * Further development of insertion. Some special cases are
- * not properly handled yet! assert(0) are put here. The
- * binary search in each page definitely reduce usr CPU.
- *
- * Revision 1.4 1994/09/01 17:49:37 adam
- * Removed stupid line. Work on insertion in dictionary. Not finished yet.
- *
- * Revision 1.3 1994/09/01 17:44:06 adam
- * depend include change.
- *
- * Revision 1.2 1994/08/18 12:40:54 adam
- * Some development of dictionary. Not finished at all!
- *
- * Revision 1.1 1994/08/16 16:26:47 adam
- * Added dict.
- *
- */
+/* $Id: dicttest.c,v 1.27 2002-08-02 19:26:55 adam Exp $
+ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
+ Index Data Aps
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra. If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+*/
+
+
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <dict.h>
+#include <zebrautl.h>
/* Program name; main() sets it from argv[0] and uses it in the usage
 * message and when initialising logging. */
char *prog;
/* Dictionary handle opened by main() via dict_open and closed via
 * dict_close. The change shown here makes it file-local (static). */
-Dict dict;
+static Dict dict;
+
/* Number of matches reported by grep_handler; main() resets it to 0
 * before each per-word dict_lookup_grep call and tests it afterwards. */
+static int look_hits;
+
/*
 * Callback handed to dict_lookup_grep by main().
 * Increments the file-scope look_hits counter and prints `name` followed
 * by a newline on stdout. `info` and `client` are not used.
 * Always returns 0.
 */
+static int grep_handler (char *name, const char *info, void *client)
+{
+ look_hits++;
+ printf ("%s\n", name);
+ return 0;
+}
+
/*
 * Callback handed to dict_scan by main().
 * Prints `name` followed by a newline on stdout. `info`, `pos` and
 * `client` are not used. Always returns 0.
 */
+static int scan_handler (char *name, const char *info, int pos, void *client)
+{
+ printf ("%s\n", name);
+ return 0;
+}
/*
 * Command-line test driver for the dictionary module.
 *
 * Visible flow: parse options, open the config resource, create the block
 * file system from its "register" setting, open the dictionary, optionally
 * process an input file of words (insert, delete, plain lookup or grep
 * lookup), optionally delete a subtree (-D, only in write mode), grep a
 * pattern (-g) and scan the dictionary (-S), log the counters, then close
 * everything.
 *
 * Returns 0 on completion; calls exit(1) on usage errors and on failure to
 * open the config, block files, dictionary or input file.
 *
 * NOTE(review): this text is a diff and several unchanged lines of main are
 * not shown (e.g. the declaration of `ipf`, the loops that read and split
 * input words). Comments below describe only what is visible.
 * NOTE(review): printf is used in this file but no <stdio.h> include is
 * visible here; presumably it arrives via another header - confirm.
 */
int main (int argc, char **argv)
{
+ Res my_resource = 0;
+ BFiles bfs;
const char *name = NULL;
const char *inputfile = NULL;
- const char *base = NULL;
+ const char *config = NULL;
+ const char *delete_term = NULL;
+ int scan_the_thing = 0;
+ int do_delete = 0;
/* range < 0 means "plain lookup"; -r sets it and switches the per-word
 * path to dict_lookup_grep. */
+ int range = -1;
+ int srange = 0;
int rw = 0;
int infosize = 4;
int cache = 10;
int ret;
- int no_of_iterations = 0;
- int no_of_new = 0, no_of_same = 0, no_of_change = 0;
- int no_of_hits = 0, no_of_misses = 0;
int unique = 0;
+ char *grep_pattern = NULL;
char *arg;
+ int no_of_iterations = 0;
+ int no_of_new = 0, no_of_same = 0, no_of_change = 0;
+ int no_of_hits = 0, no_of_misses = 0, no_not_found = 0, no_of_deleted = 0;
/* Passed by address to dict_lookup_grep; never read in the visible code. */
+ int max_pos;
prog = argv[0];
if (argc < 2)
{
- fprintf (stderr, "usage:\n"
- " %s [-u] [-s n] [-v n] [-i f] [-w] [-c n] base file\n",
+ fprintf (stderr, "usage:\n "
+ " %s [-d] [-D t] [-S] [-r n] [-p n] [-u] [-g pat] [-s n] "
+ "[-v n] [-i f] [-w] [-c n] config file\n\n",
prog);
+ fprintf (stderr, " -d delete instead of insert\n");
+ fprintf (stderr, " -D t delete subtree instead of insert\n");
+ fprintf (stderr, " -r n set regular match range\n");
+ fprintf (stderr, " -p n set regular match start range\n");
+ fprintf (stderr, " -u report if keys change during insert\n");
/* NOTE(review): the help text below says "pattern n" while the argument is
 * listed as "p" - looks like a typo in the message; confirm before fixing. */
+ fprintf (stderr, " -g p try pattern n (see -r)\n");
+ fprintf (stderr, " -s n set info size to n (instead of 4)\n");
+ fprintf (stderr, " -v n set logging level\n");
+ fprintf (stderr, " -i f read file with words\n");
+ fprintf (stderr, " -w insert/delete instead of lookup\n");
+ fprintf (stderr, " -c n cache size (number of pages)\n");
+ fprintf (stderr, " -S scan the dictionary\n");
exit (1);
}
/* Option loop: runs until options() returns -2. A return of 0 is treated as
 * a positional argument (first the config file, then the dictionary name);
 * otherwise ret is compared against the option letter. */
- while ((ret = options ("us:v:i:wc:", argv, argc, &arg)) != -2)
+ while ((ret = options ("D:Sdr:p:ug:s:v:i:wc:", argv, argc, &arg)) != -2)
{
if (ret == 0)
{
- if (!base)
- base = arg;
+ if (!config)
+ config = arg;
else if (!name)
name = arg;
else
{
- log (LOG_FATAL, "too many files specified\n");
+ logf (LOG_FATAL, "too many files specified\n");
exit (1);
}
}
+ else if (ret == 'D')
+ {
+ delete_term = arg;
+ }
+ else if (ret == 'd')
+ do_delete = 1;
+ else if (ret == 'g')
+ {
+ grep_pattern = arg;
+ }
/* NOTE(review): atoi gives no error indication for non-numeric input;
 * the values for -r, -p and -s are used unchecked. */
+ else if (ret == 'r')
+ {
+ range = atoi (arg);
+ }
+ else if (ret == 'p')
+ {
+ srange = atoi (arg);
+ }
else if (ret == 'u')
{
unique = 1;
rw = 1;
/* NOTE(review): lines are missing from this view between here and the -i
 * branch (the end of the -u block and, presumably, the -w/-c branches) -
 * which branch "rw = 1" belongs to cannot be told from here. */
else if (ret == 'i')
inputfile = arg;
+ else if (ret == 'S')
+ scan_the_thing = 1;
else if (ret == 's')
{
infosize = atoi(arg);
}
else if (ret == 'v')
{
- log_init (atoi(arg), prog, NULL);
+ yaz_log_init (yaz_log_mask_str(arg), prog, NULL);
}
else
{
- log (LOG_FATAL, "unknown option");
+ logf (LOG_FATAL, "Unknown option '-%s'", arg);
exit (1);
}
}
/* Both positional arguments are mandatory. */
- if (!base || !name)
+ if (!config || !name)
+ {
+ logf (LOG_FATAL, "no config and/or dictionary specified");
+ exit (1);
+ }
+ my_resource = res_open (config, 0);
+ if (!my_resource)
{
- log (LOG_FATAL, "no base and/or dictionary specified");
+ logf (LOG_FATAL, "cannot open resource `%s'", config);
exit (1);
}
- common_resource = res_open (base);
- if (!common_resource)
/* The block file system is created from the "register" resource value. */
+ bfs = bfs_create (res_get(my_resource, "register"), 0);
+ if (!bfs)
{
- log (LOG_FATAL, "cannot open resource `%s'", base);
+ logf (LOG_FATAL, "bfs_create fail");
exit (1);
}
- dict = dict_open (name, cache, rw);
+ dict = dict_open (bfs, name, cache, rw, 0);
if (!dict)
{
- log (LOG_FATAL, "dict_open fail of `%s'", name);
+ logf (LOG_FATAL, "dict_open fail of `%s'", name);
exit (1);
}
if (inputfile)
if (!(ipf = fopen(inputfile, "r")))
{
- log (LOG_FATAL|LOG_ERRNO, "cannot open %s", inputfile);
+ logf (LOG_FATAL|LOG_ERRNO, "cannot open %s", inputfile);
exit (1);
}
/* NOTE(review): the code that reads lines from ipf and isolates each word
 * (ipf_ptr, i, infobytes) is not shown in this view. */
ipf_ptr[i++] = '\0';
/* Write mode: delete the word when -d was given, otherwise insert it. */
if (rw)
{
- switch(dict_insert (dict, ipf_ptr,
- infosize, infobytes))
- {
- case 0:
- no_of_new++;
- break;
- case 1:
- no_of_change++;
- if (unique)
- log (LOG_LOG, "%s change\n", ipf_ptr);
- break;
- case 2:
- if (unique)
- log (LOG_LOG, "%s duplicate\n", ipf_ptr);
- no_of_same++;
- break;
- }
+ if (do_delete)
+ switch (dict_delete (dict, ipf_ptr))
+ {
+ case 0:
+ no_not_found++;
+ break;
+ case 1:
+ no_of_deleted++;
+ }
+ else
+ switch(dict_insert (dict, ipf_ptr,
+ infosize, infobytes))
+ {
+ case 0:
+ no_of_new++;
+ break;
+ case 1:
+ no_of_change++;
+ if (unique)
+ logf (LOG_LOG, "%s change\n", ipf_ptr);
+ break;
+ case 2:
+ if (unique)
+ logf (LOG_LOG, "%s duplicate\n", ipf_ptr);
+ no_of_same++;
+ break;
+ }
}
/* Read-only mode without -r: plain lookup. The new version counts a hit
 * only when the returned pointer is non-NULL and its first byte is non-zero. */
- else
+ else if(range < 0)
{
char *cp;
cp = dict_lookup (dict, ipf_ptr);
- if (cp)
+ if (cp && *cp)
+ no_of_hits++;
+ else
+ no_of_misses++;
+ }
/* Read-only mode with -r: grep-style lookup; a word is a hit if
 * grep_handler was invoked at least once (look_hits != 0). */
+ else
+ {
+ look_hits = 0;
+ dict_lookup_grep (dict, ipf_ptr, range, NULL,
+ &max_pos, srange, grep_handler);
+ if (look_hits)
no_of_hits++;
else
no_of_misses++;
}
++no_of_iterations;
/* Progress indicator: one dot on stdout per 10000 processed words. */
+ if ((no_of_iterations % 10000) == 0)
+ {
+ printf ("."); fflush(stdout);
+ }
ipf_ptr += (i-1);
}
}
}
fclose (ipf);
}
/* -D only takes effect in write mode. */
+ if (rw && delete_term)
+ {
+ logf (LOG_LOG, "dict_delete_subtree %s", delete_term);
+ dict_delete_subtree (dict, delete_term, 0, 0);
+ }
+ if (grep_pattern)
+ {
/* When -r was not given, the -g search uses range 0. */
+ if (range < 0)
+ range = 0;
+ logf (LOG_LOG, "Grepping '%s'", grep_pattern);
+ dict_lookup_grep (dict, grep_pattern, range, NULL, &max_pos,
+ srange, grep_handler);
+ }
/* Summary. NOTE(review): no_of_same is still counted during insert but the
 * new version no longer logs it - confirm whether that is intentional. */
if (rw)
{
- log (LOG_LOG, "Insertions.... %d", no_of_iterations);
- log (LOG_LOG, "No of new..... %d", no_of_new);
- log (LOG_LOG, "No of change.. %d", no_of_change);
- log (LOG_LOG, "No of same.... %d", no_of_same);
+ logf (LOG_LOG, "Iterations.... %d", no_of_iterations);
+ if (do_delete)
+ {
+ logf (LOG_LOG, "No of deleted. %d", no_of_deleted);
+ logf (LOG_LOG, "No not found.. %d", no_not_found);
+ }
+ else
+ {
+ logf (LOG_LOG, "No of new..... %d", no_of_new);
+ logf (LOG_LOG, "No of change.. %d", no_of_change);
+ }
}
else
{
- log (LOG_LOG, "Lookups....... %d", no_of_iterations);
- log (LOG_LOG, "No of hits.... %d", no_of_hits);
- log (LOG_LOG, "No of misses.. %d", no_of_misses);
+ logf (LOG_LOG, "Lookups....... %d", no_of_iterations);
+ logf (LOG_LOG, "No of hits.... %d", no_of_hits);
+ logf (LOG_LOG, "No of misses.. %d", no_of_misses);
+ }
/* -S: scan from the one-byte term "\001" with before/after limits of
 * 1000000 each; scan_handler prints every term it is given. */
+ if (scan_the_thing)
+ {
+ char term_dict[1024];
+
+ int before = 1000000;
+ int after = 1000000;
+ logf (LOG_LOG, "dict_scan");
+ term_dict[0] = 1;
+ term_dict[1] = 0;
+ dict_scan (dict, term_dict, &before, &after, 0, scan_handler);
}
/* Teardown in the order: dictionary, block files, resource. */
dict_close (dict);
- res_close (common_resource);
+ bfs_destroy (bfs);
+ res_close (my_resource);
return 0;
}