-/*
- * Copyright (C) 1994-1999, Index Data
- * All rights reserved.
- * Sebastian Hammer, Adam Dickmeiss
- *
- * $Log: dicttest.c,v $
- * Revision 1.24 2000-09-05 14:04:05 adam
- * Updates for prefix 'yaz_' for YAZ log functions.
- *
- * Revision 1.23 2000/07/07 12:49:20 adam
- * Optimized resultSetInsert{Rank,Sort}.
- *
- * Revision 1.22 1999/02/02 14:50:19 adam
- * Updated WIN32 code specific sections. Changed header.
- *
- * Revision 1.21 1996/10/29 14:00:03 adam
- * Page size given by DICT_DEFAULT_PAGESIZE in dict.h.
- *
- * Revision 1.20 1996/03/20 09:35:16 adam
- * Function dict_lookup_grep got extra parameter, init_pos, which marks
- * from which position in pattern approximate pattern matching should occur.
- *
- * Revision 1.19 1996/02/02 13:43:50 adam
- * The public functions simply use char instead of Dict_char to represent
- * search strings. Dict_char is used internally only.
- *
- * Revision 1.18 1996/02/01 20:39:52 adam
- * Bug fix: insert didn't work on 8-bit characters due to unsigned char
- * compares in dict_strcmp (strcmp) and signed Dict_char. Dict_char is
- * unsigned now.
- *
- * Revision 1.17 1995/12/06 17:48:30 adam
- * Bug fix: delete didn't work.
- *
- * Revision 1.16 1995/10/09 16:18:31 adam
- * Function dict_lookup_grep got extra client data parameter.
- *
- * Revision 1.15 1995/09/04 12:33:31 adam
- * Various cleanup. YAZ util used instead.
- *
- * Revision 1.14 1994/10/04 17:46:55 adam
- * Function options now returns arg with error option.
- *
- * Revision 1.13 1994/10/04 12:08:05 adam
- * Some bug fixes and some optimizations.
- *
- * Revision 1.12 1994/10/03 17:23:03 adam
- * First version of dictionary lookup with regular expressions and errors.
- *
- * Revision 1.11 1994/09/28 13:07:09 adam
- * Use log_mask_str now.
- *
- * Revision 1.10 1994/09/26 10:17:24 adam
- * Minor changes.
- *
- * Revision 1.9 1994/09/22 14:43:56 adam
- * First functional version of lookup with error correction. A 'range'
- * specified the maximum number of insertions+deletions+substitutions.
- *
- * Revision 1.8 1994/09/22 10:43:44 adam
- * Two versions of depend. Type 1 is the tail-type compatible with
- * all make programs. Type 2 is the GNU make with include facility.
- * Type 2 is default. depend rule chooses current rule.
- *
- * Revision 1.7 1994/09/19 16:34:26 adam
- * Depend rule change. Minor changes in dicttest.c
- *
- * Revision 1.6 1994/09/16 15:39:12 adam
- * Initial code of lookup - not tested yet.
- *
- * Revision 1.5 1994/09/06 13:05:14 adam
- * Further development of insertion. Some special cases are
- * not properly handled yet! assert(0) are put here. The
- * binary search in each page definitely reduce usr CPU.
- *
- * Revision 1.4 1994/09/01 17:49:37 adam
- * Removed stupid line. Work on insertion in dictionary. Not finished yet.
- *
- * Revision 1.3 1994/09/01 17:44:06 adam
- * depend include change.
- *
- * Revision 1.2 1994/08/18 12:40:54 adam
- * Some development of dictionary. Not finished at all!
- *
- * Revision 1.1 1994/08/16 16:26:47 adam
- * Added dict.
- *
- */
+/* $Id: dicttest.c,v 1.27 2002-08-02 19:26:55 adam Exp $
+ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
+ Index Data Aps
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra. If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+*/
+
+
#include <stdlib.h>
#include <string.h>
static int look_hits;
-static int grep_handle (char *name, const char *info, void *client)
+static int grep_handler (char *name, const char *info, void *client)
{ /* Callback handed to dict_lookup_grep by main: counts the match and prints its name. */
look_hits++; /* file-scope counter; main tests it after the per-word dict_lookup_grep call */
printf ("%s\n", name); /* one matched name per line on stdout */
return 0; /* info and client are not used here; NOTE(review): 0 presumably means "continue" - confirm against the dict API */
}
+static int scan_handler (char *name, const char *info, int pos, void *client)
+{ /* Callback handed to dict_scan by main (the -S option): prints each name it is given. */
+ printf ("%s\n", name); /* one name per line on stdout */
+ return 0; /* info, pos and client are not used here; NOTE(review): 0 presumably means "continue" - confirm against the dict API */
+}
+
int main (int argc, char **argv)
{
Res my_resource = 0;
const char *name = NULL;
const char *inputfile = NULL;
const char *config = NULL;
+ const char *delete_term = NULL;
+ int scan_the_thing = 0;
int do_delete = 0;
int range = -1;
int srange = 0;
if (argc < 2)
{
fprintf (stderr, "usage:\n "
- " %s [-d] [-r n] [-p n] [-u] [-g pat] [-s n] [-v n] [-i f]"
- " [-w] [-c n] config file\n\n",
+ " %s [-d] [-D t] [-S] [-r n] [-p n] [-u] [-g pat] [-s n] "
+ "[-v n] [-i f] [-w] [-c n] config file\n\n",
prog);
fprintf (stderr, " -d delete instead of insert\n");
+ fprintf (stderr, " -D t delete subtree instead of insert\n");
fprintf (stderr, " -r n set regular match range\n");
fprintf (stderr, " -p n set regular match start range\n");
fprintf (stderr, " -u report if keys change during insert\n");
fprintf (stderr, " -i f read file with words\n");
fprintf (stderr, " -w insert/delete instead of lookup\n");
fprintf (stderr, " -c n cache size (number of pages)\n");
+ fprintf (stderr, " -S scan the dictionary\n");
exit (1);
}
- while ((ret = options ("dr:p:ug:s:v:i:wc:", argv, argc, &arg)) != -2)
+ while ((ret = options ("D:Sdr:p:ug:s:v:i:wc:", argv, argc, &arg)) != -2)
{
if (ret == 0)
{
exit (1);
}
}
+ else if (ret == 'D')
+ {
+ delete_term = arg;
+ }
else if (ret == 'd')
do_delete = 1;
else if (ret == 'g')
rw = 1;
else if (ret == 'i')
inputfile = arg;
+ else if (ret == 'S')
+ scan_the_thing = 1;
else if (ret == 's')
{
infosize = atoi(arg);
logf (LOG_FATAL, "no config and/or dictionary specified");
exit (1);
}
- my_resource = res_open (config);
+ my_resource = res_open (config, 0);
if (!my_resource)
{
logf (LOG_FATAL, "cannot open resource `%s'", config);
exit (1);
}
- bfs = bfs_create (res_get(my_resource, "register"));
+ bfs = bfs_create (res_get(my_resource, "register"), 0);
if (!bfs)
{
logf (LOG_FATAL, "bfs_create fail");
ipf_ptr[i++] = '\0';
if (rw)
{
- if (do_delete)
+ if (do_delete)
switch (dict_delete (dict, ipf_ptr))
{
case 0:
{
look_hits = 0;
dict_lookup_grep (dict, ipf_ptr, range, NULL,
- &max_pos, srange, grep_handle);
+ &max_pos, srange, grep_handler);
if (look_hits)
no_of_hits++;
else
}
fclose (ipf);
}
+ if (rw && delete_term)
+ {
+ logf (LOG_LOG, "dict_delete_subtree %s", delete_term);
+ dict_delete_subtree (dict, delete_term, 0, 0);
+ }
if (grep_pattern)
{
if (range < 0)
range = 0;
logf (LOG_LOG, "Grepping '%s'", grep_pattern);
dict_lookup_grep (dict, grep_pattern, range, NULL, &max_pos,
- srange, grep_handle);
+ srange, grep_handler);
}
if (rw)
{
logf (LOG_LOG, "No of hits.... %d", no_of_hits);
logf (LOG_LOG, "No of misses.. %d", no_of_misses);
}
+ if (scan_the_thing)
+ {
+ char term_dict[1024];
+
+ int before = 1000000;
+ int after = 1000000;
+ logf (LOG_LOG, "dict_scan");
+ term_dict[0] = 1;
+ term_dict[1] = 0;
+ dict_scan (dict, term_dict, &before, &after, 0, scan_handler);
+ }
dict_close (dict);
bfs_destroy (bfs);
res_close (my_resource);