X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=dict%2Fdicttest.c;h=dbebc8d0c00545196666a856788911af693a780f;hp=5538b497b53b487b21b0c48fe5df991a35f1c002;hb=ecb3935e78cd9bcfdebafdee0834cfb1060d7b5e;hpb=1c3797bc503c1e7a109c8887d89d3ddda93bba71 diff --git a/dict/dicttest.c b/dict/dicttest.c index 5538b49..dbebc8d 100644 --- a/dict/dicttest.c +++ b/dict/dicttest.c @@ -1,109 +1,53 @@ -/* - * Copyright (C) 1994-1999, Index Data - * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss - * - * $Log: dicttest.c,v $ - * Revision 1.23 2000-07-07 12:49:20 adam - * Optimized resultSetInsert{Rank,Sort}. - * - * Revision 1.22 1999/02/02 14:50:19 adam - * Updated WIN32 code specific sections. Changed header. - * - * Revision 1.21 1996/10/29 14:00:03 adam - * Page size given by DICT_DEFAULT_PAGESIZE in dict.h. - * - * Revision 1.20 1996/03/20 09:35:16 adam - * Function dict_lookup_grep got extra parameter, init_pos, which marks - * from which position in pattern approximate pattern matching should occur. - * - * Revision 1.19 1996/02/02 13:43:50 adam - * The public functions simply use char instead of Dict_char to represent - * search strings. Dict_char is used internally only. - * - * Revision 1.18 1996/02/01 20:39:52 adam - * Bug fix: insert didn't work on 8-bit characters due to unsigned char - * compares in dict_strcmp (strcmp) and signed Dict_char. Dict_char is - * unsigned now. - * - * Revision 1.17 1995/12/06 17:48:30 adam - * Bug fix: delete didn't work. - * - * Revision 1.16 1995/10/09 16:18:31 adam - * Function dict_lookup_grep got extra client data parameter. - * - * Revision 1.15 1995/09/04 12:33:31 adam - * Various cleanup. YAZ util used instead. - * - * Revision 1.14 1994/10/04 17:46:55 adam - * Function options now returns arg with error option. - * - * Revision 1.13 1994/10/04 12:08:05 adam - * Some bug fixes and some optimizations. - * - * Revision 1.12 1994/10/03 17:23:03 adam - * First version of dictionary lookup with regular expressions and errors. - * - * Revision 1.11 1994/09/28 13:07:09 adam - * Use log_mask_str now. - * - * Revision 1.10 1994/09/26 10:17:24 adam - * Minor changes. - * - * Revision 1.9 1994/09/22 14:43:56 adam - * First functional version of lookup with error correction. A 'range' - * specified the maximum number of insertions+deletions+substitutions. - * - * Revision 1.8 1994/09/22 10:43:44 adam - * Two versions of depend. Type 1 is the tail-type compatible with - * all make programs. Type 2 is the GNU make with include facility. - * Type 2 is default. depend rule chooses current rule. - * - * Revision 1.7 1994/09/19 16:34:26 adam - * Depend rule change. Minor changes in dicttest.c - * - * Revision 1.6 1994/09/16 15:39:12 adam - * Initial code of lookup - not tested yet. - * - * Revision 1.5 1994/09/06 13:05:14 adam - * Further development of insertion. Some special cases are - * not properly handled yet! assert(0) are put here. The - * binary search in each page definitely reduce usr CPU. - * - * Revision 1.4 1994/09/01 17:49:37 adam - * Removed stupid line. Work on insertion in dictionary. Not finished yet. - * - * Revision 1.3 1994/09/01 17:44:06 adam - * depend include change. - * - * Revision 1.2 1994/08/18 12:40:54 adam - * Some development of dictionary. Not finished at all! - * - * Revision 1.1 1994/08/16 16:26:47 adam - * Added dict. - * - */ +/* $Id: dicttest.c,v 1.36 2006-05-10 08:13:18 adam Exp $ + Copyright (C) 1995-2005 + Index Data ApS + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Zebra; see the file LICENSE.zebra. If not, write to the +Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. +*/ #include #include #include #include -#include -#include +#include +#include +#include +#include char *prog; static Dict dict; static int look_hits; -static int grep_handle (char *name, const char *info, void *client) +static int grep_handler (char *name, const char *info, void *client) { look_hits++; printf ("%s\n", name); return 0; } +static int scan_handler (char *name, const char *info, int pos, void *client) +{ + printf ("%s\n", name); + return 0; +} + int main (int argc, char **argv) { Res my_resource = 0; @@ -111,6 +55,8 @@ int main (int argc, char **argv) const char *name = NULL; const char *inputfile = NULL; const char *config = NULL; + const char *delete_term = NULL; + int scan_the_thing = 0; int do_delete = 0; int range = -1; int srange = 0; @@ -130,10 +76,11 @@ int main (int argc, char **argv) if (argc < 2) { fprintf (stderr, "usage:\n " - " %s [-d] [-r n] [-p n] [-u] [-g pat] [-s n] [-v n] [-i f]" - " [-w] [-c n] config file\n\n", + " %s [-d] [-D t] [-S] [-r n] [-p n] [-u] [-g pat] [-s n] " + "[-v n] [-i f] [-w] [-c n] config file\n\n", prog); fprintf (stderr, " -d delete instead of insert\n"); + fprintf (stderr, " -D t delete subtree instead of insert\n"); fprintf (stderr, " -r n set regular match range\n"); fprintf (stderr, " -p n set regular match start range\n"); fprintf (stderr, " -u report if keys change during insert\n"); @@ -143,9 +90,10 @@ int main (int argc, char **argv) fprintf (stderr, " -i f read file with words\n"); fprintf (stderr, " -w insert/delete instead of lookup\n"); fprintf (stderr, " -c n cache size (number of pages)\n"); + fprintf (stderr, " -S scan the dictionary\n"); exit (1); } - while ((ret = options ("dr:p:ug:s:v:i:wc:", argv, argc, &arg)) != -2) + while ((ret = options ("D:Sdr:p:ug:s:v:i:wc:", argv, argc, &arg)) != -2) { if (ret == 0) { @@ -155,10 +103,14 @@ int main (int argc, char **argv) name = arg; else { - logf (LOG_FATAL, "too many files specified\n"); + yaz_log (YLOG_FATAL, "too many files specified\n"); exit (1); } } + else if (ret == 'D') + { + delete_term = arg; + } else if (ret == 'd') do_delete = 1; else if (ret == 'g') @@ -187,41 +139,45 @@ int main (int argc, char **argv) rw = 1; else if (ret == 'i') inputfile = arg; + else if (ret == 'S') + scan_the_thing = 1; else if (ret == 's') { infosize = atoi(arg); } else if (ret == 'v') { - log_init (log_mask_str(arg), prog, NULL); + yaz_log_init (yaz_log_mask_str(arg), prog, NULL); } else { - logf (LOG_FATAL, "Unknown option '-%s'", arg); + yaz_log (YLOG_FATAL, "Unknown option '-%s'", arg); exit (1); } } if (!config || !name) { - logf (LOG_FATAL, "no config and/or dictionary specified"); + yaz_log (YLOG_FATAL, "no config and/or dictionary specified"); exit (1); } - my_resource = res_open (config); + my_resource = res_open(0, 0); if (!my_resource) { - logf (LOG_FATAL, "cannot open resource `%s'", config); + yaz_log (YLOG_FATAL, "cannot open resource `%s'", config); exit (1); } - bfs = bfs_create (res_get(my_resource, "register")); + res_read_file(my_resource, config); + + bfs = bfs_create (res_get(my_resource, "register"), 0); if (!bfs) { - logf (LOG_FATAL, "bfs_create fail"); + yaz_log (YLOG_FATAL, "bfs_create fail"); exit (1); } - dict = dict_open (bfs, name, cache, rw, 0); + dict = dict_open (bfs, name, cache, rw, 0, 4096); if (!dict) { - logf (LOG_FATAL, "dict_open fail of `%s'", name); + yaz_log (YLOG_FATAL, "dict_open fail of `%s'", name); exit (1); } if (inputfile) @@ -234,7 +190,7 @@ int main (int argc, char **argv) if (!(ipf = fopen(inputfile, "r"))) { - logf (LOG_FATAL|LOG_ERRNO, "cannot open %s", inputfile); + yaz_log (YLOG_FATAL|YLOG_ERRNO, "cannot open %s", inputfile); exit (1); } @@ -254,7 +210,7 @@ int main (int argc, char **argv) ipf_ptr[i++] = '\0'; if (rw) { - if (do_delete) + if (do_delete) switch (dict_delete (dict, ipf_ptr)) { case 0: @@ -273,11 +229,11 @@ int main (int argc, char **argv) case 1: no_of_change++; if (unique) - logf (LOG_LOG, "%s change\n", ipf_ptr); + yaz_log (YLOG_LOG, "%s change\n", ipf_ptr); break; case 2: if (unique) - logf (LOG_LOG, "%s duplicate\n", ipf_ptr); + yaz_log (YLOG_LOG, "%s duplicate\n", ipf_ptr); no_of_same++; break; } @@ -296,7 +252,7 @@ int main (int argc, char **argv) { look_hits = 0; dict_lookup_grep (dict, ipf_ptr, range, NULL, - &max_pos, srange, grep_handle); + &max_pos, srange, grep_handler); if (look_hits) no_of_hits++; else @@ -314,36 +270,60 @@ int main (int argc, char **argv) } fclose (ipf); } + if (rw && delete_term) + { + yaz_log (YLOG_LOG, "dict_delete_subtree %s", delete_term); + dict_delete_subtree (dict, delete_term, 0, 0); + } if (grep_pattern) { if (range < 0) range = 0; - logf (LOG_LOG, "Grepping '%s'", grep_pattern); + yaz_log (YLOG_LOG, "Grepping '%s'", grep_pattern); dict_lookup_grep (dict, grep_pattern, range, NULL, &max_pos, - srange, grep_handle); + srange, grep_handler); } if (rw) { - logf (LOG_LOG, "Iterations.... %d", no_of_iterations); + yaz_log (YLOG_LOG, "Iterations.... %d", no_of_iterations); if (do_delete) { - logf (LOG_LOG, "No of deleted. %d", no_of_deleted); - logf (LOG_LOG, "No not found.. %d", no_not_found); + yaz_log (YLOG_LOG, "No of deleted. %d", no_of_deleted); + yaz_log (YLOG_LOG, "No not found.. %d", no_not_found); } else { - logf (LOG_LOG, "No of new..... %d", no_of_new); - logf (LOG_LOG, "No of change.. %d", no_of_change); + yaz_log (YLOG_LOG, "No of new..... %d", no_of_new); + yaz_log (YLOG_LOG, "No of change.. %d", no_of_change); } } else { - logf (LOG_LOG, "Lookups....... %d", no_of_iterations); - logf (LOG_LOG, "No of hits.... %d", no_of_hits); - logf (LOG_LOG, "No of misses.. %d", no_of_misses); + yaz_log (YLOG_LOG, "Lookups....... %d", no_of_iterations); + yaz_log (YLOG_LOG, "No of hits.... %d", no_of_hits); + yaz_log (YLOG_LOG, "No of misses.. %d", no_of_misses); + } + if (scan_the_thing) + { + char term_dict[1024]; + + int before = 1000000; + int after = 1000000; + yaz_log (YLOG_LOG, "dict_scan"); + term_dict[0] = 1; + term_dict[1] = 0; + dict_scan (dict, term_dict, &before, &after, 0, scan_handler); } dict_close (dict); bfs_destroy (bfs); res_close (my_resource); return 0; } +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ +