X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=dict%2Fdicttest.c;h=849111e54e5ad740c35e073d4afd44c46616df3d;hb=3e97ca3e99068d83691ad6d43d53dd9f1f316889;hp=8d8f861f19ef8c8250a95dd522617a0eb15cb2e3;hpb=02ac0a77d27046442a63371dbf37ee5c0c452dee;p=idzebra-moved-to-github.git diff --git a/dict/dicttest.c b/dict/dicttest.c index 8d8f861..849111e 100644 --- a/dict/dicttest.c +++ b/dict/dicttest.c @@ -1,10 +1,51 @@ /* - * Copyright (C) 1994, Index Data I/S + * Copyright (C) 1994-1995, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: dicttest.c,v $ - * Revision 1.8 1994-09-22 10:43:44 adam + * Revision 1.20 1996-03-20 09:35:16 adam + * Function dict_lookup_grep got extra parameter, init_pos, which marks + * from which position in pattern approximate pattern matching should occur. + * + * Revision 1.19 1996/02/02 13:43:50 adam + * The public functions simply use char instead of Dict_char to represent + * search strings. Dict_char is used internally only. + * + * Revision 1.18 1996/02/01 20:39:52 adam + * Bug fix: insert didn't work on 8-bit characters due to unsigned char + * compares in dict_strcmp (strcmp) and signed Dict_char. Dict_char is + * unsigned now. + * + * Revision 1.17 1995/12/06 17:48:30 adam + * Bug fix: delete didn't work. + * + * Revision 1.16 1995/10/09 16:18:31 adam + * Function dict_lookup_grep got extra client data parameter. + * + * Revision 1.15 1995/09/04 12:33:31 adam + * Various cleanup. YAZ util used instead. + * + * Revision 1.14 1994/10/04 17:46:55 adam + * Function options now returns arg with error option. + * + * Revision 1.13 1994/10/04 12:08:05 adam + * Some bug fixes and some optimizations. + * + * Revision 1.12 1994/10/03 17:23:03 adam + * First version of dictionary lookup with regular expressions and errors. + * + * Revision 1.11 1994/09/28 13:07:09 adam + * Use log_mask_str now. + * + * Revision 1.10 1994/09/26 10:17:24 adam + * Minor changes. + * + * Revision 1.9 1994/09/22 14:43:56 adam + * First functional version of lookup with error correction. A 'range' + * specified the maximum number of insertions+deletions+substitutions. + * + * Revision 1.8 1994/09/22 10:43:44 adam * Two versions of depend. Type 1 is the tail-type compatible with * all make programs. Type 2 is the GNU make with include facility. * Type 2 is default. depend rule chooses current rule. @@ -42,10 +83,13 @@ #include char *prog; -Dict dict; +static Dict dict; -static int lookup_handle (Dict_char *name) +static int look_hits; + +static int grep_handle (char *name, const char *info, void *client) { + look_hits++; printf ("%s\n", name); return 0; } @@ -55,27 +99,41 @@ int main (int argc, char **argv) const char *name = NULL; const char *inputfile = NULL; const char *base = NULL; + int do_delete = 0; int range = -1; + int srange = 0; int rw = 0; int infosize = 4; int cache = 10; int ret; - int no_of_iterations = 0; - int no_of_new = 0, no_of_same = 0, no_of_change = 0; - int no_of_hits = 0, no_of_misses = 0; int unique = 0; + char *grep_pattern = NULL; char *arg; + int no_of_iterations = 0; + int no_of_new = 0, no_of_same = 0, no_of_change = 0; + int no_of_hits = 0, no_of_misses = 0, no_not_found = 0, no_of_deleted = 0; + int max_pos; prog = argv[0]; if (argc < 2) { fprintf (stderr, "usage:\n " - " %s [-r n] [-u] [-s n] [-v n] [-i f] [-w] [-c n]" - " base file\n", + " %s [-d] [-r n] [-p n] [-u] [-g pat] [-s n] [-v n] [-i f]" + " [-w] [-c n] base file\n\n", prog); + fprintf (stderr, " -d delete instead of insert\n"); + fprintf (stderr, " -r n set regular match range\n"); + fprintf (stderr, " -p n set regular match start range\n"); + fprintf (stderr, " -u report if keys change during insert\n"); + fprintf (stderr, " -g p try pattern n (see -r)\n"); + fprintf (stderr, " -s n set info size to n (instead of 4)\n"); + fprintf (stderr, " -v n set logging level\n"); + fprintf (stderr, " -i f read file with words\n"); + fprintf (stderr, " -w insert/delete instead of lookup\n"); + fprintf (stderr, " -c n cache size (number of pages)\n"); exit (1); } - while ((ret = options ("r:us:v:i:wc:", argv, argc, &arg)) != -2) + while ((ret = options ("dr:p:ug:s:v:i:wc:", argv, argc, &arg)) != -2) { if (ret == 0) { @@ -85,14 +143,24 @@ int main (int argc, char **argv) name = arg; else { - log (LOG_FATAL, "too many files specified\n"); + logf (LOG_FATAL, "too many files specified\n"); exit (1); } } + else if (ret == 'd') + do_delete = 1; + else if (ret == 'g') + { + grep_pattern = arg; + } else if (ret == 'r') { range = atoi (arg); } + else if (ret == 'p') + { + srange = atoi (arg); + } else if (ret == 'u') { unique = 1; @@ -113,29 +181,29 @@ int main (int argc, char **argv) } else if (ret == 'v') { - log_init (atoi(arg), prog, NULL); + log_init (log_mask_str(arg), prog, NULL); } else { - log (LOG_FATAL, "unknown option"); + logf (LOG_FATAL, "Unknown option '-%s'", arg); exit (1); } } if (!base || !name) { - log (LOG_FATAL, "no base and/or dictionary specified"); + logf (LOG_FATAL, "no base and/or dictionary specified"); exit (1); } common_resource = res_open (base); if (!common_resource) { - log (LOG_FATAL, "cannot open resource `%s'", base); + logf (LOG_FATAL, "cannot open resource `%s'", base); exit (1); } dict = dict_open (name, cache, rw); if (!dict) { - log (LOG_FATAL, "dict_open fail of `%s'", name); + logf (LOG_FATAL, "dict_open fail of `%s'", name); exit (1); } if (inputfile) @@ -148,7 +216,7 @@ int main (int argc, char **argv) if (!(ipf = fopen(inputfile, "r"))) { - log (LOG_FATAL|LOG_ERRNO, "cannot open %s", inputfile); + logf (LOG_FATAL|LOG_ERRNO, "cannot open %s", inputfile); exit (1); } @@ -168,37 +236,53 @@ int main (int argc, char **argv) ipf_ptr[i++] = '\0'; if (rw) { - switch(dict_insert (dict, ipf_ptr, - infosize, infobytes)) - { - case 0: - no_of_new++; - break; - case 1: - no_of_change++; - if (unique) - log (LOG_LOG, "%s change\n", ipf_ptr); - break; - case 2: - if (unique) - log (LOG_LOG, "%s duplicate\n", ipf_ptr); - no_of_same++; - break; - } + if (do_delete) + switch (dict_delete (dict, ipf_ptr)) + { + case 0: + no_not_found++; + break; + case 1: + no_of_deleted++; + } + else + switch(dict_insert (dict, ipf_ptr, + infosize, infobytes)) + { + case 0: + no_of_new++; + break; + case 1: + no_of_change++; + if (unique) + logf (LOG_LOG, "%s change\n", ipf_ptr); + break; + case 2: + if (unique) + logf (LOG_LOG, "%s duplicate\n", ipf_ptr); + no_of_same++; + break; + } } else if(range < 0) { char *cp; cp = dict_lookup (dict, ipf_ptr); - if (cp) + if (cp && *cp) no_of_hits++; else no_of_misses++; } else { - dict_lookup_ec (dict, ipf_ptr, range, lookup_handle); + look_hits = 0; + dict_lookup_grep (dict, ipf_ptr, range, NULL, + &max_pos, srange, grep_handle); + if (look_hits) + no_of_hits++; + else + no_of_misses++; } ++no_of_iterations; ipf_ptr += (i-1); @@ -208,18 +292,33 @@ int main (int argc, char **argv) } fclose (ipf); } + if (grep_pattern) + { + if (range < 0) + range = 0; + logf (LOG_LOG, "Grepping '%s'", grep_pattern); + dict_lookup_grep (dict, grep_pattern, range, NULL, &max_pos, + srange, grep_handle); + } if (rw) { - log (LOG_LOG, "Insertions.... %d", no_of_iterations); - log (LOG_LOG, "No of new..... %d", no_of_new); - log (LOG_LOG, "No of change.. %d", no_of_change); - log (LOG_LOG, "No of same.... %d", no_of_same); + logf (LOG_LOG, "Iterations.... %d", no_of_iterations); + if (do_delete) + { + logf (LOG_LOG, "No of deleted. %d", no_of_deleted); + logf (LOG_LOG, "No not found.. %d", no_not_found); + } + else + { + logf (LOG_LOG, "No of new..... %d", no_of_new); + logf (LOG_LOG, "No of change.. %d", no_of_change); + } } else { - log (LOG_LOG, "Lookups....... %d", no_of_iterations); - log (LOG_LOG, "No of hits.... %d", no_of_hits); - log (LOG_LOG, "No of misses.. %d", no_of_misses); + logf (LOG_LOG, "Lookups....... %d", no_of_iterations); + logf (LOG_LOG, "No of hits.... %d", no_of_hits); + logf (LOG_LOG, "No of misses.. %d", no_of_misses); } dict_close (dict); res_close (common_resource);