Using the new ylog.h everywhere, and fixing what that breaks!
[idzebra-moved-to-github.git] / dict / dicttest.c
index 86a733d..c5fa828 100644 (file)
-/*
- * Copyright (C) 1994, Index Data I/S 
- * All rights reserved.
- * Sebastian Hammer, Adam Dickmeiss
- *
- * $Log: dicttest.c,v $
- * Revision 1.1  1994-08-16 16:26:47  adam
- * Added dict.
- *
- */
+/* $Id: dicttest.c,v 1.30 2004-11-19 10:26:55 heikki Exp $
+   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
+   Index Data Aps
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra.  If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+*/
+
+
 
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
+#include <ctype.h>
 
 #include <dict.h>
-#include <options.h>
+#include <zebrautl.h>
 
 char *prog;   /* program name; main sets it to argv[0] */
-Dict dict;
+static Dict dict;   /* dictionary handle; assigned from dict_open in main */
+
+static int look_hits;   /* incremented by grep_handler on every call; reset by main before each per-word dict_lookup_grep */
+
+/* Callback handed to dict_lookup_grep by main.
+ * Each invocation bumps the file-scope look_hits counter and writes the
+ * term it was given to stdout, one term per line.
+ * The info and client arguments are part of the callback signature but
+ * are not used here.  Always returns 0. */
+static int grep_handler (char *name, const char *info, void *client)
+{
+    (void) info;
+    (void) client;
+
+    look_hits += 1;
+    printf ("%s\n", name);
+
+    return 0;
+}
+
+/* Callback handed to dict_scan by main.
+ * Writes the term it was given to stdout, one term per line.
+ * The info, pos and client arguments are part of the callback signature
+ * but are not used here.  Always returns 0. */
+static int scan_handler (char *name, const char *info, int pos, void *client)
+{
+    (void) info;
+    (void) pos;
+    (void) client;
+
+    printf ("%s\n", name);
+
+    return 0;
+}
 
 int main (int argc, char **argv)
 {
+    Res my_resource = 0;
+    BFiles bfs;
     const char *name = NULL;
     const char *inputfile = NULL;
+    const char *config = NULL;
+    const char *delete_term = NULL;
+    int scan_the_thing = 0;
+    int do_delete = 0;
+    int range = -1;
+    int srange = 0;
     int rw = 0;
+    int infosize = 4;
     int cache = 10;
     int ret;
-    int verbose = 0;
+    int unique = 0;
+    char *grep_pattern = NULL;
     char *arg;
+    int no_of_iterations = 0;
+    int no_of_new = 0, no_of_same = 0, no_of_change = 0;
+    int no_of_hits = 0, no_of_misses = 0, no_not_found = 0, no_of_deleted = 0;
+    int max_pos;
     
     prog = argv[0];
     if (argc < 2)
     {
-        fprintf (stderr, "usage:\n"
-                         "  %s [-v n] [-i f] [-w] [-c n] file\n", prog);
+        fprintf (stderr, "usage:\n "
+                 " %s [-d] [-D t] [-S] [-r n] [-p n] [-u] [-g pat] [-s n] "
+                 "[-v n] [-i f] [-w] [-c n] config file\n\n",
+                 prog);
+        fprintf (stderr, "  -d      delete instead of insert\n");
+        fprintf (stderr, "  -D t    delete subtree instead of insert\n");
+        fprintf (stderr, "  -r n    set regular match range\n");
+        fprintf (stderr, "  -p n    set regular match start range\n");
+        fprintf (stderr, "  -u      report if keys change during insert\n");
+        fprintf (stderr, "  -g p    try pattern n (see -r)\n");
+        fprintf (stderr, "  -s n    set info size to n (instead of 4)\n");
+        fprintf (stderr, "  -v n    set logging level\n");
+        fprintf (stderr, "  -i f    read file with words\n");
+        fprintf (stderr, "  -w      insert/delete instead of lookup\n");
+        fprintf (stderr, "  -c n    cache size (number of pages)\n");
+        fprintf (stderr, "  -S      scan the dictionary\n");
         exit (1);
     }
-    while ((ret = options ("v:i:wc:", argv, argc, &arg)) != -2)
+    while ((ret = options ("D:Sdr:p:ug:s:v:i:wc:", argv, argc, &arg)) != -2)
     {
         if (ret == 0)
         {
-            if (name)
+            if (!config)
+                config = arg;
+            else if (!name)
+                name = arg;
+            else
             {
-                fprintf (stderr, "%s: too many files specified\n", prog);
+                yaz_log (YLOG_FATAL, "too many files specified\n");
                 exit (1);
             }
-            name = arg;
+        }
+       else if (ret == 'D')
+       {
+           delete_term = arg;
+       }
+        else if (ret == 'd')
+            do_delete = 1;
+        else if (ret == 'g')
+        {
+            grep_pattern = arg;
+        }
+        else if (ret == 'r')
+        {
+            range = atoi (arg);
+        }
+        else if (ret == 'p')
+        {
+            srange = atoi (arg);
+        }
+        else if (ret == 'u')
+        {
+            unique = 1;
         }
         else if (ret == 'c')
         {
@@ -56,56 +138,182 @@ int main (int argc, char **argv)
         else if (ret == 'w')
             rw = 1;
         else if (ret == 'i')
-        {
             inputfile = arg;
-            rw = 1;
+       else if (ret == 'S')
+           scan_the_thing = 1;
+        else if (ret == 's')
+        {
+            infosize = atoi(arg);
         }
         else if (ret == 'v')
-            verbose = atoi(arg);
+        {
+            yaz_log_init (yaz_log_mask_str(arg), prog, NULL);
+        }
         else
         {
-            fprintf (stderr, "%s: unknown option\n", prog);
+            yaz_log (YLOG_FATAL, "Unknown option '-%s'", arg);
             exit (1);
         }
     }
-    if (!name)
+    if (!config || !name)
+    {
+        yaz_log (YLOG_FATAL, "no config and/or dictionary specified");
+        exit (1);
+    }
+    my_resource = res_open (config, 0, 0);
+    if (!my_resource)
+    {
+        yaz_log (YLOG_FATAL, "cannot open resource `%s'", config);
+        exit (1);
+    }
+    bfs = bfs_create (res_get(my_resource, "register"), 0);
+    if (!bfs)
     {
-        fprintf (stderr, "%s: no dictionary file given\n", prog);
+        yaz_log (YLOG_FATAL, "bfs_create fail");
         exit (1);
     }
-    dict = dict_open (name, cache, rw);
+    dict = dict_open (bfs, name, cache, rw, 0, 4096);
     if (!dict)
     {
-        fprintf (stderr, "%s: dict_open fail\n", prog);
+        yaz_log (YLOG_FATAL, "dict_open fail of `%s'", name);
         exit (1);
     }
     if (inputfile)
     {
         FILE *ipf;
-        char ipf_buf[256];
-        char word[256];
-        int i, line = 1;
+        char ipf_buf[1024];
+        int line = 1;
+        char infobytes[120];
+        memset (infobytes, 0, 120);
 
         if (!(ipf = fopen(inputfile, "r")))
         {
-            fprintf (stderr, "%s: cannot open %s\n", prog, inputfile);
+            yaz_log (YLOG_FATAL|YLOG_ERRNO, "cannot open %s", inputfile);
             exit (1);
         }
         
-        while (fgets (ipf_buf, 255, ipf))
+        while (fgets (ipf_buf, 1023, ipf))
         {
-            for (i=0; i<255; i++)
-                if (ipf_buf[i] > ' ')
-                    word[i] = ipf_buf[i];
-                else
-                    break;
-            word[i] = 0;
-            if (i)
-                dict_insert (dict, word, &line);
+            char *ipf_ptr = ipf_buf;
+            sprintf (infobytes, "%d", line);
+            for (;*ipf_ptr && *ipf_ptr != '\n';ipf_ptr++)
+            {
+                if (isalpha(*ipf_ptr) || *ipf_ptr == '_')
+                {
+                    int i = 1;
+                    while (ipf_ptr[i] && (isalnum(ipf_ptr[i]) ||
+                                          ipf_ptr[i] == '_'))
+                        i++;
+                    if (ipf_ptr[i])
+                        ipf_ptr[i++] = '\0';
+                    if (rw)
+                    {
+                       if (do_delete)
+                            switch (dict_delete (dict, ipf_ptr))
+                            {
+                            case 0:
+                                no_not_found++;
+                                break;
+                            case 1:
+                                no_of_deleted++;
+                            }
+                        else
+                            switch(dict_insert (dict, ipf_ptr,
+                                                infosize, infobytes))
+                            {
+                            case 0:
+                                no_of_new++;
+                                break;
+                            case 1:
+                                no_of_change++;
+                                if (unique)
+                                    yaz_log (YLOG_LOG, "%s change\n", ipf_ptr);
+                                break;
+                            case 2:
+                                if (unique)
+                                    yaz_log (YLOG_LOG, "%s duplicate\n", ipf_ptr);
+                                no_of_same++;
+                                break;
+                            }
+                    }
+                    else if(range < 0)
+                    {
+                        char *cp;
+
+                        cp = dict_lookup (dict, ipf_ptr);
+                        if (cp && *cp)
+                            no_of_hits++;
+                        else
+                            no_of_misses++;
+                    }
+                    else
+                    {
+                        look_hits = 0;
+                        dict_lookup_grep (dict, ipf_ptr, range, NULL,
+                                          &max_pos, srange, grep_handler);
+                        if (look_hits)
+                            no_of_hits++;
+                        else
+                            no_of_misses++;
+                    }
+                    ++no_of_iterations;
+                   if ((no_of_iterations % 10000) == 0)
+                   {
+                       printf ("."); fflush(stdout);
+                   }
+                    ipf_ptr += (i-1);
+                }
+            }
             ++line;
         }
         fclose (ipf);
     }
+    if (rw && delete_term)
+    {
+       yaz_log (YLOG_LOG, "dict_delete_subtree %s", delete_term);
+       dict_delete_subtree (dict, delete_term, 0, 0);
+    }
+    if (grep_pattern)
+    {
+        if (range < 0)
+            range = 0;
+        yaz_log (YLOG_LOG, "Grepping '%s'", grep_pattern);
+        dict_lookup_grep (dict, grep_pattern, range, NULL, &max_pos,
+                          srange, grep_handler);
+    }
+    if (rw)
+    {
+        yaz_log (YLOG_LOG, "Iterations.... %d", no_of_iterations);            
+        if (do_delete)
+        {
+            yaz_log (YLOG_LOG, "No of deleted. %d", no_of_deleted);
+            yaz_log (YLOG_LOG, "No not found.. %d", no_not_found);
+        }
+        else
+        {
+            yaz_log (YLOG_LOG, "No of new..... %d", no_of_new);
+            yaz_log (YLOG_LOG, "No of change.. %d", no_of_change);
+        }
+    }
+    else
+    {
+        yaz_log (YLOG_LOG, "Lookups....... %d", no_of_iterations);
+        yaz_log (YLOG_LOG, "No of hits.... %d", no_of_hits);
+        yaz_log (YLOG_LOG, "No of misses.. %d", no_of_misses);
+    }
+    if (scan_the_thing)
+    {
+       char term_dict[1024];
+        
+       int before = 1000000;
+       int after = 1000000;
+       yaz_log (YLOG_LOG, "dict_scan");
+       term_dict[0] = 1;
+       term_dict[1] = 0;
+       dict_scan (dict, term_dict, &before, &after, 0, scan_handler);
+    }
     dict_close (dict);
+    bfs_destroy (bfs);
+    res_close (my_resource);
     return 0;
 }