Added a few prototypes.
[idzebra-moved-to-github.git] / index / main.c
index 87f7c16..c29e4c0 100644 (file)
 /*
- * Copyright (C) 1994, Index Data I/S 
+ * Copyright (C) 1994-2001, Index Data
  * All rights reserved.
- * Sebastian Hammer, Adam Dickmeiss
- *
- * $Log: main.c,v $
- * Revision 1.1  1995-08-31 14:50:24  adam
- * New simple file index tool.
  *
+ * $Id: main.c,v 1.80 2001-11-19 23:05:22 adam Exp $
  */
 #include <stdio.h>
+#include <string.h>
 #include <assert.h>
+#ifdef WIN32
+#include <io.h>
+#else
 #include <unistd.h>
-#include <dirent.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <fcntl.h>
-#include <ctype.h>
+#endif
 
-#include <util.h>
+#include <yaz/data1.h>
 #include "index.h"
+#include "recindex.h"
 
-char *prog;
-
-static int key_fd = -1;
-#define KEY_BUF_SIZE 100000
-static char *key_buf;
-int key_offset;
-SYSNO sysno_next;
-Dict file_idx;
-static char *base_path = NULL;
-
-void key_open (const char *fname)
-{
-    void *file_key;
-    if (key_fd != -1)
-        return;
-    if ((key_fd = open (fname, O_RDWR|O_CREAT, 0666)) == -1)
-    {
-        log (LOG_FATAL|LOG_ERRNO, "Creat %s", fname);
-        exit (1);
-    }
-    if (!(key_buf = malloc (KEY_BUF_SIZE)))
-    {
-        log (LOG_FATAL|LOG_ERRNO, "malloc");
-        exit (1);
-    }
-    key_offset = 0;
-    if (!(file_idx = dict_open ("fileidx", 10, 1)))
-    {
-        log (LOG_FATAL, "dict_open fail of %s", "fileidx");
-        exit (1);
-    }
-    file_key = dict_lookup (file_idx, ".");
-    if (file_key)
-        memcpy (&sysno_next, (char*)file_key+1, sizeof(sysno_next));
-    else
-        sysno_next = 1;
-}
+#ifndef ZEBRASDR
+#define ZEBRASDR 0  /* default: SDR-specific code below is compiled out unless the build defines ZEBRASDR */
+#endif
 
-void key_close (void)
-{
-    if (key_fd == -1)
-        return;
-    close (key_fd);
-    dict_insert (file_idx, ".", sizeof(sysno_next), &sysno_next);
-    dict_close (file_idx);
-    key_fd = -1;
-}
+#if ZEBRASDR
+#include "zebrasdr.h"  /* pulled in only when ZEBRASDR is non-zero */
+#endif
 
-void key_flush (void)
-{
-    size_t i = 0;
-    int w;
-    
-    while (i < key_offset)
-    {
-        w = write (key_fd, key_buf + i, key_offset - i);
-        if (w == -1)
-        {
-            log (LOG_FATAL|LOG_ERRNO, "Write key fail");
-            exit (1);
-        }
-        i += w;
-    }
-    key_offset = 0;
-}
+char *prog;  /* program name; main() sets it to argv[0] */
 
-void key_write (int cmd, struct it_key *k, const char *str)
-{
-    char x = cmd;
-    size_t slen = strlen(str);
+Res common_resource = 0;  /* configuration handle; stays 0 until main() opens the config file for the first command */
 
-    if (key_offset + sizeof(*k) + slen >= KEY_BUF_SIZE - 2)
-        key_flush ();
-    memcpy (key_buf + key_offset, &x, 1);
-    key_offset++;
-    memcpy (key_buf + key_offset, k, sizeof(*k));
-    key_offset += sizeof(*k);
-    memcpy (key_buf + key_offset, str, slen+1);
-    key_offset += slen+1;
-}
 
-void text_extract (SYSNO sysno, int cmd, const char *fname)
+int main (int argc, char **argv)  /* Entry point: walks argv, applying options and running each command (optionally followed by a <dir>) in order. */
 {
-    FILE *inf;
-    struct it_key k;
-    int seqno = 1;
-    int c;
-    char w[256];
-
-    log (LOG_DEBUG, "Text extract of %d", sysno);
-    k.sysno = sysno;
-    inf = fopen (fname, "r");
-    if (!inf)
-    {
-        log (LOG_WARN|LOG_ERRNO, "open %s", fname);
-        return;
-    }
-    while ((c=getc (inf)) != EOF)
-    {
-        int i = 0;
-        while (i < 254 && c != EOF && isalnum(c))
-        {
-            w[i++] = c;
-            c = getc (inf);
-        }
-        if (i)
-        {
-            w[i] = 0;
-            
-            k.seqno = seqno++;
-            k.field = 0;
-            key_write (cmd, &k, w);
-        }
-        if (c == EOF)
-            break;
-    }
-    fclose (inf);
-}
+    int ret;
+    int cmd = 0;  /* pending command letter; 0 means the next plain argument is taken as a command name */
+    char *arg;
+    char *configName = FNAME_CONFIG;  /* replaced by the -c argument */
+    int nsections = 0;  /* set from key_close(); a non-zero value triggers key_input() after the command */
+    int disableCommit = 0;  /* set to 1 by -n */
+    size_t mem_max = 0;  /* set by -m; handed to key_open() */
+    char nbuf[100];  /* holds the log prefix "<argv[0]>(<pid>)" */
+    struct recordGroup rGroupDef;  /* defaults that are copied into a fresh rGroup for every <dir> argument */
 
-void file_extract (int cmd, struct stat *fs, const char *fname)
-{
-    int i;
-    char ext[128];
-    SYSNO sysno;
-    char ext_res[128];
-    const char *file_type;
-    void *file_info;
+    nmem_init ();
 
-    log (LOG_DEBUG, "%c %s", cmd, fname);
-    for (i = strlen(fname); --i >= 0; )
-        if (fname[i] == '/')
-        {
-            strcpy (ext, "");
-            break;
-        }
-        else if (fname[i] == '.')
-        {
-            strcpy (ext, fname+i+1);
-            break;
-        }
-    sprintf (ext_res, "fileExtension.%s", ext);
-    if (!(file_type = res_get (common_resource, ext_res)))
-        return;
-    
-    file_info = dict_lookup (file_idx, fname);
-    if (!file_info)
-    {
-        sysno = sysno_next++;
-        dict_insert (file_idx, fname, sizeof(sysno), &sysno);
-    }
-    else
-        memcpy (&sysno, (char*) file_info+1, sizeof(sysno));
-    if (!strcmp (file_type, "text"))
-        text_extract (sysno, cmd, fname);
-}
+#ifdef WIN32
+#else
+    sprintf(nbuf, "%.40s(%d)", *argv, getpid());  /* %.40s caps the program name so the result fits in nbuf */
+    yaz_log_init_prefix (nbuf);
+#endif
 
-static void repository_extract_r (int cmd, char *rep)
-{
-    DIR *dir;
-    size_t rep_len;
-    struct dirent *dent;
-    struct stat fs;
+#if ZEBRASDR
+    zebraSdr_std ();
+    rGroupDef.useSDR = 0;  /* switched on by -S */
+#endif
+    rGroupDef.groupName = NULL;
+    rGroupDef.databaseName = NULL;
+    rGroupDef.path = NULL;
+    rGroupDef.recordId = NULL;
+    rGroupDef.recordType = NULL;
+    rGroupDef.flagStoreData = -1;
+    rGroupDef.flagStoreKeys = -1;
+    rGroupDef.flagRw = 1;  /* cleared by -s */
+    rGroupDef.databaseNamePath = 0;
+    rGroupDef.explainDatabase = 0;
+    rGroupDef.fileVerboseLimit = 100000;  /* overridden by -f */
+    rGroupDef.zebra_maps = NULL;
+    rGroupDef.dh = data1_create ();
+    rGroupDef.recTypes = recTypes_init (rGroupDef.dh);
+    recTypes_default_handlers (rGroupDef.recTypes);
 
-    rep_len = strlen(rep);
-    dir = opendir(rep);
-    if (!dir)
+    prog = *argv;
+    if (argc < 2)  /* no arguments at all: print usage and exit with status 1 */
     {
-        if (errno == ENOENT)
-        {
-            log (LOG_WARN|LOG_ERRNO, "opendir %s", rep);
-            return;
-        }
-        log (LOG_FATAL|LOG_ERRNO, "opendir %s", rep);
+        fprintf (stderr, "%s [options] command <dir> ...\n"
+        "Commands:\n"
+        " update <dir>  Update index with files below <dir>.\n"
+       "               If <dir> is empty filenames are read from stdin.\n"
+        " delete <dir>  Delete index with files below <dir>.\n"
+        " commit        Commit changes\n"
+        " clean         Clean shadow files\n"
+        "Options:\n"
+       " -t <type>     Index files as <type> (grs or text).\n"
+       " -c <config>   Read configuration file <config>.\n"
+       " -g <group>    Index files according to group settings.\n"
+       " -d <database> Records belong to Z39.50 database <database>.\n"
+       " -m <mbytes>   Use <mbytes> before flushing keys to disk.\n"
+        " -n            Don't use shadow system.\n"
+       " -s            Show analysis on stdout, but do no work.\n"
+       " -v <level>    Set logging to <level>.\n"
+        " -l <file>     Write log to <file>.\n"
+        " -f <n>        Display information for the first <n> records.\n"
+#if ZEBRASDR
+       " -S            Use SDRKit\n"
+#endif
+        " -V            Show version.\n", *argv
+                 );
         exit (1);
     }
-    if (rep[rep_len-1] != '/')
-        rep[rep_len] = '/';
-    else
-        --rep_len;
-    while ((dent = readdir (dir)))
-    {
-        strcpy (rep +rep_len+1, dent->d_name);
-        stat (rep, &fs);
-        switch (fs.st_mode & S_IFMT)
-        {
-        case S_IFREG:
-            file_extract (cmd, &fs, rep);
-            break;
-        case S_IFDIR:
-            if (strcmp (dent->d_name, ".") && strcmp(dent->d_name, ".."))
-                repository_extract_r (cmd, rep);
-            break;
-        }
-    }
-    closedir (dir);
-}
-
-void repository_update_r (int cmd, const char *rep, const char *with_rep)
-{
-
-}
-
-void repository_traverse (int cmd, const char *rep)
-{
-    char rep_tmp1[2048];
-    char rep_tmp2[2048];
-
-    strcpy (rep_tmp1, rep);
-    if (base_path)
-    {
-        strcpy (rep_tmp2, base_path);
-        repository_update_r (cmd, rep_tmp1, rep_tmp2);
-    }
-    else
-        repository_extract_r (cmd, rep_tmp1);
-}
-
-
-int main (int argc, char **argv)
-{
-    int ret;
-    int cmd = 0;
-    char *arg;
-    char *base_name;
-
-    prog = *argv;
-    while ((ret = options ("b:v:", argv, argc, &arg)) != -2)
+    while ((ret = options ("sVt:c:g:d:m:v:nf:l:"  /* NOTE(review): loop treats -2 as end-of-arguments and 0 as a plain argument; confirm against options() docs */
+#if ZEBRASDR
+                          "S"
+#endif
+                          , argv, argc, &arg)) != -2)
     {
         if (ret == 0)
         {
-            if (!base_name)
+            const char *rval;  /* receives the "shadow" resource value */
+            if(cmd == 0) /* command */
             {
-                base_name = arg;
-
-                common_resource = res_open (base_name);
                 if (!common_resource)
                 {
-                    log (LOG_FATAL, "Cannot open resource `%s'", base_name);
-                    exit (1);
+#if ZMBOL
+                    logf (LOG_LOG, "zmbol version %s %s",
+                          ZEBRAVER, ZEBRADATE);
+#else
+                    logf (LOG_LOG, "zebra version %s %s",
+                          ZEBRAVER, ZEBRADATE);
+#endif
+                    common_resource = res_open (configName ?  /* first command seen: open the configuration once */
+                                                configName : FNAME_CONFIG);
+                    if (!common_resource)
+                    {
+                        logf (LOG_FATAL, "cannot read file `%s'", configName);
+                        exit (1);
+                    }
+                    data1_set_tabpath (rGroupDef.dh, res_get (common_resource,
+                                                             "profilePath"));
+
+                   rGroupDef.bfs =  /* created from the "register" resource */
+                       bfs_create (res_get (common_resource, "register"));
+                    if (!rGroupDef.bfs)
+                    {
+                        logf (LOG_FATAL, "Cannot access register");
+                        exit(1);
+                    }
+
+                    bf_lockDir (rGroupDef.bfs,
+                               res_get (common_resource, "lockDir"));
+                   rGroupDef.zebra_maps = zebra_maps_open (common_resource);
                 }
-            }
-            else if(cmd == 0) /* command */
-            {
-                if (!strcmp (arg, "add"))
+                if (!strcmp (arg, "update"))  /* the commands that set cmd are carried out when the following <dir> argument arrives */
+                    cmd = 'u';
+                else if (!strcmp (arg, "update1"))
+                    cmd = 'U';
+                else if (!strcmp (arg, "update2"))
+                    cmd = 'm';
+                else if (!strcmp (arg, "dump"))
+                    cmd = 's';
+                else if (!strcmp (arg, "del") || !strcmp(arg, "delete"))
+                    cmd = 'd';
+               else if (!strcmp (arg, "init"))  /* init, commit, clean, stat and compact run right here and leave cmd at 0 */
+               {
+                   zebraIndexUnlock(); 
+                   rval = res_get (common_resource, "shadow");
+                   zebraIndexLock (rGroupDef.bfs, 0, rval);
+                   if (rval && *rval)
+                       bf_cache (rGroupDef.bfs, rval);
+                   zebraIndexLockMsg ("w");
+                   bf_reset (rGroupDef.bfs);
+               }
+                else if (!strcmp (arg, "commit"))
                 {
-                    cmd = 'a';
+                    rval = res_get (common_resource, "shadow");
+                    zebraIndexLock (rGroupDef.bfs, 1, rval);
+                    if (rval && *rval)
+                        bf_cache (rGroupDef.bfs, rval);
+                    else  /* commit is refused when the "shadow" resource is missing or empty */
+                    {
+                        logf (LOG_FATAL, "Cannot perform commit");
+                        logf (LOG_FATAL, "No shadow area defined");
+                        exit (1);
+                    }
+                    if (bf_commitExists (rGroupDef.bfs))
+                    {
+                        logf (LOG_LOG, "commit start");
+                        zebraIndexLockMsg ("c");
+                        zebraIndexWait (1);
+                        logf (LOG_LOG, "commit execute");
+                        bf_commitExec (rGroupDef.bfs);
+#ifndef WIN32
+                        sync ();
+#endif
+                        zebraIndexLockMsg ("d");
+                        zebraIndexWait (0);
+                        logf (LOG_LOG, "commit clean");
+                        bf_commitClean (rGroupDef.bfs, rval);
+                    }
+                    else
+                        logf (LOG_LOG, "nothing to commit");
                 }
-                else if (!strcmp (arg, "del"))
+                else if (!strcmp (arg, "clean"))
                 {
-                    cmd = 'd';
+                    rval = res_get (common_resource, "shadow");
+                    zebraIndexLock (rGroupDef.bfs, 1, rval);
+                    if (bf_commitExists (rGroupDef.bfs))
+                    {
+                        zebraIndexLockMsg ("d");
+                        zebraIndexWait (0);
+                        logf (LOG_LOG, "commit clean");
+                        bf_commitClean (rGroupDef.bfs, rval);
+                    }
+                    else
+                        logf (LOG_LOG, "nothing to clean");
+                }
+                else if (!strcmp (arg, "stat") || !strcmp (arg, "status"))
+                {
+                   Records records;
+                    rval = res_get (common_resource, "shadow");
+                    zebraIndexLock (rGroupDef.bfs, 0, rval);
+                    if (rval && *rval)
+                    {
+                        bf_cache (rGroupDef.bfs, rval);
+                        zebraIndexLockMsg ("r");
+                    }
+                   records = rec_open (rGroupDef.bfs, 0, 0);
+                    rec_prstat (records);
+                   rec_close (&records);
+                    inv_prstat (rGroupDef.bfs);
+                }
+                else if (!strcmp (arg, "compact"))
+                {
+                    rval = res_get (common_resource, "shadow");
+                    zebraIndexLock (rGroupDef.bfs, 0, rval);
+                    if (rval && *rval)
+                    {
+                        bf_cache (rGroupDef.bfs, rval);
+                        zebraIndexLockMsg ("r");
+                    }
+                    inv_compact(rGroupDef.bfs);
                 }
                 else
                 {
-                    log (LOG_FATAL, "Unknown command: %s", arg);
+                    logf (LOG_FATAL, "unknown command: %s", arg);
                     exit (1);
                 }
             }
-            else
+           else  /* cmd is already set, so arg is the <dir> that command operates on */
             {
-                key_open ("keys.tmp");
-                repository_traverse (cmd, arg);
+                struct recordGroup rGroup;  /* copy of rGroupDef with path set to arg */
+#if ZMBOL
+#else
+               /* For zebra, delete lock file and reset register */
+               if (rGroupDef.flagRw)
+               {
+                   zebraIndexUnlock();
+                   bf_reset (rGroupDef.bfs);
+               }
+#endif
+                rval = res_get (common_resource, "shadow");
+                zebraIndexLock (rGroupDef.bfs, 0, rval);
+               if (rGroupDef.flagRw)
+               {
+                   if (rval && *rval && !disableCommit)  /* "shadow" is configured and -n was not given */
+                   {
+                       bf_cache (rGroupDef.bfs, rval);
+                       zebraIndexLockMsg ("r");
+                   }
+                   else
+                   {
+                       bf_cache (rGroupDef.bfs, 0);
+                       zebraIndexLockMsg ("w");
+                   }
+                   zebraIndexWait (0);
+               }
+                memcpy (&rGroup, &rGroupDef, sizeof(rGroup));
+                rGroup.path = arg;
+                switch (cmd)
+                {
+                case 'u':  /* "update" */
+                    if (!key_open (&rGroup, mem_max))
+                   {
+                       logf (LOG_LOG, "updating %s", rGroup.path);
+                       repositoryUpdate (&rGroup);
+                       nsections = key_close (&rGroup);
+                   }
+                    break;
+                case 'U':  /* "update1": same as update, but nsections is forced to 0 so key_input() is skipped */
+                    if (!key_open (&rGroup, mem_max))
+                   {
+                       logf (LOG_LOG, "updating (pass 1) %s", rGroup.path);
+                       repositoryUpdate (&rGroup);
+                       key_close (&rGroup);
+                   }
+                    nsections = 0;
+                    break;
+                case 'd':  /* "del" / "delete" */
+                    if (!key_open (&rGroup,mem_max))
+                   {
+                       logf (LOG_LOG, "deleting %s", rGroup.path);
+                       repositoryDelete (&rGroup);
+                       nsections = key_close (&rGroup);
+                   }
+                    break;
+                case 's':  /* "dump" */
+                    logf (LOG_LOG, "dumping %s", rGroup.path);
+                    repositoryShow (&rGroup);
+                    nsections = 0;
+                    break;
+                case 'm':  /* "update2": no repository pass here; -1 is handed to key_input() below */
+                    nsections = -1;
+                    break;
+                default:
+                    nsections = 0;
+                }
                 cmd = 0;
+                if (nsections)  /* true for both positive counts and the -1 set by "update2" */
+                {
+                    logf (LOG_LOG, "merging with index");
+                    key_input (rGroup.bfs, nsections, 60, common_resource);
+#ifndef WIN32
+                    sync ();
+#endif
+                }
+                log_event_end (NULL, NULL);
             }
         }
-        else if (ret == 'v')
-        {
-            log_init (log_mask_str(arg), prog, NULL);
-        }
-        else if (ret == 'b')
+        else if (ret == 'V')  /* -V prints version details to stderr and parsing continues */
         {
-            base_path = arg;
+#if ZMBOL
+            fprintf (stderr, "Z'mbol %s %s\n", ZEBRAVER, ZEBRADATE);
+#else
+            fprintf (stderr, "Zebra %s %s\n", ZEBRAVER, ZEBRADATE);
+#endif
+           fprintf (stderr, " (C) 1994-2001, Index Data ApS\n");
+#ifdef WIN32
+#ifdef _DEBUG
+            fprintf (stderr, " WIN32 Debug\n");
+#else
+            fprintf (stderr, " WIN32 Release\n");
+#endif
+#endif
+#if HAVE_BZLIB_H
+            fprintf (stderr, "libbzip2\n"
+                    " (C) 1996-1999 Julian R Seward.  All rights reserved.\n");
+#endif
         }
+        else if (ret == 'v')
+            yaz_log_init_level (yaz_log_mask_str(arg));
+       else if (ret == 'l')
+           yaz_log_init_file (arg);
+        else if (ret == 'm')
+            mem_max = 1024*1024*atoi(arg);  /* NOTE(review): multiplied as int before conversion to size_t; can overflow for <mbytes> >= 2048 with 32-bit int */
+        else if (ret == 'd')
+            rGroupDef.databaseName = arg;
+       else if (ret == 's')
+           rGroupDef.flagRw = 0;
+        else if (ret == 'g')
+            rGroupDef.groupName = arg;
+        else if (ret == 'f')
+            rGroupDef.fileVerboseLimit = atoi(arg);
+        else if (ret == 'c')
+            configName = arg;  /* only takes effect if given before the first command, since the config is opened there */
+        else if (ret == 't')
+            rGroupDef.recordType = arg;
+        else if (ret == 'n')
+            disableCommit = 1;
+#if ZEBRASDR
+       else if (ret == 'S')
+           rGroupDef.useSDR = 1;
+#endif
         else
-        {
-            log (LOG_FATAL, "Unknown option '-%s'", arg);
-            exit (1);
-        }
+            logf (LOG_WARN, "unknown option '-%s'", arg);  /* unknown options are logged as a warning; parsing continues */
     }
-    key_flush ();
-    key_close ();
+    recTypes_destroy (rGroupDef.recTypes);
+    if (common_resource)  /* rGroupDef.bfs is only assigned after the configuration has been opened */
+    {
+        zebraIndexUnlock ();
+       bfs_destroy (rGroupDef.bfs);
+    }
+    data1_destroy (rGroupDef.dh);
     exit (0);
+    return 0;  /* not reached: the exit (0) above ends the process */
 }
+