X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fmain.c;h=60310e449c860a4360694a06a99906cfc0dd8553;hb=3d9f3bffb46938b54ea2aaf3738d8579fdcfdebd;hp=87f7c16f0975a2a9650df5fa02426b383d31ce1d;hpb=4cb66a9e7b322dd58c9d52f1a660a271d6c384f4;p=idzebra-moved-to-github.git diff --git a/index/main.c b/index/main.c index 87f7c16..60310e4 100644 --- a/index/main.c +++ b/index/main.c @@ -1,304 +1,378 @@ /* - * Copyright (C) 1994, Index Data I/S + * Copyright (C) 1994-2001, Index Data * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss - * - * $Log: main.c,v $ - * Revision 1.1 1995-08-31 14:50:24 adam - * New simple file index tool. * + * $Id: main.c,v 1.81 2001-11-19 23:29:09 adam Exp $ */ #include +#include #include +#ifdef WIN32 +#include +#else #include -#include -#include -#include -#include -#include +#endif -#include +#include #include "index.h" +#include "recindex.h" -char *prog; - -static int key_fd = -1; -#define KEY_BUF_SIZE 100000 -static char *key_buf; -int key_offset; -SYSNO sysno_next; -Dict file_idx; -static char *base_path = NULL; - -void key_open (const char *fname) -{ - void *file_key; - if (key_fd != -1) - return; - if ((key_fd = open (fname, O_RDWR|O_CREAT, 0666)) == -1) - { - log (LOG_FATAL|LOG_ERRNO, "Creat %s", fname); - exit (1); - } - if (!(key_buf = malloc (KEY_BUF_SIZE))) - { - log (LOG_FATAL|LOG_ERRNO, "malloc"); - exit (1); - } - key_offset = 0; - if (!(file_idx = dict_open ("fileidx", 10, 1))) - { - log (LOG_FATAL, "dict_open fail of %s", "fileidx"); - exit (1); - } - file_key = dict_lookup (file_idx, "."); - if (file_key) - memcpy (&sysno_next, (char*)file_key+1, sizeof(sysno_next)); - else - sysno_next = 1; -} +#ifndef ZEBRASDR +#define ZEBRASDR 0 +#endif -void key_close (void) -{ - if (key_fd == -1) - return; - close (key_fd); - dict_insert (file_idx, ".", sizeof(sysno_next), &sysno_next); - dict_close (file_idx); - key_fd = -1; -} +#if ZEBRASDR +#include "zebrasdr.h" +#endif -void key_flush (void) -{ - size_t i = 0; - int w; - - while (i < key_offset) - { - w = write (key_fd, key_buf + i, key_offset - i); - if (w == -1) - { - log (LOG_FATAL|LOG_ERRNO, "Write key fail"); - exit (1); - } - i += w; - } - key_offset = 0; -} +char *prog; -void key_write (int cmd, struct it_key *k, const char *str) -{ - char x = cmd; - size_t slen = strlen(str); +Res common_resource = 0; - if (key_offset + sizeof(*k) + slen >= KEY_BUF_SIZE - 2) - key_flush (); - memcpy (key_buf + key_offset, &x, 1); - key_offset++; - memcpy (key_buf + key_offset, k, sizeof(*k)); - key_offset += sizeof(*k); - memcpy (key_buf + key_offset, str, slen+1); - key_offset += slen+1; -} -void text_extract (SYSNO sysno, int cmd, const char *fname) +int main (int argc, char **argv) { - FILE *inf; - struct it_key k; - int seqno = 1; - int c; - char w[256]; - - log (LOG_DEBUG, "Text extract of %d", sysno); - k.sysno = sysno; - inf = fopen (fname, "r"); - if (!inf) - { - log (LOG_WARN|LOG_ERRNO, "open %s", fname); - return; - } - while ((c=getc (inf)) != EOF) - { - int i = 0; - while (i < 254 && c != EOF && isalnum(c)) - { - w[i++] = c; - c = getc (inf); - } - if (i) - { - w[i] = 0; - - k.seqno = seqno++; - k.field = 0; - key_write (cmd, &k, w); - } - if (c == EOF) - break; - } - fclose (inf); -} + int ret; + int cmd = 0; + char *arg; + char *configName = FNAME_CONFIG; + int nsections = 0; + int disableCommit = 0; + size_t mem_max = 0; +#ifndef WIN32 + char nbuf[100]; +#endif + struct recordGroup rGroupDef; -void file_extract (int cmd, struct stat *fs, const char *fname) -{ - int i; - char ext[128]; - SYSNO sysno; - char ext_res[128]; - const char *file_type; - void *file_info; + nmem_init (); - log (LOG_DEBUG, "%c %s", cmd, fname); - for (i = strlen(fname); --i >= 0; ) - if (fname[i] == '/') - { - strcpy (ext, ""); - break; - } - else if (fname[i] == '.') - { - strcpy (ext, fname+i+1); - break; - } - sprintf (ext_res, "fileExtension.%s", ext); - if (!(file_type = res_get (common_resource, ext_res))) - return; - - file_info = dict_lookup (file_idx, fname); - if (!file_info) - { - sysno = sysno_next++; - dict_insert (file_idx, fname, sizeof(sysno), &sysno); - } - else - memcpy (&sysno, (char*) file_info+1, sizeof(sysno)); - if (!strcmp (file_type, "text")) - text_extract (sysno, cmd, fname); -} +#ifdef WIN32 +#else + sprintf(nbuf, "%.40s(%d)", *argv, getpid()); + yaz_log_init_prefix (nbuf); +#endif -static void repository_extract_r (int cmd, char *rep) -{ - DIR *dir; - size_t rep_len; - struct dirent *dent; - struct stat fs; +#if ZEBRASDR + zebraSdr_std (); + rGroupDef.useSDR = 0; +#endif + rGroupDef.groupName = NULL; + rGroupDef.databaseName = NULL; + rGroupDef.path = NULL; + rGroupDef.recordId = NULL; + rGroupDef.recordType = NULL; + rGroupDef.flagStoreData = -1; + rGroupDef.flagStoreKeys = -1; + rGroupDef.flagRw = 1; + rGroupDef.databaseNamePath = 0; + rGroupDef.explainDatabase = 0; + rGroupDef.fileVerboseLimit = 100000; + rGroupDef.zebra_maps = NULL; + rGroupDef.dh = data1_create (); + rGroupDef.recTypes = recTypes_init (rGroupDef.dh); + recTypes_default_handlers (rGroupDef.recTypes); - rep_len = strlen(rep); - dir = opendir(rep); - if (!dir) + prog = *argv; + if (argc < 2) { - if (errno == ENOENT) - { - log (LOG_WARN|LOG_ERRNO, "opendir %s", rep); - return; - } - log (LOG_FATAL|LOG_ERRNO, "opendir %s", rep); + fprintf (stderr, "%s [options] command ...\n" + "Commands:\n" + " update Update index with files below .\n" + " If is empty filenames are read from stdin.\n" + " delete Delete index with files below .\n" + " commit Commit changes\n" + " clean Clean shadow files\n" + "Options:\n" + " -t Index files as (grs or text).\n" + " -c Read configuration file .\n" + " -g Index files according to group settings.\n" + " -d Records belong to Z39.50 database .\n" + " -m Use before flushing keys to disk.\n" + " -n Don't use shadow system.\n" + " -s Show analysis on stdout, but do no work.\n" + " -v Set logging to .\n" + " -l Write log to .\n" + " -f Display information for the first records.\n" +#if ZEBRASDR + " -S Use SDRKit\n" +#endif + " -V Show version.\n", *argv + ); exit (1); } - if (rep[rep_len-1] != '/') - rep[rep_len] = '/'; - else - --rep_len; - while ((dent = readdir (dir))) - { - strcpy (rep +rep_len+1, dent->d_name); - stat (rep, &fs); - switch (fs.st_mode & S_IFMT) - { - case S_IFREG: - file_extract (cmd, &fs, rep); - break; - case S_IFDIR: - if (strcmp (dent->d_name, ".") && strcmp(dent->d_name, "..")) - repository_extract_r (cmd, rep); - break; - } - } - closedir (dir); -} - -void repository_update_r (int cmd, const char *rep, const char *with_rep) -{ - -} - -void repository_traverse (int cmd, const char *rep) -{ - char rep_tmp1[2048]; - char rep_tmp2[2048]; - - strcpy (rep_tmp1, rep); - if (base_path) - { - strcpy (rep_tmp2, base_path); - repository_update_r (cmd, rep_tmp1, rep_tmp2); - } - else - repository_extract_r (cmd, rep_tmp1); -} - - -int main (int argc, char **argv) -{ - int ret; - int cmd = 0; - char *arg; - char *base_name; - - prog = *argv; - while ((ret = options ("b:v:", argv, argc, &arg)) != -2) + while ((ret = options ("sVt:c:g:d:m:v:nf:l:" +#if ZEBRASDR + "S" +#endif + , argv, argc, &arg)) != -2) { if (ret == 0) { - if (!base_name) + const char *rval; + if(cmd == 0) /* command */ { - base_name = arg; - - common_resource = res_open (base_name); if (!common_resource) { - log (LOG_FATAL, "Cannot open resource `%s'", base_name); - exit (1); +#if ZMBOL + logf (LOG_LOG, "zmbol version %s %s", + ZEBRAVER, ZEBRADATE); +#else + logf (LOG_LOG, "zebra version %s %s", + ZEBRAVER, ZEBRADATE); +#endif + common_resource = res_open (configName ? + configName : FNAME_CONFIG); + if (!common_resource) + { + logf (LOG_FATAL, "cannot read file `%s'", configName); + exit (1); + } + data1_set_tabpath (rGroupDef.dh, res_get (common_resource, + "profilePath")); + + rGroupDef.bfs = + bfs_create (res_get (common_resource, "register")); + if (!rGroupDef.bfs) + { + logf (LOG_FATAL, "Cannot access register"); + exit(1); + } + + bf_lockDir (rGroupDef.bfs, + res_get (common_resource, "lockDir")); + rGroupDef.zebra_maps = zebra_maps_open (common_resource); } - } - else if(cmd == 0) /* command */ - { - if (!strcmp (arg, "add")) + if (!strcmp (arg, "update")) + cmd = 'u'; + else if (!strcmp (arg, "update1")) + cmd = 'U'; + else if (!strcmp (arg, "update2")) + cmd = 'm'; + else if (!strcmp (arg, "dump")) + cmd = 's'; + else if (!strcmp (arg, "del") || !strcmp(arg, "delete")) + cmd = 'd'; + else if (!strcmp (arg, "init")) + { + zebraIndexUnlock(); + rval = res_get (common_resource, "shadow"); + zebraIndexLock (rGroupDef.bfs, 0, rval); + if (rval && *rval) + bf_cache (rGroupDef.bfs, rval); + zebraIndexLockMsg ("w"); + bf_reset (rGroupDef.bfs); + } + else if (!strcmp (arg, "commit")) { - cmd = 'a'; + rval = res_get (common_resource, "shadow"); + zebraIndexLock (rGroupDef.bfs, 1, rval); + if (rval && *rval) + bf_cache (rGroupDef.bfs, rval); + else + { + logf (LOG_FATAL, "Cannot perform commit"); + logf (LOG_FATAL, "No shadow area defined"); + exit (1); + } + if (bf_commitExists (rGroupDef.bfs)) + { + logf (LOG_LOG, "commit start"); + zebraIndexLockMsg ("c"); + zebraIndexWait (1); + logf (LOG_LOG, "commit execute"); + bf_commitExec (rGroupDef.bfs); +#ifndef WIN32 + sync (); +#endif + zebraIndexLockMsg ("d"); + zebraIndexWait (0); + logf (LOG_LOG, "commit clean"); + bf_commitClean (rGroupDef.bfs, rval); + } + else + logf (LOG_LOG, "nothing to commit"); } - else if (!strcmp (arg, "del")) + else if (!strcmp (arg, "clean")) { - cmd = 'd'; + rval = res_get (common_resource, "shadow"); + zebraIndexLock (rGroupDef.bfs, 1, rval); + if (bf_commitExists (rGroupDef.bfs)) + { + zebraIndexLockMsg ("d"); + zebraIndexWait (0); + logf (LOG_LOG, "commit clean"); + bf_commitClean (rGroupDef.bfs, rval); + } + else + logf (LOG_LOG, "nothing to clean"); + } + else if (!strcmp (arg, "stat") || !strcmp (arg, "status")) + { + Records records; + rval = res_get (common_resource, "shadow"); + zebraIndexLock (rGroupDef.bfs, 0, rval); + if (rval && *rval) + { + bf_cache (rGroupDef.bfs, rval); + zebraIndexLockMsg ("r"); + } + records = rec_open (rGroupDef.bfs, 0, 0); + rec_prstat (records); + rec_close (&records); + inv_prstat (rGroupDef.bfs); + } + else if (!strcmp (arg, "compact")) + { + rval = res_get (common_resource, "shadow"); + zebraIndexLock (rGroupDef.bfs, 0, rval); + if (rval && *rval) + { + bf_cache (rGroupDef.bfs, rval); + zebraIndexLockMsg ("r"); + } + inv_compact(rGroupDef.bfs); } else { - log (LOG_FATAL, "Unknown command: %s", arg); + logf (LOG_FATAL, "unknown command: %s", arg); exit (1); } } - else + else { - key_open ("keys.tmp"); - repository_traverse (cmd, arg); + struct recordGroup rGroup; +#if ZMBOL +#else + /* For zebra, delete lock file and reset register */ + if (rGroupDef.flagRw) + { + zebraIndexUnlock(); + bf_reset (rGroupDef.bfs); + } +#endif + rval = res_get (common_resource, "shadow"); + zebraIndexLock (rGroupDef.bfs, 0, rval); + if (rGroupDef.flagRw) + { + if (rval && *rval && !disableCommit) + { + bf_cache (rGroupDef.bfs, rval); + zebraIndexLockMsg ("r"); + } + else + { + bf_cache (rGroupDef.bfs, 0); + zebraIndexLockMsg ("w"); + } + zebraIndexWait (0); + } + memcpy (&rGroup, &rGroupDef, sizeof(rGroup)); + rGroup.path = arg; + switch (cmd) + { + case 'u': + if (!key_open (&rGroup, mem_max)) + { + logf (LOG_LOG, "updating %s", rGroup.path); + repositoryUpdate (&rGroup); + nsections = key_close (&rGroup); + } + break; + case 'U': + if (!key_open (&rGroup, mem_max)) + { + logf (LOG_LOG, "updating (pass 1) %s", rGroup.path); + repositoryUpdate (&rGroup); + key_close (&rGroup); + } + nsections = 0; + break; + case 'd': + if (!key_open (&rGroup,mem_max)) + { + logf (LOG_LOG, "deleting %s", rGroup.path); + repositoryDelete (&rGroup); + nsections = key_close (&rGroup); + } + break; + case 's': + logf (LOG_LOG, "dumping %s", rGroup.path); + repositoryShow (&rGroup); + nsections = 0; + break; + case 'm': + nsections = -1; + break; + default: + nsections = 0; + } cmd = 0; + if (nsections) + { + logf (LOG_LOG, "merging with index"); + key_input (rGroup.bfs, nsections, 60, common_resource); +#ifndef WIN32 + sync (); +#endif + } + log_event_end (NULL, NULL); } } - else if (ret == 'v') - { - log_init (log_mask_str(arg), prog, NULL); - } - else if (ret == 'b') + else if (ret == 'V') { - base_path = arg; +#if ZMBOL + fprintf (stderr, "Z'mbol %s %s\n", ZEBRAVER, ZEBRADATE); +#else + fprintf (stderr, "Zebra %s %s\n", ZEBRAVER, ZEBRADATE); +#endif + fprintf (stderr, " (C) 1994-2001, Index Data ApS\n"); +#ifdef WIN32 +#ifdef _DEBUG + fprintf (stderr, " WIN32 Debug\n"); +#else + fprintf (stderr, " WIN32 Release\n"); +#endif +#endif +#if HAVE_BZLIB_H + fprintf (stderr, "libbzip2\n" + " (C) 1996-1999 Julian R Seward. All rights reserved.\n"); +#endif } + else if (ret == 'v') + yaz_log_init_level (yaz_log_mask_str(arg)); + else if (ret == 'l') + yaz_log_init_file (arg); + else if (ret == 'm') + mem_max = 1024*1024*atoi(arg); + else if (ret == 'd') + rGroupDef.databaseName = arg; + else if (ret == 's') + rGroupDef.flagRw = 0; + else if (ret == 'g') + rGroupDef.groupName = arg; + else if (ret == 'f') + rGroupDef.fileVerboseLimit = atoi(arg); + else if (ret == 'c') + configName = arg; + else if (ret == 't') + rGroupDef.recordType = arg; + else if (ret == 'n') + disableCommit = 1; +#if ZEBRASDR + else if (ret == 'S') + rGroupDef.useSDR = 1; +#endif else - { - log (LOG_FATAL, "Unknown option '-%s'", arg); - exit (1); - } + logf (LOG_WARN, "unknown option '-%s'", arg); } - key_flush (); - key_close (); + recTypes_destroy (rGroupDef.recTypes); + if (common_resource) + { + zebraIndexUnlock (); + bfs_destroy (rGroupDef.bfs); + } + data1_destroy (rGroupDef.dh); exit (0); + return 0; } +