X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fmain.c;h=17fdbb45b3820d9a88c05a3006cca81c031f50dc;hb=25c9afa691b8f62c2a17150cf1febd1f61755b12;hp=87f7c16f0975a2a9650df5fa02426b383d31ce1d;hpb=4cb66a9e7b322dd58c9d52f1a660a271d6c384f4;p=idzebra-moved-to-github.git diff --git a/index/main.c b/index/main.c index 87f7c16..17fdbb4 100644 --- a/index/main.c +++ b/index/main.c @@ -1,286 +1,193 @@ /* - * Copyright (C) 1994, Index Data I/S + * Copyright (C) 1994-1995, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: main.c,v $ - * Revision 1.1 1995-08-31 14:50:24 adam + * Revision 1.25 1995-12-01 16:24:39 adam + * Commit files use separate meta file area. + * + * Revision 1.24 1995/11/30 17:01:38 adam + * New setting commitCache: points to commit directories/files. + * New command commit: commits at the end of a zebraidx run. + * + * Revision 1.23 1995/11/30 08:34:31 adam + * Started work on commit facility. + * Changed a few malloc/free to xmalloc/xfree. + * + * Revision 1.22 1995/11/28 09:09:42 adam + * Zebra config renamed. + * Use setting 'recordId' to identify record now. + * Bug fix in recindex.c: rec_release_blocks was invokeded even + * though the blocks were already released. + * File traversal properly deletes records when needed. + * + * Revision 1.21 1995/11/27 14:27:39 adam + * Renamed 'update' command to 'dir'. + * + * Revision 1.20 1995/11/27 13:58:53 adam + * New option -t. storeStore data implemented in server. + * + * Revision 1.19 1995/11/25 10:24:06 adam + * More record fields - they are enumerated now. + * New options: flagStoreData flagStoreKey. + * + * Revision 1.18 1995/11/22 17:19:17 adam + * Record management uses the bfile system. + * + * Revision 1.17 1995/11/21 15:01:16 adam + * New general match criteria implemented. + * New feature: document groups. + * + * Revision 1.16 1995/11/20 11:56:27 adam + * Work on new traversal. + * + * Revision 1.15 1995/11/01 16:25:51 quinn + * *** empty log message *** + * + * Revision 1.14 1995/10/17 18:02:09 adam + * New feature: databases. Implemented as prefix to words in dictionary. + * + * Revision 1.13 1995/10/10 12:24:39 adam + * Temporary sort files are compressed. + * + * Revision 1.12 1995/10/04 16:57:20 adam + * Key input and merge sort in one pass. + * + * Revision 1.11 1995/09/29 14:01:45 adam + * Bug fixes. + * + * Revision 1.10 1995/09/28 14:22:57 adam + * Sort uses smaller temporary files. + * + * Revision 1.9 1995/09/14 07:48:24 adam + * Record control management. + * + * Revision 1.8 1995/09/06 16:11:18 adam + * Option: only one word key per file. + * + * Revision 1.7 1995/09/05 15:28:39 adam + * More work on search engine. + * + * Revision 1.6 1995/09/04 12:33:43 adam + * Various cleanup. YAZ util used instead. + * + * Revision 1.5 1995/09/04 09:10:39 adam + * More work on index add/del/update. + * Merge sort implemented. + * Initial work on z39 server. + * + * Revision 1.4 1995/09/01 14:06:36 adam + * Split of work into more files. + * + * Revision 1.3 1995/09/01 10:57:07 adam + * Minor changes. + * + * Revision 1.2 1995/09/01 10:30:24 adam + * More work on indexing. Not working yet. + * + * Revision 1.1 1995/08/31 14:50:24 adam * New simple file index tool. * */ #include #include #include -#include -#include -#include -#include -#include -#include +#include +#include #include "index.h" char *prog; - -static int key_fd = -1; -#define KEY_BUF_SIZE 100000 -static char *key_buf; -int key_offset; -SYSNO sysno_next; -Dict file_idx; -static char *base_path = NULL; - -void key_open (const char *fname) -{ - void *file_key; - if (key_fd != -1) - return; - if ((key_fd = open (fname, O_RDWR|O_CREAT, 0666)) == -1) - { - log (LOG_FATAL|LOG_ERRNO, "Creat %s", fname); - exit (1); - } - if (!(key_buf = malloc (KEY_BUF_SIZE))) - { - log (LOG_FATAL|LOG_ERRNO, "malloc"); - exit (1); - } - key_offset = 0; - if (!(file_idx = dict_open ("fileidx", 10, 1))) - { - log (LOG_FATAL, "dict_open fail of %s", "fileidx"); - exit (1); - } - file_key = dict_lookup (file_idx, "."); - if (file_key) - memcpy (&sysno_next, (char*)file_key+1, sizeof(sysno_next)); - else - sysno_next = 1; -} - -void key_close (void) -{ - if (key_fd == -1) - return; - close (key_fd); - dict_insert (file_idx, ".", sizeof(sysno_next), &sysno_next); - dict_close (file_idx); - key_fd = -1; -} - -void key_flush (void) -{ - size_t i = 0; - int w; - - while (i < key_offset) - { - w = write (key_fd, key_buf + i, key_offset - i); - if (w == -1) - { - log (LOG_FATAL|LOG_ERRNO, "Write key fail"); - exit (1); - } - i += w; - } - key_offset = 0; -} - -void key_write (int cmd, struct it_key *k, const char *str) -{ - char x = cmd; - size_t slen = strlen(str); - - if (key_offset + sizeof(*k) + slen >= KEY_BUF_SIZE - 2) - key_flush (); - memcpy (key_buf + key_offset, &x, 1); - key_offset++; - memcpy (key_buf + key_offset, k, sizeof(*k)); - key_offset += sizeof(*k); - memcpy (key_buf + key_offset, str, slen+1); - key_offset += slen+1; -} - -void text_extract (SYSNO sysno, int cmd, const char *fname) -{ - FILE *inf; - struct it_key k; - int seqno = 1; - int c; - char w[256]; - - log (LOG_DEBUG, "Text extract of %d", sysno); - k.sysno = sysno; - inf = fopen (fname, "r"); - if (!inf) - { - log (LOG_WARN|LOG_ERRNO, "open %s", fname); - return; - } - while ((c=getc (inf)) != EOF) - { - int i = 0; - while (i < 254 && c != EOF && isalnum(c)) - { - w[i++] = c; - c = getc (inf); - } - if (i) - { - w[i] = 0; - - k.seqno = seqno++; - k.field = 0; - key_write (cmd, &k, w); - } - if (c == EOF) - break; - } - fclose (inf); -} - -void file_extract (int cmd, struct stat *fs, const char *fname) -{ - int i; - char ext[128]; - SYSNO sysno; - char ext_res[128]; - const char *file_type; - void *file_info; - - log (LOG_DEBUG, "%c %s", cmd, fname); - for (i = strlen(fname); --i >= 0; ) - if (fname[i] == '/') - { - strcpy (ext, ""); - break; - } - else if (fname[i] == '.') - { - strcpy (ext, fname+i+1); - break; - } - sprintf (ext_res, "fileExtension.%s", ext); - if (!(file_type = res_get (common_resource, ext_res))) - return; - - file_info = dict_lookup (file_idx, fname); - if (!file_info) - { - sysno = sysno_next++; - dict_insert (file_idx, fname, sizeof(sysno), &sysno); - } - else - memcpy (&sysno, (char*) file_info+1, sizeof(sysno)); - if (!strcmp (file_type, "text")) - text_extract (sysno, cmd, fname); -} - -static void repository_extract_r (int cmd, char *rep) -{ - DIR *dir; - size_t rep_len; - struct dirent *dent; - struct stat fs; - - rep_len = strlen(rep); - dir = opendir(rep); - if (!dir) - { - if (errno == ENOENT) - { - log (LOG_WARN|LOG_ERRNO, "opendir %s", rep); - return; - } - log (LOG_FATAL|LOG_ERRNO, "opendir %s", rep); - exit (1); - } - if (rep[rep_len-1] != '/') - rep[rep_len] = '/'; - else - --rep_len; - while ((dent = readdir (dir))) - { - strcpy (rep +rep_len+1, dent->d_name); - stat (rep, &fs); - switch (fs.st_mode & S_IFMT) - { - case S_IFREG: - file_extract (cmd, &fs, rep); - break; - case S_IFDIR: - if (strcmp (dent->d_name, ".") && strcmp(dent->d_name, "..")) - repository_extract_r (cmd, rep); - break; - } - } - closedir (dir); -} - -void repository_update_r (int cmd, const char *rep, const char *with_rep) -{ - -} - -void repository_traverse (int cmd, const char *rep) -{ - char rep_tmp1[2048]; - char rep_tmp2[2048]; - - strcpy (rep_tmp1, rep); - if (base_path) - { - strcpy (rep_tmp2, base_path); - repository_update_r (cmd, rep_tmp1, rep_tmp2); - } - else - repository_extract_r (cmd, rep_tmp1); -} - +size_t mem_max = 4*1024*1024; +extern char *data1_tabpath; int main (int argc, char **argv) { + int commit_at_end = 0; int ret; int cmd = 0; char *arg; - char *base_name; + char *configName = NULL; + int nsections; + int key_open_flag = 0; + + struct recordGroup rGroupDef; + + rGroupDef.groupName = NULL; + rGroupDef.databaseName = NULL; + rGroupDef.path = NULL; + rGroupDef.recordId = NULL; + rGroupDef.recordType = NULL; + rGroupDef.flagStoreData = -1; + rGroupDef.flagStoreKeys = -1; prog = *argv; - while ((ret = options ("b:v:", argv, argc, &arg)) != -2) + if (argc < 2) + { + fprintf (stderr, "zebraidx [options] command ...\n" + "Commands:\n" + " update Update index with files below .\n" + " If is empty filenames are read from stdin.\n" + " delete Delete index with files below .\n" + "Options:\n" + " -t Index files as (grs or text).\n" + " -c Read configuration file .\n" + " -g Index files according to group settings.\n" + " -d Records belong to Z39.50 database .\n" + " -m Use before flushing keys to disk.\n" + " -v Set logging to .\n"); + exit (1); + } + while ((ret = options ("t:c:g:d:m:v:", argv, argc, &arg)) != -2) { if (ret == 0) { - if (!base_name) + if(cmd == 0) /* command */ { - base_name = arg; - - common_resource = res_open (base_name); if (!common_resource) { - log (LOG_FATAL, "Cannot open resource `%s'", base_name); - exit (1); - } - } - else if(cmd == 0) /* command */ - { - if (!strcmp (arg, "add")) - { - cmd = 'a'; + const char *rval; + common_resource = res_open (configName ? + configName : FNAME_CONFIG); + if (!common_resource) + { + logf (LOG_FATAL, "Cannot open resource `%s'", + configName); + exit (1); + } + data1_tabpath = res_get (common_resource, "profilePath"); + rval = res_get (common_resource, "commitEnable"); + if (rval && atoi(rval)) + bf_cache (); } - else if (!strcmp (arg, "del")) - { + if (!strcmp (arg, "update")) + cmd = 'u'; + else if (!strcmp (arg, "del") || !strcmp(arg, "delete")) cmd = 'd'; - } + else if (!strcmp (arg, "commit")) + commit_at_end = 1; else { - log (LOG_FATAL, "Unknown command: %s", arg); + logf (LOG_FATAL, "Unknown command: %s", arg); exit (1); } } else { - key_open ("keys.tmp"); - repository_traverse (cmd, arg); + struct recordGroup rGroup; + + memcpy (&rGroup, &rGroupDef, sizeof(rGroup)); + if (!key_open_flag) + { + key_open (mem_max); + key_open_flag = 1; + } + rGroup.path = arg; + if (cmd == 'u') + repositoryUpdate (&rGroup); + else if (cmd == 'd') + repositoryDelete (&rGroup); cmd = 0; } } @@ -288,17 +195,42 @@ int main (int argc, char **argv) { log_init (log_mask_str(arg), prog, NULL); } - else if (ret == 'b') + else if (ret == 'm') + { + mem_max = 1024*1024*atoi(arg); + } + else if (ret == 'd') + { + rGroupDef.databaseName = arg; + } + else if (ret == 'g') { - base_path = arg; + rGroupDef.groupName = arg; } + else if (ret == 'c') + configName = arg; + else if (ret == 't') + rGroupDef.recordType = arg; else { - log (LOG_FATAL, "Unknown option '-%s'", arg); + logf (LOG_FATAL, "Unknown option '-%s'", arg); exit (1); } } - key_flush (); - key_close (); + if (key_open_flag) + { + nsections = key_close (); + if (nsections) + { + logf (LOG_LOG, "Merging with index"); + key_input (FNAME_WORD_DICT, FNAME_WORD_ISAM, nsections, 60); + } + } + if (commit_at_end) + { + logf (LOG_LOG, "commiting"); + bf_commit (); + } exit (0); } +