X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fmain.c;h=06164991df43c7e1169d5d9a20437045b1e04ff2;hb=3003d747565b03bac252b43eca48bb691d1adbfc;hp=87f7c16f0975a2a9650df5fa02426b383d31ce1d;hpb=4cb66a9e7b322dd58c9d52f1a660a271d6c384f4;p=idzebra-moved-to-github.git diff --git a/index/main.c b/index/main.c index 87f7c16..0616499 100644 --- a/index/main.c +++ b/index/main.c @@ -1,252 +1,63 @@ /* - * Copyright (C) 1994, Index Data I/S + * Copyright (C) 1994-1995, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: main.c,v $ - * Revision 1.1 1995-08-31 14:50:24 adam + * Revision 1.10 1995-09-28 14:22:57 adam + * Sort uses smaller temporary files. + * + * Revision 1.9 1995/09/14 07:48:24 adam + * Record control management. + * + * Revision 1.8 1995/09/06 16:11:18 adam + * Option: only one word key per file. + * + * Revision 1.7 1995/09/05 15:28:39 adam + * More work on search engine. + * + * Revision 1.6 1995/09/04 12:33:43 adam + * Various cleanup. YAZ util used instead. + * + * Revision 1.5 1995/09/04 09:10:39 adam + * More work on index add/del/update. + * Merge sort implemented. + * Initial work on z39 server. + * + * Revision 1.4 1995/09/01 14:06:36 adam + * Split of work into more files. + * + * Revision 1.3 1995/09/01 10:57:07 adam + * Minor changes. + * + * Revision 1.2 1995/09/01 10:30:24 adam + * More work on indexing. Not working yet. + * + * Revision 1.1 1995/08/31 14:50:24 adam * New simple file index tool. * */ #include #include #include -#include -#include -#include -#include -#include -#include +#include #include "index.h" char *prog; -static int key_fd = -1; -#define KEY_BUF_SIZE 100000 -static char *key_buf; -int key_offset; -SYSNO sysno_next; -Dict file_idx; -static char *base_path = NULL; - -void key_open (const char *fname) -{ - void *file_key; - if (key_fd != -1) - return; - if ((key_fd = open (fname, O_RDWR|O_CREAT, 0666)) == -1) - { - log (LOG_FATAL|LOG_ERRNO, "Creat %s", fname); - exit (1); - } - if (!(key_buf = malloc (KEY_BUF_SIZE))) - { - log (LOG_FATAL|LOG_ERRNO, "malloc"); - exit (1); - } - key_offset = 0; - if (!(file_idx = dict_open ("fileidx", 10, 1))) - { - log (LOG_FATAL, "dict_open fail of %s", "fileidx"); - exit (1); - } - file_key = dict_lookup (file_idx, "."); - if (file_key) - memcpy (&sysno_next, (char*)file_key+1, sizeof(sysno_next)); - else - sysno_next = 1; -} - -void key_close (void) -{ - if (key_fd == -1) - return; - close (key_fd); - dict_insert (file_idx, ".", sizeof(sysno_next), &sysno_next); - dict_close (file_idx); - key_fd = -1; -} - -void key_flush (void) -{ - size_t i = 0; - int w; - - while (i < key_offset) - { - w = write (key_fd, key_buf + i, key_offset - i); - if (w == -1) - { - log (LOG_FATAL|LOG_ERRNO, "Write key fail"); - exit (1); - } - i += w; - } - key_offset = 0; -} - -void key_write (int cmd, struct it_key *k, const char *str) -{ - char x = cmd; - size_t slen = strlen(str); - - if (key_offset + sizeof(*k) + slen >= KEY_BUF_SIZE - 2) - key_flush (); - memcpy (key_buf + key_offset, &x, 1); - key_offset++; - memcpy (key_buf + key_offset, k, sizeof(*k)); - key_offset += sizeof(*k); - memcpy (key_buf + key_offset, str, slen+1); - key_offset += slen+1; -} - -void text_extract (SYSNO sysno, int cmd, const char *fname) -{ - FILE *inf; - struct it_key k; - int seqno = 1; - int c; - char w[256]; - - log (LOG_DEBUG, "Text extract of %d", sysno); - k.sysno = sysno; - inf = fopen (fname, "r"); - if (!inf) - { - log (LOG_WARN|LOG_ERRNO, "open %s", fname); - return; - } - while ((c=getc (inf)) != EOF) - { - int i = 0; - while (i < 254 && c != EOF && isalnum(c)) - { - w[i++] = c; - c = getc (inf); - } - if (i) - { - w[i] = 0; - - k.seqno = seqno++; - k.field = 0; - key_write (cmd, &k, w); - } - if (c == EOF) - break; - } - fclose (inf); -} - -void file_extract (int cmd, struct stat *fs, const char *fname) -{ - int i; - char ext[128]; - SYSNO sysno; - char ext_res[128]; - const char *file_type; - void *file_info; - - log (LOG_DEBUG, "%c %s", cmd, fname); - for (i = strlen(fname); --i >= 0; ) - if (fname[i] == '/') - { - strcpy (ext, ""); - break; - } - else if (fname[i] == '.') - { - strcpy (ext, fname+i+1); - break; - } - sprintf (ext_res, "fileExtension.%s", ext); - if (!(file_type = res_get (common_resource, ext_res))) - return; - - file_info = dict_lookup (file_idx, fname); - if (!file_info) - { - sysno = sysno_next++; - dict_insert (file_idx, fname, sizeof(sysno), &sysno); - } - else - memcpy (&sysno, (char*) file_info+1, sizeof(sysno)); - if (!strcmp (file_type, "text")) - text_extract (sysno, cmd, fname); -} - -static void repository_extract_r (int cmd, char *rep) -{ - DIR *dir; - size_t rep_len; - struct dirent *dent; - struct stat fs; - - rep_len = strlen(rep); - dir = opendir(rep); - if (!dir) - { - if (errno == ENOENT) - { - log (LOG_WARN|LOG_ERRNO, "opendir %s", rep); - return; - } - log (LOG_FATAL|LOG_ERRNO, "opendir %s", rep); - exit (1); - } - if (rep[rep_len-1] != '/') - rep[rep_len] = '/'; - else - --rep_len; - while ((dent = readdir (dir))) - { - strcpy (rep +rep_len+1, dent->d_name); - stat (rep, &fs); - switch (fs.st_mode & S_IFMT) - { - case S_IFREG: - file_extract (cmd, &fs, rep); - break; - case S_IFDIR: - if (strcmp (dent->d_name, ".") && strcmp(dent->d_name, "..")) - repository_extract_r (cmd, rep); - break; - } - } - closedir (dir); -} - -void repository_update_r (int cmd, const char *rep, const char *with_rep) -{ - -} - -void repository_traverse (int cmd, const char *rep) -{ - char rep_tmp1[2048]; - char rep_tmp2[2048]; - - strcpy (rep_tmp1, rep); - if (base_path) - { - strcpy (rep_tmp2, base_path); - repository_update_r (cmd, rep_tmp1, rep_tmp2); - } - else - repository_extract_r (cmd, rep_tmp1); -} - - int main (int argc, char **argv) { int ret; int cmd = 0; char *arg; - char *base_name; + char *base_name = NULL; + char *base_path = NULL; + int nsections; + char **mbuf; prog = *argv; - while ((ret = options ("b:v:", argv, argc, &arg)) != -2) + while ((ret = options ("r:v:", argv, argc, &arg)) != -2) { if (ret == 0) { @@ -257,7 +68,7 @@ int main (int argc, char **argv) common_resource = res_open (base_name); if (!common_resource) { - log (LOG_FATAL, "Cannot open resource `%s'", base_name); + logf (LOG_FATAL, "Cannot open resource `%s'", base_name); exit (1); } } @@ -273,14 +84,15 @@ int main (int argc, char **argv) } else { - log (LOG_FATAL, "Unknown command: %s", arg); + logf (LOG_FATAL, "Unknown command: %s", arg); exit (1); } } else { - key_open ("keys.tmp"); - repository_traverse (cmd, arg); + unlink ("keys.tmp"); + key_open (3000000); + repository (cmd, arg, base_path); cmd = 0; } } @@ -288,17 +100,30 @@ int main (int argc, char **argv) { log_init (log_mask_str(arg), prog, NULL); } - else if (ret == 'b') + else if (ret == 'r') { base_path = arg; } else { - log (LOG_FATAL, "Unknown option '-%s'", arg); + logf (LOG_FATAL, "Unknown option '-%s'", arg); exit (1); } } - key_flush (); - key_close (); + if (!base_name) + { + fprintf (stderr, "index [-v log] [-r repository] " + "base cmd1 dir1 cmd2 dir2 ...\n"); + exit (1); + } + nsections = key_close (); + if (!nsections) + exit (0); + logf (LOG_LOG, "Merge sorting"); + mbuf = xmalloc (100000); + merge_sort (mbuf, 1, nsections+1); + logf (LOG_LOG, "Input"); + key_input (FNAME_WORD_DICT, FNAME_WORD_ISAM, "keys1.tmp", 60); exit (0); } +