X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fmain.c;h=ae3095e2337e020b7eb1ac66ef4150ff6d8ebadf;hb=366f5c2889c8bccb1f645aebf737b6082f200da5;hp=358b0a99524895c11762308c95d93b543983b164;hpb=0e816d23119c75153727b17b418066792d2ce8c8;p=idzebra-moved-to-github.git diff --git a/index/main.c b/index/main.c index 358b0a9..ae3095e 100644 --- a/index/main.c +++ b/index/main.c @@ -1,10 +1,62 @@ /* - * Copyright (C) 1994, Index Data I/S + * Copyright (C) 1994-1995, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: main.c,v $ - * Revision 1.3 1995-09-01 10:57:07 adam + * Revision 1.19 1995-11-25 10:24:06 adam + * More record fields - they are enumerated now. + * New options: flagStoreData flagStoreKey. + * + * Revision 1.18 1995/11/22 17:19:17 adam + * Record management uses the bfile system. + * + * Revision 1.17 1995/11/21 15:01:16 adam + * New general match criteria implemented. + * New feature: document groups. + * + * Revision 1.16 1995/11/20 11:56:27 adam + * Work on new traversal. + * + * Revision 1.15 1995/11/01 16:25:51 quinn + * *** empty log message *** + * + * Revision 1.14 1995/10/17 18:02:09 adam + * New feature: databases. Implemented as prefix to words in dictionary. + * + * Revision 1.13 1995/10/10 12:24:39 adam + * Temporary sort files are compressed. + * + * Revision 1.12 1995/10/04 16:57:20 adam + * Key input and merge sort in one pass. + * + * Revision 1.11 1995/09/29 14:01:45 adam + * Bug fixes. + * + * Revision 1.10 1995/09/28 14:22:57 adam + * Sort uses smaller temporary files. + * + * Revision 1.9 1995/09/14 07:48:24 adam + * Record control management. + * + * Revision 1.8 1995/09/06 16:11:18 adam + * Option: only one word key per file. + * + * Revision 1.7 1995/09/05 15:28:39 adam + * More work on search engine. + * + * Revision 1.6 1995/09/04 12:33:43 adam + * Various cleanup. YAZ util used instead. + * + * Revision 1.5 1995/09/04 09:10:39 adam + * More work on index add/del/update. + * Merge sort implemented. + * Initial work on z39 server. + * + * Revision 1.4 1995/09/01 14:06:36 adam + * Split of work into more files. + * + * Revision 1.3 1995/09/01 10:57:07 adam * Minor changes. * * Revision 1.2 1995/09/01 10:30:24 adam @@ -17,367 +69,45 @@ #include #include #include -#include -#include -#include -#include -#include +#include +#include #include "index.h" char *prog; - -static int key_fd = -1; -#define KEY_BUF_SIZE 100000 -static char *key_buf; -int key_offset; -SYSNO sysno_next; -Dict file_idx; -static char *base_path = NULL; - -void key_open (const char *fname) -{ - void *file_key; - if (key_fd != -1) - return; - if ((key_fd = open (fname, O_RDWR|O_CREAT, 0666)) == -1) - { - log (LOG_FATAL|LOG_ERRNO, "Creat %s", fname); - exit (1); - } - if (!(key_buf = malloc (KEY_BUF_SIZE))) - { - log (LOG_FATAL|LOG_ERRNO, "malloc"); - exit (1); - } - key_offset = 0; - if (!(file_idx = dict_open ("fileidx", 10, 1))) - { - log (LOG_FATAL, "dict_open fail of %s", "fileidx"); - exit (1); - } - file_key = dict_lookup (file_idx, "."); - if (file_key) - memcpy (&sysno_next, (char*)file_key+1, sizeof(sysno_next)); - else - sysno_next = 1; -} - -void key_close (void) -{ - if (key_fd == -1) - return; - close (key_fd); - dict_insert (file_idx, ".", sizeof(sysno_next), &sysno_next); - dict_close (file_idx); - key_fd = -1; -} - -void key_flush (void) -{ - size_t i = 0; - int w; - - while (i < key_offset) - { - w = write (key_fd, key_buf + i, key_offset - i); - if (w == -1) - { - log (LOG_FATAL|LOG_ERRNO, "Write key fail"); - exit (1); - } - i += w; - } - key_offset = 0; -} - -void key_write (int cmd, struct it_key *k, const char *str) -{ - char x = cmd; - size_t slen = strlen(str); - - if (key_offset + sizeof(*k) + slen >= KEY_BUF_SIZE - 2) - key_flush (); - memcpy (key_buf + key_offset, &x, 1); - key_offset++; - memcpy (key_buf + key_offset, k, sizeof(*k)); - key_offset += sizeof(*k); - memcpy (key_buf + key_offset, str, slen+1); - key_offset += slen+1; -} - -void text_extract (SYSNO sysno, int cmd, const char *fname) -{ - FILE *inf; - struct it_key k; - int seqno = 1; - int c; - char w[256]; - - log (LOG_DEBUG, "Text extract of %d", sysno); - k.sysno = sysno; - inf = fopen (fname, "r"); - if (!inf) - { - log (LOG_WARN|LOG_ERRNO, "open %s", fname); - return; - } - while ((c=getc (inf)) != EOF) - { - int i = 0; - while (i < 254 && c != EOF && isalnum(c)) - { - w[i++] = c; - c = getc (inf); - } - if (i) - { - w[i] = 0; - - k.seqno = seqno++; - k.field = 0; - key_write (cmd, &k, w); - } - if (c == EOF) - break; - } - fclose (inf); -} - -void file_extract (int cmd, struct stat *fs, const char *fname, - const char *kname) -{ - int i; - char ext[128]; - SYSNO sysno; - char ext_res[128]; - const char *file_type; - void *file_info; - - log (LOG_DEBUG, "%c %s k=%s", cmd, fname, kname); - return; - for (i = strlen(fname); --i >= 0; ) - if (fname[i] == '/') - { - strcpy (ext, ""); - break; - } - else if (fname[i] == '.') - { - strcpy (ext, fname+i+1); - break; - } - sprintf (ext_res, "fileExtension.%s", ext); - if (!(file_type = res_get (common_resource, ext_res))) - return; - - file_info = dict_lookup (file_idx, fname); - if (!file_info) - { - sysno = sysno_next++; - dict_insert (file_idx, fname, sizeof(sysno), &sysno); - } - else - memcpy (&sysno, (char*) file_info+1, sizeof(sysno)); - if (!strcmp (file_type, "text")) - text_extract (sysno, cmd, fname); -} - -static void repository_extract_r (int cmd, char *rep) -{ - struct dir_entry *e; - int i; - struct stat fs; - size_t rep_len = strlen (rep); - - e = dir_open (rep); - if (!e) - return; - if (rep[rep_len-1] != '/') - rep[rep_len] = '/'; - else - --rep_len; - for (i=0; e[i].name; i++) - { - if (!strcmp (e[i].name, ".") || !strcmp (e[i].name, "..")) - continue; - strcpy (rep +rep_len+1, e[i].name); - stat (rep, &fs); - switch (fs.st_mode & S_IFMT) - { - case S_IFREG: - file_extract (cmd, &fs, rep, rep); - break; - case S_IFDIR: - repository_extract_r (cmd, rep); - break; - } - } - dir_free (&e); -} - -void repository_update_r (int cmd, char *dst, char *src); - -void repository_add_tree (int cmd, char *dst, char *src) -{ - mkdir (dst, 0755); - repository_update_r (cmd, dst, src); -} - -void repository_del_tree (int cmd, char *dst, char *src) -{ - log (LOG_DEBUG, "rmdir of %s", dst); -} - -void repository_update_r (int cmd, char *dst, char *src) -{ - struct dir_entry *e_dst, *e_src; - int i_dst = 0, i_src = 0; - struct stat fs_dst, fs_src; - size_t dst_len = strlen (dst); - size_t src_len = strlen (src); - - e_dst = dir_open (dst); - e_src = dir_open (src); - - if (!e_dst && !e_src) - return; - if (!e_dst) - repository_add_tree (cmd, dst, src); - else if (!e_src) - repository_del_tree (cmd, dst, src); - - dir_sort (e_src); - dir_sort (e_dst); - - if (src[src_len-1] != '/') - src[src_len] = '/'; - else - --src_len; - if (dst[dst_len-1] != '/') - dst[dst_len] = '/'; - else - --dst_len; - while (e_dst[i_dst].name || e_src[i_src].name) - { - int sd; - - if (e_dst[i_dst].name && e_src[i_src].name) - sd = strcmp (e_dst[i_dst].name, e_src[i_src].name); - else if (e_src[i_src].name) - sd = 1; - else - sd = -1; - - if (sd == 0) - { - strcpy (dst +dst_len+1, e_dst[i_dst].name); - strcpy (src +src_len+1, e_src[i_src].name); - - /* check type, date, length */ - - if (strcmp (e_dst[i_dst].name, ".") && - strcmp (e_dst[i_dst].name, "..")) - { - stat (dst, &fs_dst); - stat (src, &fs_src); - - switch (fs_dst.st_mode & S_IFMT) - { - case S_IFREG: - if (fs_src.st_mtime != fs_dst.st_mtime) - { - file_extract ('d', &fs_dst, dst, dst); - file_extract ('a', &fs_src, src, dst); - } - break; - case S_IFDIR: - repository_update_r (cmd, dst, src); - break; - } - } - i_src++; - i_dst++; - } - else if (sd > 0) - { - strcpy (dst +dst_len+1, e_src[i_src].name); - strcpy (src +src_len+1, e_src[i_src].name); - - stat (src, &fs_src); - switch (fs_src.st_mode & S_IFMT) - { - case S_IFREG: - file_extract ('a', &fs_src, src, dst); - break; - case S_IFDIR: - repository_add_tree (cmd, dst, src); - break; - } - i_src++; - } - else - { - strcpy (dst +dst_len+1, e_dst[i_dst].name); - strcpy (src +src_len+1, e_dst[i_dst].name); - - stat (dst, &fs_dst); - switch (fs_dst.st_mode & S_IFMT) - { - case S_IFREG: - file_extract ('d', &fs_dst, dst, dst); - break; - case S_IFDIR: - repository_del_tree (cmd, dst, src); - break; - } - i_dst++; - } - } - dir_free (&e_dst); - dir_free (&e_src); -} - -void repository_traverse (int cmd, const char *rep) -{ - char rep_tmp1[2048]; - char rep_tmp2[2048]; - - strcpy (rep_tmp1, rep); - if (base_path) - { - strcpy (rep_tmp2, base_path); - repository_update_r (cmd, rep_tmp2, rep_tmp1); - } - else - repository_extract_r (cmd, rep_tmp1); -} - +size_t mem_max = 4*1024*1024; +extern char *data1_tabpath; int main (int argc, char **argv) { int ret; int cmd = 0; char *arg; - char *base_name; + char *configName = NULL; + int nsections; + int key_open_flag = 0; + + struct recordGroup rGroup; + + rGroup.groupName = NULL; + rGroup.databaseName = NULL; + rGroup.path = NULL; + rGroup.fileMatch = NULL; + rGroup.flagStoreData = -1; + rGroup.flagStoreKeys = -1; prog = *argv; - while ((ret = options ("r:v:", argv, argc, &arg)) != -2) + if (argc < 2) + { + fprintf (stderr, "index [-v log] [-m meg] [-c config] [-d base]" + " [-g group] cmd1 dir1 cmd2 dir2 ...\n"); + exit (1); + } + while ((ret = options ("c:g:v:m:d:", argv, argc, &arg)) != -2) { if (ret == 0) { - if (!base_name) - { - base_name = arg; - - common_resource = res_open (base_name); - if (!common_resource) - { - log (LOG_FATAL, "Cannot open resource `%s'", base_name); - exit (1); - } - } - else if(cmd == 0) /* command */ + if(cmd == 0) /* command */ { if (!strcmp (arg, "add")) { @@ -387,16 +117,43 @@ int main (int argc, char **argv) { cmd = 'd'; } + else if (!strcmp (arg, "update")) + { + cmd = 'u'; + } else { - log (LOG_FATAL, "Unknown command: %s", arg); + logf (LOG_FATAL, "Unknown command: %s", arg); exit (1); } } else { - key_open ("keys.tmp"); - repository_traverse (cmd, arg); + if (!common_resource) + { + common_resource = res_open (configName ? + configName : "base"); + if (!common_resource) + { + logf (LOG_FATAL, "Cannot open resource `%s'", + configName); + exit (1); + } + data1_tabpath = res_get (common_resource, "data1_tabpath"); + assert (data1_tabpath); + } + if (!key_open_flag) + { + key_open (mem_max); + key_open_flag = 1; + } + rGroup.path = arg; + if (cmd == 'u') + repositoryUpdate (&rGroup); + else if (cmd == 'a') + repositoryAdd (&rGroup); + else if (cmd == 'd') + repositoryDelete (&rGroup); cmd = 0; } } @@ -404,17 +161,33 @@ int main (int argc, char **argv) { log_init (log_mask_str(arg), prog, NULL); } - else if (ret == 'r') + else if (ret == 'm') { - base_path = arg; + mem_max = 1024*1024*atoi(arg); } + else if (ret == 'd') + { + rGroup.databaseName = arg; + } + else if (ret == 'g') + { + rGroup.groupName = arg; + } + else if (ret == 'c') + configName = arg; else { - log (LOG_FATAL, "Unknown option '-%s'", arg); + logf (LOG_FATAL, "Unknown option '-%s'", arg); exit (1); } } - key_flush (); - key_close (); + if (!key_open_flag) + exit (0); + nsections = key_close (); + if (!nsections) + exit (0); + logf (LOG_LOG, "Input"); + key_input (FNAME_WORD_DICT, FNAME_WORD_ISAM, nsections, 60); exit (0); } +