/*
- * Copyright (C) 1995, Index Data I/S
+ * Copyright (C) 1994-1999, Index Data
* All rights reserved.
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: trav.c,v $
- * Revision 1.2 1995-09-04 12:33:43 adam
+ * Revision 1.36 1999-05-15 14:36:38 adam
+ * Updated dictionary. Implemented "compression" of dictionary.
+ *
+ * Revision 1.35 1999/02/02 14:51:09 adam
+ * Updated WIN32 code specific sections. Changed header.
+ *
+ * Revision 1.34 1998/06/08 14:43:14 adam
+ * Added support for EXPLAIN Proxy servers - added settings databasePath
+ * and explainDatabase to facilitate this. Increased maximum number
+ * of databases and attributes in one register.
+ *
+ * Revision 1.33 1998/01/12 15:04:08 adam
+ * The test option (-s) only uses read-lock (and not write lock).
+ *
+ * Revision 1.32 1997/09/25 14:56:51 adam
+ * Windows NT interface code to the stat call.
+ *
+ * Revision 1.31 1997/09/17 12:19:17 adam
+ * Zebra version corresponds to YAZ version 1.4.
+ * Changed Zebra server so that it doesn't depend on global common_resource.
+ *
+ * Revision 1.30 1997/09/09 13:38:09 adam
+ * Partial port to WIN95/NT.
+ *
+ * Revision 1.29 1997/02/12 20:39:47 adam
+ * Implemented options -f <n> that limits the log to the first <n>
+ * records.
+ * Changed some log messages also.
+ *
+ * Revision 1.28 1996/11/01 08:58:44 adam
+ * Interface to isamc system now includes update and delete.
+ *
+ * Revision 1.27 1996/10/29 14:06:56 adam
+ * Include zebrautl.h instead of alexutil.h.
+ *
+ * Revision 1.26 1996/06/04 10:19:01 adam
+ * Minor changes - removed include of ctype.h.
+ *
+ * Revision 1.25 1996/05/01 13:46:37 adam
+ * First work on multiple records in one file.
+ * New option, -offset, to the "unread" command in the filter module.
+ *
+ * Revision 1.24 1996/04/26 10:00:23 adam
+ * Added option -V to zebraidx to display version information.
+ * Removed stupid warnings from file update.
+ *
+ * Revision 1.23 1996/04/12 07:02:25 adam
+ * File update of single files.
+ *
+ * Revision 1.22 1996/04/09 06:50:50 adam
+ * Bug fix: bad reference in function fileUpdateR.
+ *
+ * Revision 1.21 1996/03/22 15:34:18 quinn
+ * Fixed bad reference
+ *
+ * Revision 1.20 1996/03/21 14:50:10 adam
+ * File update uses modify-time instead of change-time.
+ *
+ * Revision 1.19 1996/03/20 16:16:55 quinn
+ * Added diagnostic output
+ *
+ * Revision 1.18 1996/03/19 12:43:27 adam
+ * Bug fix: File update traversal didn't handle trailing slashes correctly.
+ * Bug fix: Update of sub directory groups wasn't handled correctly.
+ *
+ * Revision 1.17 1996/02/12 18:45:17 adam
+ * Changed naming of some functions.
+ *
+ * Revision 1.16 1996/02/05 12:30:02 adam
+ * Logging reduced a bit.
+ * The remaining running time is estimated during register merge.
+ *
+ * Revision 1.15 1995/12/07 17:38:48 adam
+ * Work locking mechanisms for concurrent updates/commit.
+ *
+ * Revision 1.14 1995/12/06 12:41:26 adam
+ * New command 'stat' for the index program.
+ * Filenames can be read from stdin by specifying '-'.
+ * Bug fix/enhancement of the transformation from terms to regular
+ * expressions in the search engine.
+ *
+ * Revision 1.13 1995/11/28 09:09:46 adam
+ * Zebra config renamed.
+ * Use setting 'recordId' to identify record now.
+ * Bug fix in recindex.c: rec_release_blocks was invoked even
+ * though the blocks were already released.
+ * File traversal properly deletes records when needed.
+ *
+ * Revision 1.12 1995/11/24 11:31:37 adam
+ * Commands add & del read filenames from stdin if source directory is
+ * empty.
+ * Match criteria supports 'constant' strings.
+ *
+ * Revision 1.11 1995/11/22 17:19:19 adam
+ * Record management uses the bfile system.
+ *
+ * Revision 1.10 1995/11/21 15:01:16 adam
+ * New general match criteria implemented.
+ * New feature: document groups.
+ *
+ * Revision 1.9 1995/11/21 09:20:32 adam
+ * Yet more work on record match.
+ *
+ * Revision 1.8 1995/11/20 16:59:46 adam
+ * New update method: the 'old' keys are saved for each records.
+ *
+ * Revision 1.7 1995/11/20 11:56:28 adam
+ * Work on new traversal.
+ *
+ * Revision 1.6 1995/11/17 15:54:42 adam
+ * Started work on virtual directory structure.
+ *
+ * Revision 1.5 1995/10/17 18:02:09 adam
+ * New feature: databases. Implemented as prefix to words in dictionary.
+ *
+ * Revision 1.4 1995/09/28 09:19:46 adam
+ * xfree/xmalloc used everywhere.
+ * Extract/retrieve method seems to work for text records.
+ *
+ * Revision 1.3 1995/09/06 16:11:18 adam
+ * Option: only one word key per file.
+ *
+ * Revision 1.2 1995/09/04 12:33:43 adam
* Various cleanup. YAZ util used instead.
*
* Revision 1.1 1995/09/01 14:06:36 adam
* Split of work into more files.
*
*/
+
+
#include <stdio.h>
#include <assert.h>
-#include <unistd.h>
-#include <dirent.h>
-#include <sys/stat.h>
#include <sys/types.h>
+#include <sys/stat.h>
+#ifdef WIN32
+#include <io.h>
+/* WIN32 lacks the POSIX file-type test macros; emulate them with the
+ * _S_IF* mode bits.  The argument is parenthesized so that expressions
+ * such as S_ISREG(a | b) are evaluated as intended. */
+#define S_ISREG(x) ((x) & _S_IFREG)
+#define S_ISDIR(x) ((x) & _S_IFDIR)
+#else
+#include <unistd.h>
+#endif
+#include <direntz.h>
#include <fcntl.h>
-#include <ctype.h>
+#include <time.h>
-#include <alexutil.h>
#include "index.h"
-static void repository_extract_r (int cmd, char *rep)
+/*
+ * repComp: compare the first len bytes of a and b.
+ * Returns 0 when len is 0, otherwise whatever memcmp returns.
+ */
+static int repComp (const char *a, const char *b, size_t len)
+{
+    return len ? memcmp (a, b, len) : 0;
+}
+
+/*
+ * repositoryExtractR: walk the directory named by rep, pass every entry
+ * of kind dirs_file to fileExtract and recurse into every entry of kind
+ * dirs_dir.
+ *   deleteFlag - handed through unchanged to fileExtract.
+ *   rep        - path buffer, modified in place: a '/' and each entry
+ *                name are written after the directory name, so the
+ *                caller must supply room beyond strlen(rep).
+ *   rGroup     - record group handed to fileExtract; at level 0, when
+ *                databaseNamePath is non-zero, databaseName is pointed
+ *                at the current entry name.
+ *   level      - recursion depth; 0 for the top directory.
+ * Returns without doing anything when dir_open fails.
+ * NOTE(review): rep[rep_len-1] assumes rep is non-empty - confirm that
+ * callers never pass "".
+ */
+static void repositoryExtractR (int deleteFlag, char *rep,
+ struct recordGroup *rGroup,
+ int level)
{
struct dir_entry *e;
int i;
- struct stat fs;
size_t rep_len = strlen (rep);
e = dir_open (rep);
if (!e)
return;
+ logf (LOG_LOG, "dir %s", rep);
+ /* make rep+rep_len+1 the position just after a single '/' */
if (rep[rep_len-1] != '/')
rep[rep_len] = '/';
else
--rep_len;
+
for (i=0; e[i].name; i++)
{
+ char *ecp;
strcpy (rep +rep_len+1, e[i].name);
- stat (rep, &fs);
- switch (fs.st_mode & S_IFMT)
+ /* cut the entry name at its last '/'; rep already holds the full copy */
+ if ((ecp = strrchr (e[i].name, '/')))
+ *ecp = '\0';
+ /* NOTE(review): databaseName points into e, which is handed to
+ * dir_free below - confirm it is not used after this call returns */
+ if (level == 0 && rGroup->databaseNamePath)
+ rGroup->databaseName = e[i].name;
+
+ switch (e[i].kind)
{
- case S_IFREG:
- file_extract (cmd, rep, rep);
+ case dirs_file:
+ fileExtract (NULL, rep, rGroup, deleteFlag);
break;
- case S_IFDIR:
- repository_extract_r (cmd, rep);
+ case dirs_dir:
+ repositoryExtractR (deleteFlag, rep, rGroup, level+1);
break;
}
}
dir_free (&e);
-}
-
-void copy_file (const char *dst, const char *src)
-{
- int d_fd = open (dst, O_WRONLY|O_CREAT, 0666);
- int s_fd = open (src, O_RDONLY);
- char *buf;
- size_t i, r, w;
- if (d_fd == -1)
- {
- logf (LOG_FATAL|LOG_ERRNO, "Cannot create %s", dst);
- exit (1);
- }
- if (s_fd == -1)
- {
- logf (LOG_FATAL|LOG_ERRNO, "Cannot open %s", src);
- exit (1);
- }
- if (!(buf = malloc (4096)))
- {
- logf (LOG_FATAL|LOG_ERRNO, "malloc");
- exit (1);
- }
- while ((r=read (s_fd, buf, 4096))>0)
- for (w = 0; w < r; w += i)
- {
- i = write (d_fd, buf + w, r - w);
- if (i == -1)
- {
- logf (LOG_FATAL|LOG_ERRNO, "write");
- exit (1);
- }
- }
- if (r)
- {
- logf (LOG_FATAL|LOG_ERRNO, "read");
- exit (1);
- }
- free (buf);
- close (d_fd);
- close (s_fd);
}
-void del_file (const char *dst)
+/*
+ * fileDeleteR: starting at dst, remove entries from the stored directory
+ * index di for as long as the first src_len+1 bytes of the entry path
+ * equal those of src.
+ *   di     - stored directory index being read and modified.
+ *   dst    - current entry of di, or NULL.
+ *   base   - prefix prepended to an entry path to form the name given
+ *            to fileExtract.
+ *   src    - path the entries are compared against.
+ *   rGroup - record group handed to fileExtract.
+ * Entries of kind dirs_file are passed to fileExtract with the delete
+ * flag set and then removed with dirs_del; entries of kind dirs_dir are
+ * removed with dirs_rmdir; any other kind is skipped.
+ * NOTE(review): the comparison length src_len+1 includes the NUL that
+ * terminates src, so only a path equal to src matches - confirm whether
+ * a prefix match (length src_len) was intended.
+ */
+static void fileDeleteR (struct dirs_info *di, struct dirs_entry *dst,
+ const char *base, char *src,
+ struct recordGroup *rGroup)
{
- unlink (dst);
-}
-
-void del_dir (const char *dst)
-{
- logf (LOG_DEBUG, "rmdir of %s", dst);
- if (rmdir (dst) == -1)
- logf (LOG_ERRNO|LOG_WARN, "rmdir");
-}
-
-void repository_update_r (int cmd, char *dst, char *src);
-
-void repository_add_tree (int cmd, char *dst, char *src)
-{
- mkdir (dst, 0755);
- repository_update_r (cmd, dst, src);
-}
-
-void repository_del_tree (int cmd, char *dst, char *src)
-{
- size_t dst_len = strlen (dst);
+ char tmppath[1024];
size_t src_len = strlen (src);
- struct dir_entry *e_dst;
- int i_dst = 0;
- struct stat fs_dst;
- e_dst = dir_open (dst);
-
- dir_sort (e_dst);
-
- if (src[src_len-1] != '/')
- src[src_len] = '/';
- else
- --src_len;
- if (dst[dst_len-1] != '/')
- dst[dst_len] = '/';
- else
- --dst_len;
- while (e_dst[i_dst].name)
+ while (dst && !repComp (dst->path, src, src_len+1))
{
- strcpy (dst +dst_len+1, e_dst[i_dst].name);
- strcpy (src +src_len+1, e_dst[i_dst].name);
-
- stat (dst, &fs_dst);
- switch (fs_dst.st_mode & S_IFMT)
+ switch (dst->kind)
{
- case S_IFREG:
- file_extract ('d', dst, dst);
- del_file (dst);
+ case dirs_file:
+ sprintf (tmppath, "%s%s", base, dst->path);
+ fileExtract (&dst->sysno, tmppath, rGroup, 1);
+
+ /* the path is copied before dst is advanced; the copy is what
+ * gets deleted (presumably dst->path is not valid after
+ * dirs_read - TODO confirm) */
+ strcpy (tmppath, dst->path);
+ dst = dirs_read (di);
+ dirs_del (di, tmppath);
break;
- case S_IFDIR:
- repository_del_tree (cmd, dst, src);
+ case dirs_dir:
+ strcpy (tmppath, dst->path);
+ dst = dirs_read (di);
+ dirs_rmdir (di, tmppath);
break;
+ default:
+ dst = dirs_read (di);
}
- i_dst++;
- }
- dir_free (&e_dst);
- if (dst_len > 0)
- {
- dst[dst_len] = '\0';
- del_dir (dst);
}
}
-void repository_update_r (int cmd, char *dst, char *src)
+/*
+ * fileUpdateR: merge the directory base+src on disk with the stored
+ * directory index di.  The entries returned by dir_open are sorted and
+ * compared, name by name, with the stored entries read through dirs_read:
+ *   - name in both (sd == 0): a file whose source mtime is newer than the
+ *     stored mtime is passed to fileExtract and, on a non-zero result,
+ *     re-registered with dirs_add; a directory is handled recursively.
+ *   - name only on disk (sd > 0): a file is passed to fileExtract with a
+ *     zero sysno and registered with dirs_add; a directory is handled
+ *     recursively.
+ *   - name only in the index (sd < 0): a file is passed to fileExtract
+ *     with the delete flag set and removed with dirs_del; a directory is
+ *     handed to fileDeleteR.
+ * Parameters:
+ *   di     - stored directory index.
+ *   dst    - current entry of di, or NULL.
+ *   base   - prefix prepended to src to form the file system path.
+ *   src    - path buffer relative to base; modified in place.
+ *   rGroup - record group handed to fileExtract; at level 0, when
+ *            databaseNamePath is non-zero, databaseName is set to the
+ *            current source entry name.
+ *   level  - recursion depth; 0 for the top directory.
+ */
+static void fileUpdateR (struct dirs_info *di, struct dirs_entry *dst,
+ const char *base, char *src,
+ struct recordGroup *rGroup,
+ int level)
{
- struct dir_entry *e_dst, *e_src;
- int i_dst = 0, i_src = 0;
- struct stat fs_dst, fs_src;
- size_t dst_len = strlen (dst);
+ struct dir_entry *e_src;
+ int i_src = 0;
+ /* static: a single buffer shared by every recursion level */
+ static char tmppath[1024];
size_t src_len = strlen (src);
- e_dst = dir_open (dst);
- e_src = dir_open (src);
+ sprintf (tmppath, "%s%s", base, src);
+ e_src = dir_open (tmppath);
+ logf (LOG_LOG, "dir %s", tmppath);
- if (!e_dst && !e_src)
- return;
- if (!e_dst)
+#if 0
+ if (!dst || repComp (dst->path, src, src_len))
+#else
+ if (!dst || strcmp (dst->path, src))
+#endif
{
- dir_free (&e_src);
- repository_add_tree (cmd, dst, src);
- return;
+ /* no stored entry for src itself: register the directory */
+ if (!e_src)
+ return;
+
+ if (src_len && src[src_len-1] != '/')
+ {
+ src[src_len] = '/';
+ src[++src_len] = '\0';
+ }
+ dirs_mkdir (di, src, 0);
+ if (dst && repComp (dst->path, src, src_len))
+ dst = NULL;
}
else if (!e_src)
{
- dir_free (&e_dst);
- repository_del_tree (cmd, dst, src);
+ /* stored entry exists but the directory could not be opened */
+ strcpy (src, dst->path);
+ fileDeleteR (di, dst, base, src, rGroup);
return;
}
-
+ else
+ {
+ if (src_len && src[src_len-1] != '/')
+ {
+ src[src_len] = '/';
+ src[++src_len] = '\0';
+ }
+ dst = dirs_read (di);
+ }
dir_sort (e_src);
- dir_sort (e_dst);
- if (src[src_len-1] != '/')
- src[src_len] = '/';
- else
- --src_len;
- if (dst[dst_len-1] != '/')
- dst[dst_len] = '/';
- else
- --dst_len;
- while (e_dst[i_dst].name || e_src[i_src].name)
+ while (1)
{
int sd;
- if (e_dst[i_dst].name && e_src[i_src].name)
- sd = strcmp (e_dst[i_dst].name, e_src[i_src].name);
+ if (dst && !repComp (dst->path, src, src_len))
+ {
+ if (e_src[i_src].name)
+ {
+ logf (LOG_DEBUG, "dst=%s src=%s", dst->path + src_len,
+ e_src[i_src].name);
+ sd = strcmp (dst->path + src_len, e_src[i_src].name);
+ }
+ else
+ sd = -1;
+ }
else if (e_src[i_src].name)
sd = 1;
else
- sd = -1;
-
+ break;
+ logf (LOG_DEBUG, "trav sd=%d", sd);
+
+ /* NOTE(review): when the source list is exhausted (sd forced to -1
+ * above) e_src[i_src].name is NULL, so databaseName becomes NULL
+ * before the delete below - confirm this is acceptable */
+ if (level == 0 && rGroup->databaseNamePath)
+ rGroup->databaseName = e_src[i_src].name;
if (sd == 0)
{
- strcpy (dst +dst_len+1, e_dst[i_dst].name);
- strcpy (src +src_len+1, e_src[i_src].name);
+ strcpy (src + src_len, e_src[i_src].name);
+ sprintf (tmppath, "%s%s", base, src);
- /* check type, date, length */
-
- stat (dst, &fs_dst);
- stat (src, &fs_src);
-
- switch (fs_dst.st_mode & S_IFMT)
+ switch (e_src[i_src].kind)
{
- case S_IFREG:
- if (fs_src.st_ctime > fs_dst.st_ctime)
+ case dirs_file:
+ if (e_src[i_src].mtime > dst->mtime)
{
- file_extract ('d', dst, dst);
- file_extract ('a', src, dst);
- copy_file (dst, src);
+ if (fileExtract (&dst->sysno, tmppath, rGroup, 0))
+ {
+ dirs_add (di, src, dst->sysno, e_src[i_src].mtime);
+ }
+ logf (LOG_DEBUG, "old: %s", ctime (&dst->mtime));
+ logf (LOG_DEBUG, "new: %s", ctime (&e_src[i_src].mtime));
}
+ dst = dirs_read (di);
break;
- case S_IFDIR:
- repository_update_r (cmd, dst, src);
+ case dirs_dir:
+ fileUpdateR (di, dst, base, src, rGroup, level+1);
+ dst = dirs_last (di);
+ logf (LOG_DEBUG, "last is %s", dst ? dst->path : "null");
break;
+ default:
+ dst = dirs_read (di);
}
i_src++;
- i_dst++;
}
else if (sd > 0)
{
- strcpy (dst +dst_len+1, e_src[i_src].name);
- strcpy (src +src_len+1, e_src[i_src].name);
-
- stat (src, &fs_src);
- switch (fs_src.st_mode & S_IFMT)
+ SYSNO sysno = 0;
+ strcpy (src + src_len, e_src[i_src].name);
+ sprintf (tmppath, "%s%s", base, src);
+
+ switch (e_src[i_src].kind)
{
- case S_IFREG:
- file_extract ('a', src, dst);
- copy_file (dst, src);
+ case dirs_file:
+ if (fileExtract (&sysno, tmppath, rGroup, 0))
+ dirs_add (di, src, sysno, e_src[i_src].mtime);
break;
- case S_IFDIR:
- repository_add_tree (cmd, dst, src);
+ case dirs_dir:
+ fileUpdateR (di, dst, base, src, rGroup, level+1);
+ if (dst)
+ dst = dirs_last (di);
break;
}
i_src++;
}
- else
+ else /* sd < 0 */
{
- strcpy (dst +dst_len+1, e_dst[i_dst].name);
- strcpy (src +src_len+1, e_dst[i_dst].name);
-
- stat (dst, &fs_dst);
- switch (fs_dst.st_mode & S_IFMT)
+ strcpy (src, dst->path);
+ sprintf (tmppath, "%s%s", base, dst->path);
+
+ /* NOTE(review): unlike the other switches on dst->kind there is no
+ * default case here, so an entry of any other kind leaves dst
+ * unchanged for the next iteration - confirm such kinds cannot
+ * occur */
+ switch (dst->kind)
{
- case S_IFREG:
- file_extract ('d', dst, dst);
- del_file (dst);
- break;
- case S_IFDIR:
- repository_del_tree (cmd, dst, src);
+ case dirs_file:
+ fileExtract (&dst->sysno, tmppath, rGroup, 1);
+ dirs_del (di, dst->path);
+ dst = dirs_read (di);
break;
+ case dirs_dir:
+ fileDeleteR (di, dst, base, src, rGroup);
+ dst = dirs_last (di);
}
- i_dst++;
}
}
- dir_free (&e_dst);
dir_free (&e_src);
}
-void repository (int cmd, const char *rep, const char *base_path)
+/*
+ * groupRes: read the per-group settings of rGroup from common_resource.
+ * When rGroup->groupName is non-empty the resource names are prefixed
+ * with "<groupName>.".  Sets
+ *   rGroup->recordId         - result of res_get for "recordId";
+ *   rGroup->databaseNamePath - atoi of "databasePath", default "0".
+ * A group name that does not fit the prefix buffer is reported and the
+ * unprefixed settings are used instead of overrunning the buffer.
+ */
+static void groupRes (struct recordGroup *rGroup)
+{
+    char gPrefix[256];
+    /* large enough for any prefix plus the longest key used below */
+    char resStr[sizeof(gPrefix) + 16];
+
+    *gPrefix = '\0';
+    if (rGroup->groupName && *rGroup->groupName)
+    {
+        /* room is needed for the name, the '.' and the terminating NUL */
+        if (strlen (rGroup->groupName) + 2 > sizeof(gPrefix))
+            logf (LOG_WARN, "Group name too long, ignored: %.64s...",
+                  rGroup->groupName);
+        else
+            sprintf (gPrefix, "%s.", rGroup->groupName);
+    }
+
+    sprintf (resStr, "%srecordId", gPrefix);
+    rGroup->recordId = res_get (common_resource, resStr);
+    sprintf (resStr, "%sdatabasePath", gPrefix);
+    rGroup->databaseNamePath =
+        atoi (res_get_def (common_resource, resStr, "0"));
+}
+
+/*
+ * repositoryShow: log (LOG_LOG) the path of every entry that dirs_read
+ * returns for the stored directory index opened on rGroup->path.
+ * rGroup->path must be non-NULL.  A path that would not fit the local
+ * buffer (including an appended '/') is reported and nothing is shown.
+ * Returns early, after logging, when the match dictionary cannot be
+ * opened.
+ */
+void repositoryShow (struct recordGroup *rGroup)
+{
+    char src[1024];
+    size_t src_len;
+    struct dirs_entry *dst;
+    Dict dict;
+    struct dirs_info *di;
+
+    assert (rGroup->path);
+    src_len = strlen (rGroup->path);
+    /* room is needed for the path, a possible '/' and the NUL */
+    if (src_len + 2 > sizeof(src))
+    {
+        logf (LOG_FATAL, "Path too long: %.64s...", rGroup->path);
+        return;
+    }
+
+    if (!(dict = dict_open (rGroup->bfs, FMATCH_DICT, 50, 0, 0)))
+    {
+        logf (LOG_FATAL, "dict_open fail of %s", FMATCH_DICT);
+        return;
+    }
+
+    strcpy (src, rGroup->path);
+
+    /* the index is opened on a path that ends with '/' */
+    if (src_len && src[src_len-1] != '/')
+    {
+        src[src_len] = '/';
+        src[++src_len] = '\0';
+    }
+
+    di = dirs_open (dict, src, rGroup->flagRw);
+
+    while ( (dst = dirs_read (di)) )
+        logf (LOG_LOG, "%s", dst->path);
+    dirs_free (&di);
+    dict_close (dict);
+}
+
+/*
+ * fileUpdate: update the index for a single path.
+ *   dict   - match dictionary handed to dirs_fopen / dirs_open.
+ *   rGroup - record group handed to fileExtract / fileUpdateR.
+ *   path   - file or directory name; must be non-NULL.
+ * A regular file is passed to fileExtract when it has no stored entry or
+ * when its mtime is newer than the stored one, and is registered with
+ * dirs_add when fileExtract returns non-zero.  A directory is handed to
+ * fileUpdateR.  Anything else is logged and ignored, as are paths that
+ * cannot be stat'ed or that do not fit the local buffer.
+ */
+static void fileUpdate (Dict dict, struct recordGroup *rGroup,
+                        const char *path)
+{
+    struct dirs_info *di;
+    struct stat sbuf;
+    char src[1024];
+    char dst[1024];
+    size_t src_len;
+
+    assert (path);
+    src_len = strlen (path);
+    /* room is needed for the path, a possible '/' and the NUL */
+    if (src_len + 2 > sizeof(src))
+    {
+        logf (LOG_WARN, "Path too long, ignored: %.64s...", path);
+        return;
+    }
+    strcpy (src, path);
+
+    /* sbuf is only meaningful when stat succeeds */
+    if (stat (src, &sbuf) == -1)
+    {
+        logf (LOG_WARN|LOG_ERRNO, "Cannot access path %s", src);
+        return;
+    }
+    if (S_ISREG(sbuf.st_mode))
+    {
+        struct dirs_entry *e_dst;
+        di = dirs_fopen (dict, src);
+
+        e_dst = dirs_read (di);
+        if (e_dst)
+        {
+            if (sbuf.st_mtime > e_dst->mtime)
+                if (fileExtract (&e_dst->sysno, src, rGroup, 0))
+                    dirs_add (di, src, e_dst->sysno, sbuf.st_mtime);
+        }
+        else
+        {
+            SYSNO sysno = 0;
+            if (fileExtract (&sysno, src, rGroup, 0))
+                dirs_add (di, src, sysno, sbuf.st_mtime);
+        }
+        dirs_free (&di);
+    }
+    else if (S_ISDIR(sbuf.st_mode))
+    {
+        if (src_len && src[src_len-1] != '/')
+        {
+            src[src_len] = '/';
+            src[++src_len] = '\0';
+        }
+        di = dirs_open (dict, src, rGroup->flagRw);
+        /* src becomes the base; the relative path starts out empty */
+        *dst = '\0';
+        fileUpdateR (di, dirs_read (di), src, dst, rGroup, 0);
+        dirs_free (&di);
+    }
+    else
+    {
+        logf (LOG_WARN, "Ignoring path %s", src);
+    }
+}
+
+
+/*
+ * repositoryExtract: run extraction on a single path.
+ *   deleteFlag - handed through to fileExtract / repositoryExtractR.
+ *   rGroup     - record group handed through likewise.
+ *   path       - file or directory name; must be non-NULL.
+ * A regular file goes to fileExtract, a directory to repositoryExtractR.
+ * Anything else is logged and ignored, as are paths that cannot be
+ * stat'ed or that do not fit the local buffer.
+ */
+static void repositoryExtract (int deleteFlag, struct recordGroup *rGroup,
+                               const char *path)
{
- char rep_tmp1[2048];
- char rep_tmp2[2048];
+    struct stat sbuf;
+    char src[1024];
- strcpy (rep_tmp1, rep);
- if (base_path)
+    assert (path);
+    if (strlen (path) >= sizeof(src))
+    {
+        logf (LOG_WARN, "Path too long, ignored: %.64s...", path);
+        return;
+    }
+    strcpy (src, path);
+
+    /* sbuf is only meaningful when stat succeeds */
+    if (stat (src, &sbuf) == -1)
+    {
+        logf (LOG_WARN|LOG_ERRNO, "Cannot access path %s", src);
+        return;
+    }
+    if (S_ISREG(sbuf.st_mode))
+        fileExtract (NULL, src, rGroup, deleteFlag);
+    else if (S_ISDIR(sbuf.st_mode))
+        repositoryExtractR (deleteFlag, src, rGroup, 0);
+    else
+        logf (LOG_WARN, "Ignoring path %s", src);
+}
+
+/*
+ * repositoryExtractG: run repositoryExtract for a record group.
+ * When rGroup->path is empty or "-", whitespace-separated path names are
+ * read from stdin and processed one by one; otherwise rGroup->path itself
+ * is processed.  deleteFlag is handed through unchanged.
+ */
+static void repositoryExtractG (int deleteFlag, struct recordGroup *rGroup)
+{
+    if (*rGroup->path == '\0' || !strcmp(rGroup->path, "-"))
{
- strcpy (rep_tmp2, base_path);
- repository_update_r (cmd, rep_tmp2, rep_tmp1);
+        char src[1024];
+
+        /* the field width keeps scanf from writing past the end of src */
+        while (scanf ("%1023s", src) == 1)
+            repositoryExtract (deleteFlag, rGroup, src);
}
else
- repository_extract_r (cmd, rep_tmp1);
+        repositoryExtract (deleteFlag, rGroup, rGroup->path);
+}
+
+/*
+ * repositoryUpdate: update the index for a record group.
+ * The group settings are loaded with groupRes first.  When the recordId
+ * setting is "file", the match dictionary is opened and fileUpdate is
+ * run on rGroup->path, or on each whitespace-separated name read from
+ * stdin when the path is empty or "-".  Otherwise the work is done by
+ * repositoryExtractG with the delete flag cleared.
+ * rGroup->path must be non-NULL.  Returns early, after logging, when the
+ * match dictionary cannot be opened.
+ */
+void repositoryUpdate (struct recordGroup *rGroup)
+{
+    groupRes (rGroup);
+    assert (rGroup->path);
+    if (rGroup->recordId && !strcmp (rGroup->recordId, "file"))
+    {
+        Dict dict;
+        if (!(dict = dict_open (rGroup->bfs, FMATCH_DICT, 50,
+                                rGroup->flagRw, 0)))
+        {
+            logf (LOG_FATAL, "dict_open fail of %s", FMATCH_DICT);
+            return ;
+        }
+        if (*rGroup->path == '\0' || !strcmp(rGroup->path, "-"))
+        {
+            char src[1024];
+            /* the field width keeps scanf from writing past the end of src */
+            while (scanf ("%1023s", src) == 1)
+                fileUpdate (dict, rGroup, src);
+        }
+        else
+            fileUpdate (dict, rGroup, rGroup->path);
+        dict_close (dict);
+    }
+    else
+        repositoryExtractG (0, rGroup);
+}
+
+/*
+ * repositoryDelete: run repositoryExtractG on rGroup with the delete
+ * flag set.
+ */
+void repositoryDelete (struct recordGroup *rGroup)
+{
+    const int deleteFlag = 1;
+
+    repositoryExtractG (deleteFlag, rGroup);
}