X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Ftrav.c;h=b855eadfc80a92c7218c13f44eda445e17a9f66c;hp=1b5a09453de8b368d3809c62de35403d28700e72;hb=7e75317bed8eecabcb57e59b16093a32238738e2;hpb=ea01436a957572aaaa878b59469a4cedce7e5e21 diff --git a/index/trav.c b/index/trav.c index 1b5a094..b855ead 100644 --- a/index/trav.c +++ b/index/trav.c @@ -1,10 +1,116 @@ /* - * Copyright (C) 1994-1995, Index Data I/S + * Copyright (C) 1994-1999, Index Data * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: trav.c,v $ - * Revision 1.8 1995-11-20 16:59:46 adam + * Revision 1.37 2002-02-20 17:30:01 adam + * Work on new API. Locking system re-implemented + * + * Revision 1.36 1999/05/15 14:36:38 adam + * Updated dictionary. Implemented "compression" of dictionary. + * + * Revision 1.35 1999/02/02 14:51:09 adam + * Updated WIN32 code specific sections. Changed header. + * + * Revision 1.34 1998/06/08 14:43:14 adam + * Added suport for EXPLAIN Proxy servers - added settings databasePath + * and explainDatabase to facilitate this. Increased maximum number + * of databases and attributes in one register. + * + * Revision 1.33 1998/01/12 15:04:08 adam + * The test option (-s) only uses read-lock (and not write lock). + * + * Revision 1.32 1997/09/25 14:56:51 adam + * Windows NT interface code to the stat call. + * + * Revision 1.31 1997/09/17 12:19:17 adam + * Zebra version corresponds to YAZ version 1.4. + * Changed Zebra server so that it doesn't depend on global common_resource. + * + * Revision 1.30 1997/09/09 13:38:09 adam + * Partial port to WIN95/NT. + * + * Revision 1.29 1997/02/12 20:39:47 adam + * Implemented options -f that limits the log to the first + * records. + * Changed some log messages also. + * + * Revision 1.28 1996/11/01 08:58:44 adam + * Interface to isamc system now includes update and delete. + * + * Revision 1.27 1996/10/29 14:06:56 adam + * Include zebrautl.h instead of alexutil.h. + * + * Revision 1.26 1996/06/04 10:19:01 adam + * Minor changes - removed include of ctype.h. + * + * Revision 1.25 1996/05/01 13:46:37 adam + * First work on multiple records in one file. + * New option, -offset, to the "unread" command in the filter module. + * + * Revision 1.24 1996/04/26 10:00:23 adam + * Added option -V to zebraidx to display version information. + * Removed stupid warnings from file update. + * + * Revision 1.23 1996/04/12 07:02:25 adam + * File update of single files. + * + * Revision 1.22 1996/04/09 06:50:50 adam + * Bug fix: bad reference in function fileUpdateR. + * + * Revision 1.21 1996/03/22 15:34:18 quinn + * Fixed bad reference + * + * Revision 1.20 1996/03/21 14:50:10 adam + * File update uses modify-time instead of change-time. + * + * Revision 1.19 1996/03/20 16:16:55 quinn + * Added diagnostic output + * + * Revision 1.18 1996/03/19 12:43:27 adam + * Bug fix: File update traversal didn't handle trailing slashes correctly. + * Bug fix: Update of sub directory groups wasn't handled correctly. + * + * Revision 1.17 1996/02/12 18:45:17 adam + * Changed naming of some functions. + * + * Revision 1.16 1996/02/05 12:30:02 adam + * Logging reduced a bit. + * The remaining running time is estimated during register merge. + * + * Revision 1.15 1995/12/07 17:38:48 adam + * Work locking mechanisms for concurrent updates/commit. + * + * Revision 1.14 1995/12/06 12:41:26 adam + * New command 'stat' for the index program. + * Filenames can be read from stdin by specifying '-'. + * Bug fix/enhancement of the transformation from terms to regular + * expressons in the search engine. + * + * Revision 1.13 1995/11/28 09:09:46 adam + * Zebra config renamed. + * Use setting 'recordId' to identify record now. + * Bug fix in recindex.c: rec_release_blocks was invokeded even + * though the blocks were already released. + * File traversal properly deletes records when needed. + * + * Revision 1.12 1995/11/24 11:31:37 adam + * Commands add & del read filenames from stdin if source directory is + * empty. + * Match criteria supports 'constant' strings. + * + * Revision 1.11 1995/11/22 17:19:19 adam + * Record management uses the bfile system. + * + * Revision 1.10 1995/11/21 15:01:16 adam + * New general match criteria implemented. + * New feature: document groups. + * + * Revision 1.9 1995/11/21 09:20:32 adam + * Yet more work on record match. + * + * Revision 1.8 1995/11/20 16:59:46 adam * New update method: the 'old' keys are saved for each records. * * Revision 1.7 1995/11/20 11:56:28 adam @@ -30,288 +136,120 @@ * Split of work into more files. * */ + + #include #include -#include -#include -#include #include +#include +#ifdef WIN32 +#include +#define S_ISREG(x) (x & _S_IFREG) +#define S_ISDIR(x) (x & _S_IFDIR) +#else +#include +#endif +#include #include -#include +#include -#include #include "index.h" +#include "zserver.h" -static void repository_extract_r (int cmd, char *rep, char *databaseName) +static int repComp (const char *a, const char *b, size_t len) +{ + if (!len) + return 0; + return memcmp (a, b, len); +} + +static void repositoryExtractR (ZebraHandle zh, int deleteFlag, char *rep, + struct recordGroup *rGroup, + int level) { struct dir_entry *e; int i; - struct stat fs; size_t rep_len = strlen (rep); e = dir_open (rep); if (!e) return; + logf (LOG_LOG, "dir %s", rep); if (rep[rep_len-1] != '/') rep[rep_len] = '/'; else --rep_len; + for (i=0; e[i].name; i++) { + char *ecp; strcpy (rep +rep_len+1, e[i].name); - stat (rep, &fs); - switch (fs.st_mode & S_IFMT) + if ((ecp = strrchr (e[i].name, '/'))) + *ecp = '\0'; + if (level == 0 && rGroup->databaseNamePath) + rGroup->databaseName = e[i].name; + + switch (e[i].kind) { - case S_IFREG: - file_extract (cmd, rep, rep, databaseName); + case dirs_file: + fileExtract (zh, NULL, rep, rGroup, deleteFlag); break; - case S_IFDIR: - repository_extract_r (cmd, rep, databaseName); + case dirs_dir: + repositoryExtractR (zh, deleteFlag, rep, rGroup, level+1); break; } } dir_free (&e); -} - -void copy_file (const char *dst, const char *src) -{ - int d_fd = open (dst, O_WRONLY|O_CREAT, 0666); - int s_fd = open (src, O_RDONLY); - char *buf; - size_t i, r, w; - - if (d_fd == -1) - { - logf (LOG_FATAL|LOG_ERRNO, "Cannot create %s", dst); - exit (1); - } - if (s_fd == -1) - { - logf (LOG_FATAL|LOG_ERRNO, "Cannot open %s", src); - exit (1); - } - buf = xmalloc (4096); - while ((r=read (s_fd, buf, 4096))>0) - for (w = 0; w < r; w += i) - { - i = write (d_fd, buf + w, r - w); - if (i == -1) - { - logf (LOG_FATAL|LOG_ERRNO, "write"); - exit (1); - } - } - if (r) - { - logf (LOG_FATAL|LOG_ERRNO, "read"); - exit (1); - } - xfree (buf); - close (d_fd); - close (s_fd); -} - -void del_file (const char *dst) -{ - unlink (dst); -} -void del_dir (const char *dst) -{ - logf (LOG_DEBUG, "rmdir of %s", dst); - if (rmdir (dst) == -1) - logf (LOG_ERRNO|LOG_WARN, "rmdir"); -} - -void repository_update_r (int cmd, char *dst, char *src, char *databaseName); - -void repository_add_tree (int cmd, char *dst, char *src, char *databaseName) -{ - mkdir (dst, 0755); - repository_update_r (cmd, dst, src, databaseName); } -void repository_del_tree (int cmd, char *dst, char *src, char *databaseName) +static void fileDeleteR (ZebraHandle zh, + struct dirs_info *di, struct dirs_entry *dst, + const char *base, char *src, + struct recordGroup *rGroup) { - size_t dst_len = strlen (dst); + char tmppath[1024]; size_t src_len = strlen (src); - struct dir_entry *e_dst; - int i_dst = 0; - struct stat fs_dst; - e_dst = dir_open (dst); - - dir_sort (e_dst); - - if (src[src_len-1] != '/') - src[src_len] = '/'; - else - --src_len; - if (dst[dst_len-1] != '/') - dst[dst_len] = '/'; - else - --dst_len; - while (e_dst[i_dst].name) + while (dst && !repComp (dst->path, src, src_len+1)) { - strcpy (dst +dst_len+1, e_dst[i_dst].name); - strcpy (src +src_len+1, e_dst[i_dst].name); - - stat (dst, &fs_dst); - switch (fs_dst.st_mode & S_IFMT) + switch (dst->kind) { - case S_IFREG: - file_extract ('d', dst, dst, databaseName); - del_file (dst); + case dirs_file: + sprintf (tmppath, "%s%s", base, dst->path); + fileExtract (zh, &dst->sysno, tmppath, rGroup, 1); + + strcpy (tmppath, dst->path); + dst = dirs_read (di); + dirs_del (di, tmppath); break; - case S_IFDIR: - repository_del_tree (cmd, dst, src, databaseName); + case dirs_dir: + strcpy (tmppath, dst->path); + dst = dirs_read (di); + dirs_rmdir (di, tmppath); break; + default: + dst = dirs_read (di); } - i_dst++; - } - dir_free (&e_dst); - if (dst_len > 0) - { - dst[dst_len] = '\0'; - del_dir (dst); } } -void repository_update_r (int cmd, char *dst, char *src, char *databaseName) -{ - struct dir_entry *e_dst, *e_src; - int i_dst = 0, i_src = 0; - struct stat fs_dst, fs_src; - size_t dst_len = strlen (dst); - size_t src_len = strlen (src); - - e_dst = dir_open (dst); - e_src = dir_open (src); - - if (!e_dst && !e_src) - return; - if (!e_dst) - { - dir_free (&e_src); - repository_add_tree (cmd, dst, src, databaseName); - return; - } - else if (!e_src) - { - dir_free (&e_dst); - repository_del_tree (cmd, dst, src, databaseName); - return; - } - - dir_sort (e_src); - dir_sort (e_dst); - - if (src[src_len-1] != '/') - src[src_len] = '/'; - else - --src_len; - if (dst[dst_len-1] != '/') - dst[dst_len] = '/'; - else - --dst_len; - while (e_dst[i_dst].name || e_src[i_src].name) - { - int sd; - - if (e_dst[i_dst].name && e_src[i_src].name) - sd = strcmp (e_dst[i_dst].name, e_src[i_src].name); - else if (e_src[i_src].name) - sd = 1; - else - sd = -1; - - if (sd == 0) - { - strcpy (dst +dst_len+1, e_dst[i_dst].name); - strcpy (src +src_len+1, e_src[i_src].name); - - /* check type, date, length */ - - stat (dst, &fs_dst); - stat (src, &fs_src); - - switch (fs_dst.st_mode & S_IFMT) - { - case S_IFREG: - if (fs_src.st_ctime > fs_dst.st_ctime) - { - file_extract ('d', dst, dst, databaseName); - file_extract ('a', src, dst, databaseName); - copy_file (dst, src); - } - break; - case S_IFDIR: - repository_update_r (cmd, dst, src, databaseName); - break; - } - i_src++; - i_dst++; - } - else if (sd > 0) - { - strcpy (dst +dst_len+1, e_src[i_src].name); - strcpy (src +src_len+1, e_src[i_src].name); - - stat (src, &fs_src); - switch (fs_src.st_mode & S_IFMT) - { - case S_IFREG: - file_extract ('a', src, dst, databaseName); - copy_file (dst, src); - break; - case S_IFDIR: - repository_add_tree (cmd, dst, src, databaseName); - break; - } - i_src++; - } - else - { - strcpy (dst +dst_len+1, e_dst[i_dst].name); - strcpy (src +src_len+1, e_dst[i_dst].name); - - stat (dst, &fs_dst); - switch (fs_dst.st_mode & S_IFMT) - { - case S_IFREG: - file_extract ('d', dst, dst, databaseName); - del_file (dst); - break; - case S_IFDIR: - repository_del_tree (cmd, dst, src, databaseName); - break; - } - i_dst++; - } - } - dir_free (&e_dst); - dir_free (&e_src); -} - -static int repComp (const char *a, const char *b, size_t len) -{ - if (!len) - return 0; - return memcmp (a, b, len); -} - -static void repositoryUpdateR (struct dirs_info *di, struct dirs_entry *dst, - const char *base, char *src, char *databaseName) +static void fileUpdateR (ZebraHandle zh, + struct dirs_info *di, struct dirs_entry *dst, + const char *base, char *src, + struct recordGroup *rGroup, + int level) { struct dir_entry *e_src; int i_src = 0; - static char tmppath[256]; + static char tmppath[1024]; size_t src_len = strlen (src); sprintf (tmppath, "%s%s", base, src); e_src = dir_open (tmppath); + logf (LOG_LOG, "dir %s", tmppath); -#if 1 +#if 0 if (!dst || repComp (dst->path, src, src_len)) #else if (!dst || strcmp (dst->path, src)) @@ -319,30 +257,29 @@ static void repositoryUpdateR (struct dirs_info *di, struct dirs_entry *dst, { if (!e_src) return; -#if 1 - if (src_len && src[src_len-1] == '/') - --src_len; - else + + if (src_len && src[src_len-1] != '/') + { src[src_len] = '/'; - src[src_len+1] = '\0'; -#endif + src[++src_len] = '\0'; + } dirs_mkdir (di, src, 0); - dst = NULL; + if (dst && repComp (dst->path, src, src_len)) + dst = NULL; } else if (!e_src) { - /* delete tree dst */ + strcpy (src, dst->path); + fileDeleteR (zh, di, dst, base, src, rGroup); return; } else { -#if 1 - if (src_len && src[src_len-1] == '/') - --src_len; - else + if (src_len && src[src_len-1] != '/') + { src[src_len] = '/'; - src[src_len+1] = '\0'; -#endif + src[++src_len] = '\0'; + } dst = dirs_read (di); } dir_sort (e_src); @@ -351,13 +288,13 @@ static void repositoryUpdateR (struct dirs_info *di, struct dirs_entry *dst, { int sd; - if (dst && !repComp (dst->path, src, src_len+1)) + if (dst && !repComp (dst->path, src, src_len)) { if (e_src[i_src].name) { - logf (LOG_DEBUG, "dst=%s src=%s", dst->path + src_len+1, + logf (LOG_DEBUG, "dst=%s src=%s", dst->path + src_len, e_src[i_src].name); - sd = strcmp (dst->path + src_len+1, e_src[i_src].name); + sd = strcmp (dst->path + src_len, e_src[i_src].name); } else sd = -1; @@ -367,23 +304,30 @@ static void repositoryUpdateR (struct dirs_info *di, struct dirs_entry *dst, else break; logf (LOG_DEBUG, "trav sd=%d", sd); + + if (level == 0 && rGroup->databaseNamePath) + rGroup->databaseName = e_src[i_src].name; if (sd == 0) { - strcpy (src + src_len+1, e_src[i_src].name); + strcpy (src + src_len, e_src[i_src].name); sprintf (tmppath, "%s%s", base, src); switch (e_src[i_src].kind) { case dirs_file: - if (e_src[i_src].ctime > dst->ctime) + if (e_src[i_src].mtime > dst->mtime) { - if (fileExtract (&dst->sysno, tmppath, databaseName, 0)) - dirs_add (di, src, dst->sysno, e_src[i_src].ctime); + if (fileExtract (zh, &dst->sysno, tmppath, rGroup, 0)) + { + dirs_add (di, src, dst->sysno, e_src[i_src].mtime); + } + logf (LOG_DEBUG, "old: %s", ctime (&dst->mtime)); + logf (LOG_DEBUG, "new: %s", ctime (&e_src[i_src].mtime)); } dst = dirs_read (di); break; case dirs_dir: - repositoryUpdateR (di, dst, base, src, databaseName); + fileUpdateR (zh, di, dst, base, src, rGroup, level+1); dst = dirs_last (di); logf (LOG_DEBUG, "last is %s", dst ? dst->path : "null"); break; @@ -395,17 +339,17 @@ static void repositoryUpdateR (struct dirs_info *di, struct dirs_entry *dst, else if (sd > 0) { SYSNO sysno = 0; - strcpy (src + src_len+1, e_src[i_src].name); + strcpy (src + src_len, e_src[i_src].name); sprintf (tmppath, "%s%s", base, src); switch (e_src[i_src].kind) { case dirs_file: - if (fileExtract (&sysno, tmppath, databaseName, 0)) - dirs_add (di, src, sysno, e_src[i_src].ctime); + if (fileExtract (zh, &sysno, tmppath, rGroup, 0)) + dirs_add (di, src, sysno, e_src[i_src].mtime); break; case dirs_dir: - repositoryUpdateR (di, dst, base, src, databaseName); + fileUpdateR (zh, di, dst, base, src, rGroup, level+1); if (dst) dst = dirs_last (di); break; @@ -414,41 +358,193 @@ static void repositoryUpdateR (struct dirs_info *di, struct dirs_entry *dst, } else /* sd < 0 */ { - assert (0); + strcpy (src, dst->path); + sprintf (tmppath, "%s%s", base, dst->path); + + switch (dst->kind) + { + case dirs_file: + fileExtract (zh, &dst->sysno, tmppath, rGroup, 1); + dirs_del (di, dst->path); + dst = dirs_read (di); + break; + case dirs_dir: + fileDeleteR (zh, di, dst, base, src, rGroup); + dst = dirs_last (di); + } } } dir_free (&e_src); } -void repositoryUpdate (const char *path, char *databaseName) +static void groupRes (ZebraService zs, struct recordGroup *rGroup) { - struct dirs_info *di; - char src[256]; - Dict dict; + char resStr[256]; + char gPrefix[256]; - dict = dict_open ("repdict", 40, 1); + if (!rGroup->groupName || !*rGroup->groupName) + *gPrefix = '\0'; + else + sprintf (gPrefix, "%s.", rGroup->groupName); + + sprintf (resStr, "%srecordId", gPrefix); + rGroup->recordId = res_get (zs->res, resStr); + sprintf (resStr, "%sdatabasePath", gPrefix); + rGroup->databaseNamePath = + atoi (res_get_def (zs->res, resStr, "0")); +} + +void repositoryShow (ZebraHandle zh) + +{ + struct recordGroup *rGroup = &zh->rGroup; + char src[1024]; + int src_len; + struct dirs_entry *dst; + Dict dict; + struct dirs_info *di; + + if (!(dict = dict_open (zh->service->bfs, FMATCH_DICT, 50, 0, 0))) + { + logf (LOG_FATAL, "dict_open fail of %s", FMATCH_DICT); + return; + } + + assert (rGroup->path); + strcpy (src, rGroup->path); + src_len = strlen (src); + + if (src_len && src[src_len-1] != '/') + { + src[src_len] = '/'; + src[++src_len] = '\0'; + } - di = dirs_open (dict, path); - strcpy (src, ""); - repositoryUpdateR (di, dirs_read (di), path, src, databaseName); + di = dirs_open (dict, src, rGroup->flagRw); + + while ( (dst = dirs_read (di)) ) + logf (LOG_LOG, "%s", dst->path); dirs_free (&di); - dict_close (dict); } -void repository (int cmd, const char *rep, const char *base_path, - char *databaseName) +static void fileUpdate (ZebraHandle zh, + Dict dict, struct recordGroup *rGroup, + const char *path) +{ + struct dirs_info *di; + struct stat sbuf; + char src[1024]; + char dst[1024]; + int src_len; + + assert (path); + strcpy (src, path); + src_len = strlen (src); + + stat (src, &sbuf); + if (S_ISREG(sbuf.st_mode)) + { + struct dirs_entry *e_dst; + di = dirs_fopen (dict, src); + + e_dst = dirs_read (di); + if (e_dst) + { + if (sbuf.st_mtime > e_dst->mtime) + if (fileExtract (zh, &e_dst->sysno, src, rGroup, 0)) + dirs_add (di, src, e_dst->sysno, sbuf.st_mtime); + } + else + { + SYSNO sysno = 0; + if (fileExtract (zh, &sysno, src, rGroup, 0)) + dirs_add (di, src, sysno, sbuf.st_mtime); + } + dirs_free (&di); + } + else if (S_ISDIR(sbuf.st_mode)) + { + if (src_len && src[src_len-1] != '/') + { + src[src_len] = '/'; + src[++src_len] = '\0'; + } + di = dirs_open (dict, src, rGroup->flagRw); + *dst = '\0'; + fileUpdateR (zh, di, dirs_read (di), src, dst, rGroup, 0); + dirs_free (&di); + } + else + { + logf (LOG_WARN, "Ignoring path %s", src); + } +} + + +static void repositoryExtract (ZebraHandle zh, + int deleteFlag, struct recordGroup *rGroup, + const char *path) { - char rep_tmp1[2048]; - char rep_tmp2[2048]; + struct stat sbuf; + char src[1024]; + + assert (path); + strcpy (src, path); - strcpy (rep_tmp1, rep); - if (base_path) + stat (src, &sbuf); + if (S_ISREG(sbuf.st_mode)) + fileExtract (zh, NULL, src, rGroup, deleteFlag); + else if (S_ISDIR(sbuf.st_mode)) + repositoryExtractR (zh, deleteFlag, src, rGroup, 0); + else + logf (LOG_WARN, "Ignoring path %s", src); +} + +static void repositoryExtractG (ZebraHandle zh, + int deleteFlag, struct recordGroup *rGroup) +{ + if (*rGroup->path == '\0' || !strcmp(rGroup->path, "-")) { - strcpy (rep_tmp2, base_path); - repository_update_r (cmd, rep_tmp2, rep_tmp1, databaseName); + char src[1024]; + + while (scanf ("%s", src) == 1) + repositoryExtract (zh, deleteFlag, rGroup, src); } else - repository_extract_r (cmd, rep_tmp1, databaseName); + repositoryExtract (zh, deleteFlag, rGroup, rGroup->path); +} + +void repositoryUpdate (ZebraHandle zh) +{ + struct recordGroup *rGroup = &zh->rGroup; + groupRes (zh->service, rGroup); + assert (rGroup->path); + if (rGroup->recordId && !strcmp (rGroup->recordId, "file")) + { + Dict dict; + if (!(dict = dict_open (zh->service->bfs, FMATCH_DICT, 50, + rGroup->flagRw, 0))) + { + logf (LOG_FATAL, "dict_open fail of %s", FMATCH_DICT); + return ; + } + if (*rGroup->path == '\0' || !strcmp(rGroup->path, "-")) + { + char src[1024]; + while (scanf ("%s", src) == 1) + fileUpdate (zh, dict, rGroup, src); + } + else + fileUpdate (zh, dict, rGroup, rGroup->path); + dict_close (dict); + } + else + repositoryExtractG (zh, 0, rGroup); +} + +void repositoryDelete (ZebraHandle zh) +{ + repositoryExtractG (zh, 1, &zh->rGroup); }