X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Ftrav.c;h=6ff72497d44bb131fdcd961a7d6cb76e7b175d8c;hp=3cd2001bda6114ed1061ab0be309351bd1dd7671;hb=6c9fcd3b5d3108702fa1ffc92dab4ab6060f9a19;hpb=2b1851bd5565e3d21f9cf9a37661a584c063b75f diff --git a/index/trav.c b/index/trav.c index 3cd2001..6ff7249 100644 --- a/index/trav.c +++ b/index/trav.c @@ -1,150 +1,28 @@ -/* - * Copyright (C) 1994-1999, Index Data - * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss - * - * $Log: trav.c,v $ - * Revision 1.38 2002-04-04 14:14:13 adam - * Multiple registers (alpha early) - * - * Revision 1.37 2002/02/20 17:30:01 adam - * Work on new API. Locking system re-implemented - * - * Revision 1.36 1999/05/15 14:36:38 adam - * Updated dictionary. Implemented "compression" of dictionary. - * - * Revision 1.35 1999/02/02 14:51:09 adam - * Updated WIN32 code specific sections. Changed header. - * - * Revision 1.34 1998/06/08 14:43:14 adam - * Added suport for EXPLAIN Proxy servers - added settings databasePath - * and explainDatabase to facilitate this. Increased maximum number - * of databases and attributes in one register. - * - * Revision 1.33 1998/01/12 15:04:08 adam - * The test option (-s) only uses read-lock (and not write lock). - * - * Revision 1.32 1997/09/25 14:56:51 adam - * Windows NT interface code to the stat call. - * - * Revision 1.31 1997/09/17 12:19:17 adam - * Zebra version corresponds to YAZ version 1.4. - * Changed Zebra server so that it doesn't depend on global common_resource. - * - * Revision 1.30 1997/09/09 13:38:09 adam - * Partial port to WIN95/NT. - * - * Revision 1.29 1997/02/12 20:39:47 adam - * Implemented options -f that limits the log to the first - * records. - * Changed some log messages also. - * - * Revision 1.28 1996/11/01 08:58:44 adam - * Interface to isamc system now includes update and delete. - * - * Revision 1.27 1996/10/29 14:06:56 adam - * Include zebrautl.h instead of alexutil.h. - * - * Revision 1.26 1996/06/04 10:19:01 adam - * Minor changes - removed include of ctype.h. - * - * Revision 1.25 1996/05/01 13:46:37 adam - * First work on multiple records in one file. - * New option, -offset, to the "unread" command in the filter module. - * - * Revision 1.24 1996/04/26 10:00:23 adam - * Added option -V to zebraidx to display version information. - * Removed stupid warnings from file update. - * - * Revision 1.23 1996/04/12 07:02:25 adam - * File update of single files. - * - * Revision 1.22 1996/04/09 06:50:50 adam - * Bug fix: bad reference in function fileUpdateR. - * - * Revision 1.21 1996/03/22 15:34:18 quinn - * Fixed bad reference - * - * Revision 1.20 1996/03/21 14:50:10 adam - * File update uses modify-time instead of change-time. - * - * Revision 1.19 1996/03/20 16:16:55 quinn - * Added diagnostic output - * - * Revision 1.18 1996/03/19 12:43:27 adam - * Bug fix: File update traversal didn't handle trailing slashes correctly. - * Bug fix: Update of sub directory groups wasn't handled correctly. - * - * Revision 1.17 1996/02/12 18:45:17 adam - * Changed naming of some functions. - * - * Revision 1.16 1996/02/05 12:30:02 adam - * Logging reduced a bit. - * The remaining running time is estimated during register merge. - * - * Revision 1.15 1995/12/07 17:38:48 adam - * Work locking mechanisms for concurrent updates/commit. - * - * Revision 1.14 1995/12/06 12:41:26 adam - * New command 'stat' for the index program. - * Filenames can be read from stdin by specifying '-'. - * Bug fix/enhancement of the transformation from terms to regular - * expressons in the search engine. - * - * Revision 1.13 1995/11/28 09:09:46 adam - * Zebra config renamed. - * Use setting 'recordId' to identify record now. - * Bug fix in recindex.c: rec_release_blocks was invokeded even - * though the blocks were already released. - * File traversal properly deletes records when needed. - * - * Revision 1.12 1995/11/24 11:31:37 adam - * Commands add & del read filenames from stdin if source directory is - * empty. - * Match criteria supports 'constant' strings. - * - * Revision 1.11 1995/11/22 17:19:19 adam - * Record management uses the bfile system. - * - * Revision 1.10 1995/11/21 15:01:16 adam - * New general match criteria implemented. - * New feature: document groups. - * - * Revision 1.9 1995/11/21 09:20:32 adam - * Yet more work on record match. - * - * Revision 1.8 1995/11/20 16:59:46 adam - * New update method: the 'old' keys are saved for each records. - * - * Revision 1.7 1995/11/20 11:56:28 adam - * Work on new traversal. - * - * Revision 1.6 1995/11/17 15:54:42 adam - * Started work on virtual directory structure. - * - * Revision 1.5 1995/10/17 18:02:09 adam - * New feature: databases. Implemented as prefix to words in dictionary. - * - * Revision 1.4 1995/09/28 09:19:46 adam - * xfree/xmalloc used everywhere. - * Extract/retrieve method seems to work for text records. - * - * Revision 1.3 1995/09/06 16:11:18 adam - * Option: only one word key per file. - * - * Revision 1.2 1995/09/04 12:33:43 adam - * Various cleanup. YAZ util used instead. - * - * Revision 1.1 1995/09/01 14:06:36 adam - * Split of work into more files. - * - */ +/* $Id: trav.c,v 1.47 2005-01-15 19:38:27 adam Exp $ + Copyright (C) 1995-2005 + Index Data ApS +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Zebra; see the file LICENSE.zebra. If not, write to the +Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. +*/ #include #include #include -#include #ifdef WIN32 #include #define S_ISREG(x) (x & _S_IFREG) @@ -166,17 +44,16 @@ static int repComp (const char *a, const char *b, size_t len) } static void repositoryExtractR (ZebraHandle zh, int deleteFlag, char *rep, - struct recordGroup *rGroup, int level) { struct dir_entry *e; int i; size_t rep_len = strlen (rep); - e = dir_open (rep); + e = dir_open (rep, zh->path_reg, zh->m_follow_links); if (!e) return; - logf (LOG_LOG, "dir %s", rep); + yaz_log (YLOG_LOG, "dir %s", rep); if (rep[rep_len-1] != '/') rep[rep_len] = '/'; else @@ -188,16 +65,14 @@ static void repositoryExtractR (ZebraHandle zh, int deleteFlag, char *rep, strcpy (rep +rep_len+1, e[i].name); if ((ecp = strrchr (e[i].name, '/'))) *ecp = '\0'; - if (level == 0 && rGroup->databaseNamePath) - rGroup->databaseName = e[i].name; switch (e[i].kind) { case dirs_file: - fileExtract (zh, NULL, rep, rGroup, deleteFlag); + fileExtract (zh, NULL, rep, deleteFlag); break; case dirs_dir: - repositoryExtractR (zh, deleteFlag, rep, rGroup, level+1); + repositoryExtractR (zh, deleteFlag, rep, level+1); break; } } @@ -207,8 +82,7 @@ static void repositoryExtractR (ZebraHandle zh, int deleteFlag, char *rep, static void fileDeleteR (ZebraHandle zh, struct dirs_info *di, struct dirs_entry *dst, - const char *base, char *src, - struct recordGroup *rGroup) + const char *base, char *src) { char tmppath[1024]; size_t src_len = strlen (src); @@ -219,7 +93,7 @@ static void fileDeleteR (ZebraHandle zh, { case dirs_file: sprintf (tmppath, "%s%s", base, dst->path); - fileExtract (zh, &dst->sysno, tmppath, rGroup, 1); + fileExtract (zh, &dst->sysno, tmppath, 1); strcpy (tmppath, dst->path); dst = dirs_read (di); @@ -239,7 +113,6 @@ static void fileDeleteR (ZebraHandle zh, static void fileUpdateR (ZebraHandle zh, struct dirs_info *di, struct dirs_entry *dst, const char *base, char *src, - struct recordGroup *rGroup, int level) { struct dir_entry *e_src; @@ -248,8 +121,8 @@ static void fileUpdateR (ZebraHandle zh, size_t src_len = strlen (src); sprintf (tmppath, "%s%s", base, src); - e_src = dir_open (tmppath); - logf (LOG_LOG, "dir %s", tmppath); + e_src = dir_open (tmppath, zh->path_reg, zh->m_follow_links); + yaz_log (YLOG_LOG, "dir %s", tmppath); #if 0 if (!dst || repComp (dst->path, src, src_len)) @@ -272,7 +145,7 @@ static void fileUpdateR (ZebraHandle zh, else if (!e_src) { strcpy (src, dst->path); - fileDeleteR (zh, di, dst, base, src, rGroup); + fileDeleteR (zh, di, dst, base, src); return; } else @@ -294,7 +167,7 @@ static void fileUpdateR (ZebraHandle zh, { if (e_src[i_src].name) { - logf (LOG_DEBUG, "dst=%s src=%s", dst->path + src_len, + yaz_log (YLOG_DEBUG, "dst=%s src=%s", dst->path + src_len, e_src[i_src].name); sd = strcmp (dst->path + src_len, e_src[i_src].name); } @@ -305,10 +178,8 @@ static void fileUpdateR (ZebraHandle zh, sd = 1; else break; - logf (LOG_DEBUG, "trav sd=%d", sd); + yaz_log (YLOG_DEBUG, "trav sd=%d", sd); - if (level == 0 && rGroup->databaseNamePath) - rGroup->databaseName = e_src[i_src].name; if (sd == 0) { strcpy (src + src_len, e_src[i_src].name); @@ -319,19 +190,19 @@ static void fileUpdateR (ZebraHandle zh, case dirs_file: if (e_src[i_src].mtime > dst->mtime) { - if (fileExtract (zh, &dst->sysno, tmppath, rGroup, 0)) + if (fileExtract (zh, &dst->sysno, tmppath, 0)) { dirs_add (di, src, dst->sysno, e_src[i_src].mtime); } - logf (LOG_DEBUG, "old: %s", ctime (&dst->mtime)); - logf (LOG_DEBUG, "new: %s", ctime (&e_src[i_src].mtime)); + yaz_log (YLOG_DEBUG, "old: %s", ctime (&dst->mtime)); + yaz_log (YLOG_DEBUG, "new: %s", ctime (&e_src[i_src].mtime)); } dst = dirs_read (di); break; case dirs_dir: - fileUpdateR (zh, di, dst, base, src, rGroup, level+1); + fileUpdateR (zh, di, dst, base, src, level+1); dst = dirs_last (di); - logf (LOG_DEBUG, "last is %s", dst ? dst->path : "null"); + yaz_log (YLOG_DEBUG, "last is %s", dst ? dst->path : "null"); break; default: dst = dirs_read (di); @@ -347,11 +218,11 @@ static void fileUpdateR (ZebraHandle zh, switch (e_src[i_src].kind) { case dirs_file: - if (fileExtract (zh, &sysno, tmppath, rGroup, 0)) + if (fileExtract (zh, &sysno, tmppath, 0)) dirs_add (di, src, sysno, e_src[i_src].mtime); break; case dirs_dir: - fileUpdateR (zh, di, dst, base, src, rGroup, level+1); + fileUpdateR (zh, di, dst, base, src, level+1); if (dst) dst = dirs_last (di); break; @@ -366,12 +237,12 @@ static void fileUpdateR (ZebraHandle zh, switch (dst->kind) { case dirs_file: - fileExtract (zh, &dst->sysno, tmppath, rGroup, 1); + fileExtract (zh, &dst->sysno, tmppath, 1); dirs_del (di, dst->path); dst = dirs_read (di); break; case dirs_dir: - fileDeleteR (zh, di, dst, base, src, rGroup); + fileDeleteR (zh, di, dst, base, src); dst = dirs_last (di); } } @@ -379,41 +250,22 @@ static void fileUpdateR (ZebraHandle zh, dir_free (&e_src); } -static void groupRes (ZebraHandle zh, struct recordGroup *rGroup) -{ - char resStr[256]; - char gPrefix[256]; - - if (!rGroup->groupName || !*rGroup->groupName) - *gPrefix = '\0'; - else - sprintf (gPrefix, "%s.", rGroup->groupName); - - sprintf (resStr, "%srecordId", gPrefix); - rGroup->recordId = res_get (zh->res, resStr); - sprintf (resStr, "%sdatabasePath", gPrefix); - rGroup->databaseNamePath = - atoi (res_get_def (zh->res, resStr, "0")); -} - -void repositoryShow (ZebraHandle zh) - +void repositoryShow (ZebraHandle zh, const char *path) { - struct recordGroup *rGroup = &zh->rGroup; char src[1024]; int src_len; struct dirs_entry *dst; Dict dict; struct dirs_info *di; - - if (!(dict = dict_open (zh->reg->bfs, FMATCH_DICT, 50, 0, 0))) + + if (!(dict = dict_open_res (zh->reg->bfs, FMATCH_DICT, 50, 0, 0, zh->res))) { - logf (LOG_FATAL, "dict_open fail of %s", FMATCH_DICT); + yaz_log (YLOG_FATAL, "dict_open fail of %s", FMATCH_DICT); return; } - assert (rGroup->path); - strcpy (src, rGroup->path); + strncpy(src, path, sizeof(src)-1); + src[sizeof(src)-1]='\0'; src_len = strlen (src); if (src_len && src[src_len-1] != '/') @@ -422,30 +274,42 @@ void repositoryShow (ZebraHandle zh) src[++src_len] = '\0'; } - di = dirs_open (dict, src, rGroup->flagRw); + di = dirs_open (dict, src, zh->m_flag_rw); while ( (dst = dirs_read (di)) ) - logf (LOG_LOG, "%s", dst->path); + yaz_log (YLOG_LOG, "%s", dst->path); dirs_free (&di); dict_close (dict); } -static void fileUpdate (ZebraHandle zh, - Dict dict, struct recordGroup *rGroup, - const char *path) +static void fileUpdate (ZebraHandle zh, Dict dict, const char *path) { struct dirs_info *di; struct stat sbuf; char src[1024]; char dst[1024]; - int src_len; + int src_len, ret; assert (path); + + if (zh->path_reg && !yaz_is_abspath(path)) + { + strcpy (src, zh->path_reg); + strcat (src, "/"); + } + else + *src = '\0'; + strcat (src, path); + ret = zebra_file_stat (src, &sbuf, zh->m_follow_links); + strcpy (src, path); src_len = strlen (src); - stat (src, &sbuf); - if (S_ISREG(sbuf.st_mode)) + if (ret == -1) + { + yaz_log (YLOG_WARN|YLOG_ERRNO, "Cannot access path %s", src); + } + else if (S_ISREG(sbuf.st_mode)) { struct dirs_entry *e_dst; di = dirs_fopen (dict, src); @@ -454,13 +318,13 @@ static void fileUpdate (ZebraHandle zh, if (e_dst) { if (sbuf.st_mtime > e_dst->mtime) - if (fileExtract (zh, &e_dst->sysno, src, rGroup, 0)) + if (fileExtract (zh, &e_dst->sysno, src, 0)) dirs_add (di, src, e_dst->sysno, sbuf.st_mtime); } else { SYSNO sysno = 0; - if (fileExtract (zh, &sysno, src, rGroup, 0)) + if (fileExtract (zh, &sysno, src, 0)) dirs_add (di, src, sysno, sbuf.st_mtime); } dirs_free (&di); @@ -472,81 +336,91 @@ static void fileUpdate (ZebraHandle zh, src[src_len] = '/'; src[++src_len] = '\0'; } - di = dirs_open (dict, src, rGroup->flagRw); + di = dirs_open (dict, src, zh->m_flag_rw); *dst = '\0'; - fileUpdateR (zh, di, dirs_read (di), src, dst, rGroup, 0); + fileUpdateR (zh, di, dirs_read (di), src, dst, 0); dirs_free (&di); } else { - logf (LOG_WARN, "Ignoring path %s", src); + yaz_log (YLOG_WARN, "Skipping path %s", src); } } - static void repositoryExtract (ZebraHandle zh, - int deleteFlag, struct recordGroup *rGroup, - const char *path) + int deleteFlag, const char *path) { struct stat sbuf; char src[1024]; + int ret; assert (path); + + if (zh->path_reg && !yaz_is_abspath(path)) + { + strcpy (src, zh->path_reg); + strcat (src, "/"); + } + else + *src = '\0'; + strcat (src, path); + ret = zebra_file_stat (src, &sbuf, zh->m_follow_links); + strcpy (src, path); - stat (src, &sbuf); - if (S_ISREG(sbuf.st_mode)) - fileExtract (zh, NULL, src, rGroup, deleteFlag); + if (ret == -1) + yaz_log (YLOG_WARN|YLOG_ERRNO, "Cannot access path %s", src); + else if (S_ISREG(sbuf.st_mode)) + fileExtract (zh, NULL, src, deleteFlag); else if (S_ISDIR(sbuf.st_mode)) - repositoryExtractR (zh, deleteFlag, src, rGroup, 0); + repositoryExtractR (zh, deleteFlag, src, 0); else - logf (LOG_WARN, "Ignoring path %s", src); + yaz_log (YLOG_WARN, "Skipping path %s", src); } -static void repositoryExtractG (ZebraHandle zh, - int deleteFlag, struct recordGroup *rGroup) +static void repositoryExtractG (ZebraHandle zh, const char *path, + int deleteFlag) { - if (*rGroup->path == '\0' || !strcmp(rGroup->path, "-")) + if (!strcmp(path, "") || !strcmp(path, "-")) { char src[1024]; - - while (scanf ("%s", src) == 1) - repositoryExtract (zh, deleteFlag, rGroup, src); + + while (scanf ("%1020s", src) == 1) + repositoryExtract (zh, deleteFlag, src); } else - repositoryExtract (zh, deleteFlag, rGroup, rGroup->path); + repositoryExtract (zh, deleteFlag, path); } -void repositoryUpdate (ZebraHandle zh) +void repositoryUpdate (ZebraHandle zh, const char *path) { - struct recordGroup *rGroup = &zh->rGroup; - groupRes (zh, rGroup); - assert (rGroup->path); - if (rGroup->recordId && !strcmp (rGroup->recordId, "file")) + assert (path); + if (zh->m_record_id && !strcmp (zh->m_record_id, "file")) { Dict dict; - if (!(dict = dict_open (zh->reg->bfs, FMATCH_DICT, 50, - rGroup->flagRw, 0))) + if (!(dict = dict_open_res (zh->reg->bfs, FMATCH_DICT, 50, + zh->m_flag_rw, 0, zh->res))) { - logf (LOG_FATAL, "dict_open fail of %s", FMATCH_DICT); + yaz_log (YLOG_FATAL, "dict_open fail of %s", FMATCH_DICT); return ; } - if (*rGroup->path == '\0' || !strcmp(rGroup->path, "-")) + if (!strcmp(path, "") || !strcmp(path, "-")) { char src[1024]; while (scanf ("%s", src) == 1) - fileUpdate (zh, dict, rGroup, src); + fileUpdate (zh, dict, src); } else - fileUpdate (zh, dict, rGroup, rGroup->path); + fileUpdate (zh, dict, path); dict_close (dict); } else - repositoryExtractG (zh, 0, rGroup); + repositoryExtractG (zh, path, 0); } -void repositoryDelete (ZebraHandle zh) +void repositoryDelete (ZebraHandle zh, const char *path) { - repositoryExtractG (zh, 1, &zh->rGroup); + assert (path); + repositoryExtractG (zh, path, 1); }