X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Fmain.c;h=8cdc1061319f262ad3308feaf55d7a32bafbd8b3;hp=358b0a99524895c11762308c95d93b543983b164;hb=519fefb91135ad52134b9fc4e82b3874f5525a2b;hpb=0e816d23119c75153727b17b418066792d2ce8c8 diff --git a/index/main.c b/index/main.c index 358b0a9..8cdc106 100644 --- a/index/main.c +++ b/index/main.c @@ -1,420 +1,293 @@ -/* - * Copyright (C) 1994, Index Data I/S - * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss - * - * $Log: main.c,v $ - * Revision 1.3 1995-09-01 10:57:07 adam - * Minor changes. - * - * Revision 1.2 1995/09/01 10:30:24 adam - * More work on indexing. Not working yet. - * - * Revision 1.1 1995/08/31 14:50:24 adam - * New simple file index tool. - * - */ -#include -#include -#include -#include -#include -#include -#include +/* $Id: main.c,v 1.100 2002-10-22 12:51:08 adam Exp $ + Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 + Index Data Aps -#include -#include "index.h" +This file is part of the Zebra server. -char *prog; +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. -static int key_fd = -1; -#define KEY_BUF_SIZE 100000 -static char *key_buf; -int key_offset; -SYSNO sysno_next; -Dict file_idx; -static char *base_path = NULL; +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. -void key_open (const char *fname) -{ - void *file_key; - if (key_fd != -1) - return; - if ((key_fd = open (fname, O_RDWR|O_CREAT, 0666)) == -1) - { - log (LOG_FATAL|LOG_ERRNO, "Creat %s", fname); - exit (1); - } - if (!(key_buf = malloc (KEY_BUF_SIZE))) - { - log (LOG_FATAL|LOG_ERRNO, "malloc"); - exit (1); - } - key_offset = 0; - if (!(file_idx = dict_open ("fileidx", 10, 1))) - { - log (LOG_FATAL, "dict_open fail of %s", "fileidx"); - exit (1); - } - file_key = dict_lookup (file_idx, "."); - if (file_key) - memcpy (&sysno_next, (char*)file_key+1, sizeof(sysno_next)); - else - sysno_next = 1; -} +You should have received a copy of the GNU General Public License +along with Zebra; see the file LICENSE.zebra. If not, write to the +Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. +*/ -void key_close (void) -{ - if (key_fd == -1) - return; - close (key_fd); - dict_insert (file_idx, ".", sizeof(sysno_next), &sysno_next); - dict_close (file_idx); - key_fd = -1; -} -void key_flush (void) -{ - size_t i = 0; - int w; - - while (i < key_offset) - { - w = write (key_fd, key_buf + i, key_offset - i); - if (w == -1) - { - log (LOG_FATAL|LOG_ERRNO, "Write key fail"); - exit (1); - } - i += w; - } - key_offset = 0; -} +#include +#include +#include +#ifdef WIN32 +#include +#else +#include +#include +#endif +#include +#if HAVE_SYS_TIMES_H +#include +#endif -void key_write (int cmd, struct it_key *k, const char *str) -{ - char x = cmd; - size_t slen = strlen(str); +#include +#include "zebraapi.h" - if (key_offset + sizeof(*k) + slen >= KEY_BUF_SIZE - 2) - key_flush (); - memcpy (key_buf + key_offset, &x, 1); - key_offset++; - memcpy (key_buf + key_offset, k, sizeof(*k)); - key_offset += sizeof(*k); - memcpy (key_buf + key_offset, str, slen+1); - key_offset += slen+1; -} +char *prog; -void text_extract (SYSNO sysno, int cmd, const char *fname) +int main (int argc, char **argv) { - FILE *inf; - struct it_key k; - int seqno = 1; - int c; - char w[256]; - - log (LOG_DEBUG, "Text extract of %d", sysno); - k.sysno = sysno; - inf = fopen (fname, "r"); - if (!inf) - { - log (LOG_WARN|LOG_ERRNO, "open %s", fname); - return; - } - while ((c=getc (inf)) != EOF) - { - int i = 0; - while (i < 254 && c != EOF && isalnum(c)) - { - w[i++] = c; - c = getc (inf); - } - if (i) - { - w[i] = 0; - - k.seqno = seqno++; - k.field = 0; - key_write (cmd, &k, w); - } - if (c == EOF) - break; - } - fclose (inf); -} + int ret; + int cmd = 0; + char *arg; + char *configName = 0; + int nsections = 0; + int disableCommit = 0; + char *mem_max = 0; + + int trans_started=0; +#if HAVE_SYS_TIMES_H + struct tms tms1, tms2; + struct timeval start_time, end_time; + long usec; +#endif +#ifndef WIN32 + char nbuf[100]; +#endif + struct recordGroup rGroupDef; + ZebraService zs = 0; + ZebraHandle zh = 0; -void file_extract (int cmd, struct stat *fs, const char *fname, - const char *kname) -{ - int i; - char ext[128]; - SYSNO sysno; - char ext_res[128]; - const char *file_type; - void *file_info; + nmem_init (); - log (LOG_DEBUG, "%c %s k=%s", cmd, fname, kname); - return; - for (i = strlen(fname); --i >= 0; ) - if (fname[i] == '/') - { - strcpy (ext, ""); - break; - } - else if (fname[i] == '.') - { - strcpy (ext, fname+i+1); - break; - } - sprintf (ext_res, "fileExtension.%s", ext); - if (!(file_type = res_get (common_resource, ext_res))) - return; - - file_info = dict_lookup (file_idx, fname); - if (!file_info) - { - sysno = sysno_next++; - dict_insert (file_idx, fname, sizeof(sysno), &sysno); - } - else - memcpy (&sysno, (char*) file_info+1, sizeof(sysno)); - if (!strcmp (file_type, "text")) - text_extract (sysno, cmd, fname); -} +#ifdef WIN32 +#else + sprintf(nbuf, "%.40s(%d)", *argv, getpid()); + yaz_log_init_prefix (nbuf); +#endif +#if HAVE_SYS_TIMES_H + times(&tms1); + gettimeofday(&start_time, 0); +#endif -static void repository_extract_r (int cmd, char *rep) -{ - struct dir_entry *e; - int i; - struct stat fs; - size_t rep_len = strlen (rep); + rGroupDef.groupName = NULL; + rGroupDef.databaseName = NULL; + rGroupDef.path = NULL; + rGroupDef.recordId = NULL; + rGroupDef.recordType = NULL; + rGroupDef.flagStoreData = -1; + rGroupDef.flagStoreKeys = -1; + rGroupDef.flagRw = 1; + rGroupDef.databaseNamePath = 0; + rGroupDef.explainDatabase = 0; + rGroupDef.fileVerboseLimit = 100000; + rGroupDef.followLinks = -1; - e = dir_open (rep); - if (!e) - return; - if (rep[rep_len-1] != '/') - rep[rep_len] = '/'; - else - --rep_len; - for (i=0; e[i].name; i++) + prog = *argv; + if (argc < 2) { - if (!strcmp (e[i].name, ".") || !strcmp (e[i].name, "..")) - continue; - strcpy (rep +rep_len+1, e[i].name); - stat (rep, &fs); - switch (fs.st_mode & S_IFMT) - { - case S_IFREG: - file_extract (cmd, &fs, rep, rep); - break; - case S_IFDIR: - repository_extract_r (cmd, rep); - break; - } + fprintf (stderr, "%s [options] command ...\n" + "Commands:\n" + " update Update index with files below .\n" + " If is empty filenames are read from stdin.\n" + " delete Delete index with files below .\n" + " commit Commit changes\n" + " clean Clean shadow files\n" + "Options:\n" + " -t Index files as (grs or text).\n" + " -c Read configuration file .\n" + " -g Index files according to group settings.\n" + " -d Records belong to Z39.50 database .\n" + " -m Use before flushing keys to disk.\n" + " -n Don't use shadow system.\n" + " -s Show analysis on stdout, but do no work.\n" + " -v Set logging to .\n" + " -l Write log to .\n" + " -L Don't follow symbolic links.\n" + " -f Display information for the first records.\n" + " -V Show version.\n", *argv + ); + exit (1); } - dir_free (&e); -} - -void repository_update_r (int cmd, char *dst, char *src); - -void repository_add_tree (int cmd, char *dst, char *src) -{ - mkdir (dst, 0755); - repository_update_r (cmd, dst, src); -} - -void repository_del_tree (int cmd, char *dst, char *src) -{ - log (LOG_DEBUG, "rmdir of %s", dst); -} - -void repository_update_r (int cmd, char *dst, char *src) -{ - struct dir_entry *e_dst, *e_src; - int i_dst = 0, i_src = 0; - struct stat fs_dst, fs_src; - size_t dst_len = strlen (dst); - size_t src_len = strlen (src); - - e_dst = dir_open (dst); - e_src = dir_open (src); - - if (!e_dst && !e_src) - return; - if (!e_dst) - repository_add_tree (cmd, dst, src); - else if (!e_src) - repository_del_tree (cmd, dst, src); - - dir_sort (e_src); - dir_sort (e_dst); - - if (src[src_len-1] != '/') - src[src_len] = '/'; - else - --src_len; - if (dst[dst_len-1] != '/') - dst[dst_len] = '/'; - else - --dst_len; - while (e_dst[i_dst].name || e_src[i_src].name) + while ((ret = options ("sVt:c:g:d:m:v:nf:l:L" + , argv, argc, &arg)) != -2) { - int sd; - - if (e_dst[i_dst].name && e_src[i_src].name) - sd = strcmp (e_dst[i_dst].name, e_src[i_src].name); - else if (e_src[i_src].name) - sd = 1; - else - sd = -1; - - if (sd == 0) + if (ret == 0) { - strcpy (dst +dst_len+1, e_dst[i_dst].name); - strcpy (src +src_len+1, e_src[i_src].name); - - /* check type, date, length */ - - if (strcmp (e_dst[i_dst].name, ".") && - strcmp (e_dst[i_dst].name, "..")) + if(cmd == 0) /* command */ { - stat (dst, &fs_dst); - stat (src, &fs_src); - - switch (fs_dst.st_mode & S_IFMT) + if (!zs) { - case S_IFREG: - if (fs_src.st_mtime != fs_dst.st_mtime) + const char *config = configName ? configName : "zebra.cfg"; + logf (LOG_LOG, "Zebra version %s %s", + ZEBRAVER, ZEBRADATE); + zs = zebra_start (config); + if (!zs) { - file_extract ('d', &fs_dst, dst, dst); - file_extract ('a', &fs_src, src, dst); - } - break; - case S_IFDIR: - repository_update_r (cmd, dst, src); - break; + yaz_log (LOG_FATAL, "Cannot read config %s", config); + exit (1); + } + zh = zebra_open (zs); + if (disableCommit) + zebra_shadow_enable (zh, 0); } - } - i_src++; - i_dst++; - } - else if (sd > 0) - { - strcpy (dst +dst_len+1, e_src[i_src].name); - strcpy (src +src_len+1, e_src[i_src].name); - - stat (src, &fs_src); - switch (fs_src.st_mode & S_IFMT) - { - case S_IFREG: - file_extract ('a', &fs_src, src, dst); - break; - case S_IFDIR: - repository_add_tree (cmd, dst, src); - break; - } - i_src++; - } - else - { - strcpy (dst +dst_len+1, e_dst[i_dst].name); - strcpy (src +src_len+1, e_dst[i_dst].name); - - stat (dst, &fs_dst); - switch (fs_dst.st_mode & S_IFMT) - { - case S_IFREG: - file_extract ('d', &fs_dst, dst, dst); - break; - case S_IFDIR: - repository_del_tree (cmd, dst, src); - break; - } - i_dst++; - } - } - dir_free (&e_dst); - dir_free (&e_src); -} - -void repository_traverse (int cmd, const char *rep) -{ - char rep_tmp1[2048]; - char rep_tmp2[2048]; - - strcpy (rep_tmp1, rep); - if (base_path) - { - strcpy (rep_tmp2, base_path); - repository_update_r (cmd, rep_tmp2, rep_tmp1); - } - else - repository_extract_r (cmd, rep_tmp1); -} + if (rGroupDef.databaseName) + { + if (zebra_select_database (zh, rGroupDef.databaseName)) + { + logf(LOG_FATAL, "Could not select database %s errCode=%d", + rGroupDef.databaseName, zebra_errCode(zh) ); + exit (1); + } + } + else + { + if (zebra_select_database (zh, "Default")) + { + logf(LOG_FATAL, "Could not select database Default errCode=%d", + zebra_errCode(zh) ); + exit (1); + } + } + if (mem_max) + zebra_set_resource(zh, "memmax",mem_max); -int main (int argc, char **argv) -{ - int ret; - int cmd = 0; - char *arg; - char *base_name; - - prog = *argv; - while ((ret = options ("r:v:", argv, argc, &arg)) != -2) - { - if (ret == 0) - { - if (!base_name) - { - base_name = arg; - - common_resource = res_open (base_name); - if (!common_resource) + if (!strcmp (arg, "update")) + cmd = 'u'; + else if (!strcmp (arg, "update1")) + cmd = 'U'; + else if (!strcmp (arg, "update2")) + cmd = 'm'; + else if (!strcmp (arg, "dump")) + cmd = 's'; + else if (!strcmp (arg, "del") || !strcmp(arg, "delete")) + cmd = 'd'; + else if (!strcmp (arg, "init")) + { + zebra_init (zh); + } + else if (!strcmp (arg, "commit")) { - log (LOG_FATAL, "Cannot open resource `%s'", base_name); - exit (1); + zebra_commit (zh); } - } - else if(cmd == 0) /* command */ - { - if (!strcmp (arg, "add")) + else if (!strcmp (arg, "clean")) { - cmd = 'a'; + assert (!"todo"); } - else if (!strcmp (arg, "del")) + else if (!strcmp (arg, "stat") || !strcmp (arg, "status")) { - cmd = 'd'; + zebra_register_statistics (zh,0); + } + else if (!strcmp (arg, "dump") || !strcmp (arg, "dumpdict")) + { + zebra_register_statistics (zh,1); + } + else if (!strcmp (arg, "compact")) + { + zebra_compact (zh); } else { - log (LOG_FATAL, "Unknown command: %s", arg); + logf (LOG_FATAL, "unknown command: %s", arg); exit (1); } } - else + else { - key_open ("keys.tmp"); - repository_traverse (cmd, arg); - cmd = 0; + rGroupDef.path = arg; + zebra_set_group (zh, &rGroupDef); + if (!trans_started) + { + trans_started=1; + zebra_begin_trans (zh); + } + + switch (cmd) + { + case 'u': + zebra_repository_update (zh); + break; + case 'd': + zebra_repository_delete (zh); + break; + case 's': + logf (LOG_LOG, "dumping %s", rGroupDef.path); + zebra_repository_show (zh); + nsections = 0; + break; + default: + nsections = 0; + } + log_event_end (NULL, NULL); } } - else if (ret == 'v') + else if (ret == 'V') { - log_init (log_mask_str(arg), prog, NULL); - } - else if (ret == 'r') - { - base_path = arg; + printf("Zebra %s %s\n", ZEBRAVER, ZEBRADATE); + printf(" (C) 1994-2002, Index Data ApS\n"); +#ifdef WIN32 +#ifdef _DEBUG + printf(" WIN32 Debug\n"); +#else + printf(" WIN32 Release\n"); +#endif +#endif +#if HAVE_BZLIB_H + printf("libbzip2\n" + " (C) 1996-1999 Julian R Seward. All rights reserved.\n"); +#endif } + else if (ret == 'v') + yaz_log_init_level (yaz_log_mask_str(arg)); + else if (ret == 'l') + yaz_log_init_file (arg); + else if (ret == 'm') + mem_max = arg; + else if (ret == 'd') + rGroupDef.databaseName = arg; + else if (ret == 's') + rGroupDef.flagRw = 0; + else if (ret == 'g') + rGroupDef.groupName = arg; + else if (ret == 'f') + rGroupDef.fileVerboseLimit = atoi(arg); + else if (ret == 'c') + configName = arg; + else if (ret == 't') + rGroupDef.recordType = arg; + else if (ret == 'n') + disableCommit = 1; + else if (ret == 'L') + rGroupDef.followLinks = 0; else - { - log (LOG_FATAL, "Unknown option '-%s'", arg); - exit (1); - } + logf (LOG_WARN, "unknown option '-%s'", arg); + } /* while arg */ + + if (trans_started) + zebra_end_trans (zh); + + zebra_close (zh); + zebra_stop (zs); +#if HAVE_SYS_TIMES_H + if (trans_started) + { + gettimeofday(&end_time, 0); + usec = (end_time.tv_sec - start_time.tv_sec) * 1000000L + + end_time.tv_usec - start_time.tv_usec; + times(&tms2); + yaz_log (LOG_LOG, "zebraidx times: %5.2f %5.2f %5.2f", + (double) usec / 1000000.0, + (double) (tms2.tms_utime - tms1.tms_utime)/100, + (double) (tms2.tms_stime - tms1.tms_stime)/100); } - key_flush (); - key_close (); +#endif exit (0); + return 0; } +