X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fkinput.c;h=92e64ecdfef3e1dfa5662ad27ecc492af30a219f;hb=297ba5c5c265a5f869f43a0a211bf9f48f700add;hp=f0649d66e4075433e74b216cb9b8f2b58f505002;hpb=245ce174727953c86b42469927d469f860f07230;p=idzebra-moved-to-github.git diff --git a/index/kinput.c b/index/kinput.c index f0649d6..92e64ec 100644 --- a/index/kinput.c +++ b/index/kinput.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 1994-1999, Index Data + * Copyright (C) 1994-2000, Index Data * All rights reserved. * Sebastian Hammer, Adam Dickmeiss, Heikki Levanto * @@ -23,6 +23,7 @@ #include #include "index.h" +#include "zserver.h" #define KEY_SIZE (1+sizeof(struct it_key)) #define INP_NAME_MAX 768 @@ -49,13 +50,22 @@ struct key_file { /* handler invoked in each read */ void (*readHandler)(struct key_file *keyp, void *rinfo); void *readInfo; + Res res; }; -void getFnameTmp (char *fname, int no) +void getFnameTmp (Res res, char *fname, int no) { const char *pre; - pre = res_get_def (common_resource, "keyTmpDir", "."); + pre = res_get_def (res, "keyTmpDir", "."); + sprintf (fname, "%s/key%d.tmp", pre, no); +} + +void extract_get_fname_tmp (ZebraHandle zh, char *fname, int no) +{ + const char *pre; + + pre = res_get_def (zh->service->res, "keyTmpDir", "."); sprintf (fname, "%s/key%d.tmp", pre, no); } @@ -63,25 +73,30 @@ void key_file_chunk_read (struct key_file *f) { int nr = 0, r = 0, fd; char fname[1024]; - getFnameTmp (fname, f->no); + getFnameTmp (f->res, fname, f->no); fd = open (fname, O_BINARY|O_RDONLY); + + f->buf_ptr = 0; + f->buf_size = 0; if (fd == -1) { - logf (LOG_FATAL|LOG_ERRNO, "cannot open %s", fname); - exit (1); + logf (LOG_WARN|LOG_ERRNO, "cannot open %s", fname); + return ; } if (!f->length) { if ((f->length = lseek (fd, 0L, SEEK_END)) == (off_t) -1) { - logf (LOG_FATAL|LOG_ERRNO, "cannot seek %s", fname); - exit (1); + logf (LOG_WARN|LOG_ERRNO, "cannot seek %s", fname); + close (fd); + return ; } } if (lseek (fd, f->offset, SEEK_SET) == -1) { - logf (LOG_FATAL|LOG_ERRNO, "cannot seek %s", fname); - exit (1); + logf (LOG_WARN|LOG_ERRNO, "cannot seek %s", fname); + close(fd); + return ; } while (f->chunk - nr > 0) { @@ -92,21 +107,22 @@ void key_file_chunk_read (struct key_file *f) } if (r == -1) { - logf (LOG_FATAL|LOG_ERRNO, "read of %s", fname); - exit (1); + logf (LOG_WARN|LOG_ERRNO, "read of %s", fname); + close (fd); + return; } f->buf_size = nr; - f->buf_ptr = 0; if (f->readHandler) (*f->readHandler)(f, f->readInfo); close (fd); } -struct key_file *key_file_init (int no, int chunk) +struct key_file *key_file_init (int no, int chunk, Res res) { struct key_file *f; f = (struct key_file *) xmalloc (sizeof(*f)); + f->res = res; f->sysno = 0; f->seqno = 0; f->no = no; @@ -208,11 +224,12 @@ struct heap_info { int *ptr; int (*cmp)(const void *p1, const void *p2); Dict dict; + ISAMS isams; +#if ZMBOL ISAM isam; ISAMC isamc; - ISAMS isams; - ISAMH isamh; ISAMD isamd; +#endif }; struct heap_info *key_heap_init (int nkeys, @@ -346,6 +363,7 @@ int heap_cread_item (void *vp, char **dst, int *insertMode) return 1; } +#if ZMBOL int heap_inpc (struct heap_info *hi) { struct heap_cread_info hci; @@ -397,92 +415,6 @@ int heap_inpc (struct heap_info *hi) return 0; } -int heap_inps (struct heap_info *hi) -{ - struct heap_cread_info hci; - ISAMS_I isams_i = (ISAMS_I) xmalloc (sizeof(*isams_i)); - - hci.key = (char *) xmalloc (KEY_SIZE); - hci.mode = 1; - hci.hi = hi; - hci.more = heap_read_one (hi, hci.cur_name, hci.key); - - isams_i->clientData = &hci; - isams_i->read_item = heap_cread_item; - - while (hci.more) - { - char this_name[INP_NAME_MAX]; - ISAMS_P isams_p; - char *dict_info; - - strcpy (this_name, hci.cur_name); - assert (hci.cur_name[1]); - no_diffs++; - if (!(dict_info = dict_lookup (hi->dict, hci.cur_name))) - { - isams_p = isams_merge (hi->isams, isams_i); - no_insertions++; - dict_insert (hi->dict, this_name, sizeof(ISAMS_P), &isams_p); - } - else - abort(); - } - xfree (isams_i); - return 0; -} - -int heap_inph (struct heap_info *hi) -{ - struct heap_cread_info hci; - ISAMH_I isamh_i = (ISAMH_I) xmalloc (sizeof(*isamh_i)); - - hci.key = (char *) xmalloc (KEY_SIZE); - hci.mode = 1; - hci.hi = hi; - hci.more = heap_read_one (hi, hci.cur_name, hci.key); - - isamh_i->clientData = &hci; - isamh_i->read_item = heap_cread_item; - - while (hci.more) - { - char this_name[INP_NAME_MAX]; - ISAMH_P isamh_p, isamh_p2; - char *dict_info; - - strcpy (this_name, hci.cur_name); - assert (hci.cur_name[1]); - no_diffs++; - if ((dict_info = dict_lookup (hi->dict, hci.cur_name))) - { - memcpy (&isamh_p, dict_info+1, sizeof(ISAMH_P)); - isamh_p2 = isamh_append (hi->isamh, isamh_p, isamh_i); - if (!isamh_p2) - { - no_deletions++; - if (!dict_delete (hi->dict, this_name)) - abort(); - } - else - { - no_updates++; - if (isamh_p2 != isamh_p) - dict_insert (hi->dict, this_name, - sizeof(ISAMH_P), &isamh_p2); - } - } - else - { - isamh_p = isamh_append (hi->isamh, 0, isamh_i); - no_insertions++; - dict_insert (hi->dict, this_name, sizeof(ISAMH_P), &isamh_p); - } - } - xfree (isamh_i); - return 0; -} - int heap_inpd (struct heap_info *hi) { struct heap_cread_info hci; @@ -534,9 +466,6 @@ int heap_inpd (struct heap_info *hi) return 0; } - - - int heap_inp (struct heap_info *hi) { char *info; @@ -607,6 +536,46 @@ int heap_inp (struct heap_info *hi) return 0; } +#endif + +int heap_inps (struct heap_info *hi) +{ + struct heap_cread_info hci; + ISAMS_I isams_i = (ISAMS_I) xmalloc (sizeof(*isams_i)); + + hci.key = (char *) xmalloc (KEY_SIZE); + hci.mode = 1; + hci.hi = hi; + hci.more = heap_read_one (hi, hci.cur_name, hci.key); + + isams_i->clientData = &hci; + isams_i->read_item = heap_cread_item; + + while (hci.more) + { + char this_name[INP_NAME_MAX]; + ISAMS_P isams_p; + char *dict_info; + + strcpy (this_name, hci.cur_name); + assert (hci.cur_name[1]); + no_diffs++; + if (!(dict_info = dict_lookup (hi->dict, hci.cur_name))) + { + isams_p = isams_merge (hi->isams, isams_i); + no_insertions++; + dict_insert (hi->dict, this_name, sizeof(ISAMS_P), &isams_p); + } + else + { + logf (LOG_FATAL, "isams doesn't support this kind of update"); + break; + } + } + xfree (isams_i); + return 0; +} + struct progressInfo { time_t startTime; time_t lastTime; @@ -642,15 +611,88 @@ void progressFunc (struct key_file *keyp, void *info) #define R_OK 4 #endif -void key_input (BFiles bfs, int nkeys, int cache) +void zebra_index_merge (ZebraHandle zh) +{ + struct key_file **kf; + char rbuf[1024]; + int i, r; + struct heap_info *hi; + struct progressInfo progressInfo; + int nkeys = zh->key_file_no; + + if (nkeys < 0) + { + char fname[1024]; + nkeys = 0; + while (1) + { + extract_get_fname_tmp (zh, fname, nkeys+1); + if (access (fname, R_OK) == -1) + break; + nkeys++; + } + if (!nkeys) + return ; + } + kf = (struct key_file **) xmalloc ((1+nkeys) * sizeof(*kf)); + progressInfo.totalBytes = 0; + progressInfo.totalOffset = 0; + time (&progressInfo.startTime); + time (&progressInfo.lastTime); + for (i = 1; i<=nkeys; i++) + { + kf[i] = key_file_init (i, 8192, zh->service->res); + kf[i]->readHandler = progressFunc; + kf[i]->readInfo = &progressInfo; + progressInfo.totalBytes += kf[i]->length; + progressInfo.totalOffset += kf[i]->buf_size; + } + hi = key_heap_init (nkeys, key_qsort_compare); + hi->dict = zh->service->dict; + hi->isams = zh->service->isams; +#if ZMBOL + hi->isam = zh->service->isam; + hi->isamc = zh->service->isamc; + hi->isamd = zh->service->isamd; +#endif + + for (i = 1; i<=nkeys; i++) + if ((r = key_file_read (kf[i], rbuf))) + key_heap_insert (hi, rbuf, r, kf[i]); + if (zh->service->isams) + heap_inps (hi); +#if ZMBOL + else if (zh->service->isamc) + heap_inpc (hi); + else if (zh->service->isam) + heap_inp (hi); + else if (zh->service->isamd) + heap_inpd (hi); +#endif + + for (i = 1; i<=nkeys; i++) + { + extract_get_fname_tmp (zh, rbuf, i); + unlink (rbuf); + } + logf (LOG_LOG, "Iterations . . .%7d", no_iterations); + logf (LOG_LOG, "Distinct words .%7d", no_diffs); + logf (LOG_LOG, "Updates. . . . .%7d", no_updates); + logf (LOG_LOG, "Deletions. . . .%7d", no_deletions); + logf (LOG_LOG, "Insertions . . .%7d", no_insertions); + zh->key_file_no = 0; +} + +void key_input (BFiles bfs, int nkeys, int cache, Res res) { Dict dict; + ISAMS isams = NULL; +#if ZMBOL ISAM isam = NULL; ISAMC isamc = NULL; - ISAMS isams = NULL; - ISAMH isamh = NULL; ISAMD isamd = NULL; +#endif struct key_file **kf; char rbuf[1024]; int i, r; @@ -663,7 +705,7 @@ void key_input (BFiles bfs, int nkeys, int cache) nkeys = 0; while (1) { - getFnameTmp (fname, nkeys+1); + getFnameTmp (res, fname, nkeys+1); if (access (fname, R_OK) == -1) break; nkeys++; @@ -677,59 +719,52 @@ void key_input (BFiles bfs, int nkeys, int cache) logf (LOG_FATAL, "dict_open fail"); exit (1); } - if (res_get_match (common_resource, "isam", "s", NULL)) + if (res_get_match (res, "isam", "s", ISAM_DEFAULT)) { struct ISAMS_M_s isams_m; isams = isams_open (bfs, FNAME_ISAMS, 1, - key_isams_m (common_resource, &isams_m)); + key_isams_m (res, &isams_m)); if (!isams) { logf (LOG_FATAL, "isams_open fail"); exit (1); } + logf (LOG_LOG, "isams opened"); } - else if (res_get_match (common_resource, "isam", "i", NULL)) +#if ZMBOL + else if (res_get_match (res, "isam", "i", ISAM_DEFAULT)) { isam = is_open (bfs, FNAME_ISAM, key_compare, 1, - sizeof(struct it_key), common_resource); + sizeof(struct it_key), res); if (!isam) { logf (LOG_FATAL, "is_open fail"); exit (1); } } - else if (res_get_match (common_resource, "isam", "h", NULL)) - { - isamh = isamh_open (bfs, FNAME_ISAMH, 1, - key_isamh_m (common_resource)); - if (!isamh) - { - logf (LOG_FATAL, "isamh_open fail"); - exit (1); - } - } - else if (res_get_match (common_resource, "isam", "d", NULL)) + else if (res_get_match (res, "isam", "d", ISAM_DEFAULT)) { struct ISAMD_M_s isamd_m; isamd = isamd_open (bfs, FNAME_ISAMD, 1, - key_isamd_m (common_resource,&isamd_m)); + key_isamd_m (res,&isamd_m)); if (!isamd) { logf (LOG_FATAL, "isamd_open fail"); exit (1); } } - else + else if (res_get_match (res, "isam", "c", ISAM_DEFAULT)) { struct ISAMC_M_s isamc_m; isamc = isc_open (bfs, FNAME_ISAMC, 1, - key_isamc_m (common_resource, &isamc_m)); + key_isamc_m (res, &isamc_m)); if (!isamc) { logf (LOG_FATAL, "isc_open fail"); exit (1); } } +#endif kf = (struct key_file **) xmalloc ((1+nkeys) * sizeof(*kf)); progressInfo.totalBytes = 0; progressInfo.totalOffset = 0; @@ -737,7 +772,7 @@ void key_input (BFiles bfs, int nkeys, int cache) time (&progressInfo.lastTime); for (i = 1; i<=nkeys; i++) { - kf[i] = key_file_init (i, 32768); + kf[i] = key_file_init (i, 8192, res); kf[i]->readHandler = progressFunc; kf[i]->readInfo = &progressInfo; progressInfo.totalBytes += kf[i]->length; @@ -745,41 +780,42 @@ void key_input (BFiles bfs, int nkeys, int cache) } hi = key_heap_init (nkeys, key_qsort_compare); hi->dict = dict; + hi->isams = isams; +#if ZMBOL hi->isam = isam; hi->isamc = isamc; - hi->isams = isams; - hi->isamh = isamh; hi->isamd = isamd; +#endif for (i = 1; i<=nkeys; i++) if ((r = key_file_read (kf[i], rbuf))) key_heap_insert (hi, rbuf, r, kf[i]); - if (isamc) - heap_inpc (hi); - else if (isams) + if (isams) heap_inps (hi); +#if ZMBOL + else if (isamc) + heap_inpc (hi); else if (isam) heap_inp (hi); - else if (isamh) - heap_inph (hi); else if (isamd) heap_inpd (hi); +#endif dict_close (dict); + if (isams) + isams_close (isams); +#if ZMBOL if (isam) is_close (isam); if (isamc) isc_close (isamc); - if (isams) - isams_close (isams); - if (isamh) - isamh_close (isamh); if (isamd) isamd_close (isamd); +#endif for (i = 1; i<=nkeys; i++) { - getFnameTmp (rbuf, i); + getFnameTmp (res, rbuf, i); unlink (rbuf); } logf (LOG_LOG, "Iterations . . .%7d", no_iterations); @@ -795,7 +831,20 @@ void key_input (BFiles bfs, int nkeys, int cache) /* * $Log: kinput.c,v $ - * Revision 1.40 1999-09-08 12:12:39 adam + * Revision 1.44 2000-05-18 12:01:36 adam + * System call times(2) used again. More 64-bit fixes. + * + * Revision 1.43 2000/03/20 19:08:36 adam + * Added remote record import using Z39.50 extended services and Segment + * Requests. + * + * Revision 1.42 1999/12/01 21:58:48 adam + * Proper handle of illegal use of isams. + * + * Revision 1.41 1999/11/30 13:48:03 adam + * Improved installation. Updated for inclusion of YAZ header files. + * + * Revision 1.40 1999/09/08 12:12:39 adam * Removed log message. * * Revision 1.39 1999/08/18 10:39:20 heikki