From a02c4cf957603c707fe9f4fde94a468f1d030dfb Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Mon, 4 Sep 1995 09:10:34 +0000 Subject: [PATCH] More work on index add/del/update. Merge sort implemented. Initial work on z39 server. --- index/Makefile | 25 +++++++--- index/extract.c | 28 ++++++++--- index/index.h | 14 +++++- index/kcompare.c | 41 ++++++++++++++++ index/kdump.c | 88 +++++++++++++++++++++++++++++++++ index/kinput.c | 143 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ index/main.c | 16 +++++- index/zrpn.c | 143 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ index/zserver.c | 61 +++++++++++++++++++++++ 9 files changed, 541 insertions(+), 18 deletions(-) create mode 100644 index/kcompare.c create mode 100644 index/kdump.c create mode 100644 index/kinput.c create mode 100644 index/zrpn.c create mode 100644 index/zserver.c diff --git a/index/Makefile b/index/Makefile index f658e9b..f7377ae 100644 --- a/index/Makefile +++ b/index/Makefile @@ -1,19 +1,30 @@ # Copyright (C) 1995, Index Data I/S # All rights reserved. # Sebastian Hammer, Adam Dickmeiss -# $Id: Makefile,v 1.3 1995-09-01 14:06:35 adam Exp $ +# $Id: Makefile,v 1.4 1995-09-04 09:10:34 adam Exp $ SHELL=/bin/sh -INCLUDE=-I../include +INCLUDE=-I../include -I../../yaz/include TPROG1=index +TPROG2=kdump +TPROG3=zserver DEFS=$(INCLUDE) -O = main.o dir.o trav.o extract.o +O1 = main.o dir.o trav.o extract.o kinput.o kcompare.o ksort.o +O2 = kdump.o +O3 = zserver.o kcompare.o zrpn.o CPP=cc -E -all: $(TPROG1) +all: $(TPROG1) $(TPROG2) $(TPROG3) + +$(TPROG1): $(O1) ../lib/dict.a ../lib/isam.a ../lib/bfile.a ../lib/util.a + $(CC) $(CFLAGS) -o $(TPROG1) $(O1) ../lib/dict.a ../lib/isam.a ../lib/bfile.a ../lib/util.a + +$(TPROG2): $(O2) ../lib/util.a + $(CC) $(CFLAGS) -o $(TPROG2) $(O2) ../lib/util.a + +$(TPROG3): $(O3) ../lib/dict.a ../lib/isam.a ../lib/bfile.a ../lib/rset.a ../lib/util.a + $(CC) $(CFLAGS) -o $(TPROG3) $(O3) ../lib/rset.a ../lib/dict.a ../lib/isam.a ../lib/bfile.a ../lib/util.a -$(TPROG1): $(O) ../lib/dict.a ../lib/bfile.a ../lib/util.a - $(CC) $(CFLAGS) -o $(TPROG1) $(O) ../lib/dict.a ../lib/bfile.a ../lib/util.a .c.o: $(CC) -c $(DEFS) $(CFLAGS) $< @@ -21,7 +32,7 @@ $(TPROG1): $(O) ../lib/dict.a ../lib/bfile.a ../lib/util.a clean: rm -f *.[oa] $(TPROG1) $(TPROG2) core mon.out gmon.out errlist -$(O): index.h +$(O1) $(O2) $(O3): index.h depend: depend2 diff --git a/index/extract.c b/index/extract.c index 34cb68b..fc170f6 100644 --- a/index/extract.c +++ b/index/extract.c @@ -4,7 +4,12 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: extract.c,v $ - * Revision 1.1 1995-09-01 14:06:35 adam + * Revision 1.2 1995-09-04 09:10:34 adam + * More work on index add/del/update. + * Merge sort implemented. + * Initial work on z39 server. + * + * Revision 1.1 1995/09/01 14:06:35 adam * Split of work into more files. * */ @@ -35,6 +40,7 @@ void key_open (const char *fname) log (LOG_FATAL|LOG_ERRNO, "Creat %s", fname); exit (1); } + log (LOG_DEBUG, "key_open of %s", fname); if (!(key_buf = malloc (KEY_BUF_SIZE))) { log (LOG_FATAL|LOG_ERRNO, "malloc"); @@ -53,21 +59,28 @@ void key_open (const char *fname) sysno_next = 1; } -void key_close (void) +int key_close (void) { if (key_fd == -1) - return; + { + log (LOG_DEBUG, "key_close - but no file"); + return 0; + } close (key_fd); dict_insert (file_idx, ".", sizeof(sysno_next), &sysno_next); dict_close (file_idx); key_fd = -1; + log (LOG_DEBUG, "key close - key file exist"); + return 1; } void key_flush (void) { size_t i = 0; int w; - + + if (key_fd == -1) + return; while (i < key_offset) { w = write (key_fd, key_buf + i, key_offset - i); @@ -83,17 +96,18 @@ void key_flush (void) void key_write (int cmd, struct it_key *k, const char *str) { - char x = cmd; + char x; size_t slen = strlen(str); if (key_offset + sizeof(*k) + slen >= KEY_BUF_SIZE - 2) key_flush (); + x = (cmd == 'a') ? 1 : 0; + memcpy (key_buf + key_offset, str, slen+1); + key_offset += slen+1; memcpy (key_buf + key_offset, &x, 1); key_offset++; memcpy (key_buf + key_offset, k, sizeof(*k)); key_offset += sizeof(*k); - memcpy (key_buf + key_offset, str, slen+1); - key_offset += slen+1; } void text_extract (SYSNO sysno, int cmd, const char *fname) diff --git a/index/index.h b/index/index.h index 22c8419..84c12c8 100644 --- a/index/index.h +++ b/index/index.h @@ -4,7 +4,12 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: index.h,v $ - * Revision 1.3 1995-09-01 14:06:35 adam + * Revision 1.4 1995-09-04 09:10:35 adam + * More work on index add/del/update. + * Merge sort implemented. + * Initial work on z39 server. + * + * Revision 1.3 1995/09/01 14:06:35 adam * Split of work into more files. * * Revision 1.2 1995/09/01 10:30:24 adam @@ -37,6 +42,11 @@ void repository (int cmd, const char *rep, const char *base_path); void file_extract (int cmd, const char *fname, const char *kname); void key_open (const char *fname); -void key_close (void); +int key_close (void); void key_flush (void); void key_write (int cmd, struct it_key *k, const char *str); +int key_compare (const void *p1, const void *p2); +int key_compare_x (const struct it_key *i1, const struct it_key *i2); +void key_input (const char *dict_fname, const char *isam_fname, + const char *key_fname, int cache); +int key_sort (const char *key_fname, size_t mem); diff --git a/index/kcompare.c b/index/kcompare.c new file mode 100644 index 0000000..5217ed9 --- /dev/null +++ b/index/kcompare.c @@ -0,0 +1,41 @@ +/* + * Copyright (C) 1995, Index Data I/S + * All rights reserved. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: kcompare.c,v $ + * Revision 1.1 1995-09-04 09:10:36 adam + * More work on index add/del/update. + * Merge sort implemented. + * Initial work on z39 server. + * + */ + +#include +#include +#include +#include + +#include "index.h" + +int key_compare (const void *p1, const void *p2) +{ + struct it_key i1, i2; + memcpy (&i1, p1, sizeof(i1)); + memcpy (&i2, p2, sizeof(i2)); + if ( i1.sysno != i2.sysno) + return i1.sysno - i2.sysno; + if ( i1.seqno != i2.seqno) + return i1.seqno - i2.seqno; + return i1.field - i2.field; +} + +int key_compare_x (const struct it_key *i1, const struct it_key *i2) +{ + if ( i1->sysno != i2->sysno) + return i1->sysno - i2->sysno; + if ( i1->seqno != i2->seqno) + return i1->seqno - i2->seqno; + return i1->field - i2->field; +} + diff --git a/index/kdump.c b/index/kdump.c new file mode 100644 index 0000000..8dfe387 --- /dev/null +++ b/index/kdump.c @@ -0,0 +1,88 @@ +/* + * Copyright (C) 1995, Index Data I/S + * All rights reserved. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: kdump.c,v $ + * Revision 1.1 1995-09-04 09:10:36 adam + * More work on index add/del/update. + * Merge sort implemented. + * Initial work on z39 server. + * + */ +#include +#include +#include + +#include +#include "index.h" + +char *prog; + +static int read_one (FILE *inf, char *name, char *key) +{ + int c; + int i = 0; + name[0] = 0; + do + { + if ((c=getc(inf)) == EOF) + return 0; + name[i++] = c; + } while (c); + for (i = 0; i +#include +#include +#include +#include + +#include "index.h" + +#define KEY_SIZE (1+sizeof(struct it_key)) +#define INP_NAME_MAX 8192 +#define INP_BUF_START 60000 +#define INP_BUF_ADD 400000 + +static int no_diffs = 0; +static int no_updates = 0; +static int no_insertions = 0; +static int no_iterations = 0; + +static int read_one (FILE *inf, char *name, char *key) +{ + int c; + int i = 0; + name[0] = 0; + do + { + if ((c=getc(inf)) == EOF) + return 0; + name[i++] = c; + } while (c); + for (i = 0; i= key_buf_size) + { + char *new_key_buf; + new_key_buf = xmalloc (key_buf_size + INP_BUF_ADD); + memcpy (new_key_buf, key_buf, key_buf_size); + key_buf_size += INP_BUF_ADD; + xfree (key_buf); + key_buf = new_key_buf; + } + } + no_diffs++; + nmemb = key_buf_ptr / KEY_SIZE; + assert (nmemb*KEY_SIZE == key_buf_ptr); + if ((info = dict_lookup (dict, cur_name))) + { + ISAM_P isam_p, isam_p2; + log (LOG_DEBUG, "updating %s", cur_name); + no_updates++; + memcpy (&isam_p, info+1, sizeof(ISAM_P)); + isam_p2 = is_merge (isam, isam_p, nmemb, key_buf); + if (isam_p2 != isam_p) + dict_insert (dict, cur_name, sizeof(ISAM_P), &isam_p2); + } + else + { + ISAM_P isam_p; + log (LOG_DEBUG, "inserting %s", cur_name); + no_insertions++; + isam_p = is_merge (isam, 0, nmemb, key_buf); + dict_insert (dict, cur_name, sizeof(ISAM_P), &isam_p); + } + memcpy (key_buf, next_key, KEY_SIZE); + strcpy (cur_name, next_name); + } + fclose (inf); + return 0; +} + +void key_input (const char *dict_fname, const char *isam_fname, + const char *key_fname, int cache) +{ + Dict dict; + ISAM isam; + + dict = dict_open (dict_fname, cache, 1); + if (!dict) + { + log (LOG_FATAL, "dict_open fail of `%s'", dict_fname); + exit (1); + } + isam = is_open (isam_fname, key_compare, 1); + if (!isam) + { + log (LOG_FATAL, "is_open fail of `%s'", isam_fname); + exit (1); + } + inp (dict, isam, key_fname); + dict_close (dict); + is_close (isam); + log (LOG_LOG, "Iterations . . .%7d", no_iterations); + log (LOG_LOG, "Distinct words .%7d", no_diffs); + log (LOG_LOG, "Updates. . . . .%7d", no_updates); + log (LOG_LOG, "Insertions . . .%7d", no_insertions); +} diff --git a/index/main.c b/index/main.c index 7127c84..389585d 100644 --- a/index/main.c +++ b/index/main.c @@ -4,7 +4,12 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: main.c,v $ - * Revision 1.4 1995-09-01 14:06:36 adam + * Revision 1.5 1995-09-04 09:10:39 adam + * More work on index add/del/update. + * Merge sort implemented. + * Initial work on z39 server. + * + * Revision 1.4 1995/09/01 14:06:36 adam * Split of work into more files. * * Revision 1.3 1995/09/01 10:57:07 adam @@ -68,6 +73,7 @@ int main (int argc, char **argv) } else { + unlink ("keys.tmp"); key_open ("keys.tmp"); repository (cmd, arg, base_path); cmd = 0; @@ -94,6 +100,12 @@ int main (int argc, char **argv) exit (1); } key_flush (); - key_close (); + if (!key_close ()) + exit (0); + log (LOG_DEBUG, "Sorting"); + if (!key_sort ("keys.tmp", 1000000)) + exit (0); + log (LOG_DEBUG, "Input"); + key_input ("dictinv", "isaminv", "keys.tmp", 50); exit (0); } diff --git a/index/zrpn.c b/index/zrpn.c new file mode 100644 index 0000000..08897a4 --- /dev/null +++ b/index/zrpn.c @@ -0,0 +1,143 @@ +/* + * Copyright (C) 1995, Index Data I/S + * All rights reserved. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: zrpn.c,v $ + * Revision 1.1 1995-09-04 09:10:40 adam + * More work on index add/del/update. + * Merge sort implemented. + * Initial work on z39 server. + * + */ +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include "index.h" + +static Dict dict; +static ISAM isam; + +static RSET rpn_search_APT (Z_AttributesPlusTerm *zapt) +{ + struct rset_isam_parms parms; + const char *info; + Z_Term *term = zapt->term; + + if (term->which != Z_Term_general) + return NULL; + if (!(info = dict_lookup (dict, term->u.general->buf))) + return NULL; + assert (*info == sizeof(parms.pos)); + memcpy (&parms.pos, info+1, sizeof(parms.pos)); + parms.is = isam; + return rset_create (rset_kind_isam, &parms); +} + +static RSET rpn_search_and (RSET r_l, RSET r_r) +{ + struct it_key k1, k2; + RSET r_dst; + int i1, i2; + rset_open (r_l, 0); + rset_open (r_r, 0); + r_dst = rset_create (rset_kind_temp, NULL); + rset_open (r_dst, 1); + + i1 = rset_read (r_l, &k1); + i2 = rset_read (r_r, &k2); + while (i1 && i2) + { + if (k1.sysno > k2.sysno) + i2 = rset_read (r_r, &k2); + else if (k1.sysno < k2.sysno) + i1 = rset_read (r_l, &k1); + else if (!(i1 = key_compare_x (&k1, &k2))) + { + rset_write (r_dst, &k1); + i1 = rset_read (r_l, &k1); + i2 = rset_read (r_r, &k2); + } + else if (i1 > 0) + { + rset_write (r_dst, &k2); + i2 = rset_read (r_r, &k2); + } + else + { + rset_write (r_dst, &k1); + i1 = rset_read (r_l, &k1); + } + } + rset_close (r_dst); + return r_dst; +} + +static RSET rpn_search_or (RSET r_l, RSET r_r) +{ + return r_l; +} + +static RSET rpn_search_not (RSET r_l, RSET r_r) +{ + return r_l; +} + +static RSET rpn_search_ref (Z_ResultSetId *resultSetId) +{ + return NULL; +} + +static RSET rpn_search_structure (Z_RPNStructure *zs) +{ + RSET r; + if (zs->which == Z_RPNStructure_complex) + { + RSET r_l, r_r; + + r_l = rpn_search_structure (zs->u.complex->s1); + r_r = rpn_search_structure (zs->u.complex->s2); + + switch (zs->u.complex->operator->which) + { + case Z_Operator_and: + r = rpn_search_and (r_l, r_r); + break; + case Z_Operator_or: + r = rpn_search_or (r_l, r_r); + break; + case Z_Operator_and_not: + r = rpn_search_not (r_l, r_r); + break; + default: + assert (0); + } + rset_delete (r_l); + rset_delete (r_r); + } + else if (zs->which == Z_RPNStructure_simple) + { + if (zs->u.simple->which == Z_Operand_APT) + r = rpn_search_APT (zs->u.simple->u.attributesPlusTerm); + else if (zs->u.simple->which == Z_Operand_resultSetId) + r = rpn_search_ref (zs->u.simple->u.resultSetId); + else + { + assert (0); + } + } + else + { + assert (0); + } + return r; +} diff --git a/index/zserver.c b/index/zserver.c new file mode 100644 index 0000000..aa6e84c --- /dev/null +++ b/index/zserver.c @@ -0,0 +1,61 @@ +/* + * Copyright (C) 1995, Index Data I/S + * All rights reserved. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: zserver.c,v $ + * Revision 1.1 1995-09-04 09:10:41 adam + * More work on index add/del/update. + * Merge sort implemented. + * Initial work on z39 server. + * + */ +#include +#include +#include + +#include +#include "index.h" + +char *prog; + +int main (int argc, char **argv) +{ + int ret; + char *arg; + char *base_name = NULL; + + prog = *argv; + while ((ret = options ("v:", argv, argc, &arg)) != -2) + { + if (ret == 0) + { + if (!base_name) + { + base_name = arg; + + common_resource = res_open (base_name); + if (!common_resource) + { + log (LOG_FATAL, "Cannot open resource `%s'", base_name); + exit (1); + } + } + } + else if (ret == 'v') + { + log_init (log_mask_str(arg), prog, NULL); + } + else + { + log (LOG_FATAL, "Unknown option '-%s'", arg); + exit (1); + } + } + if (!base_name) + { + fprintf (stderr, "search [-v log] base ...\n"); + exit (1); + } + exit (0); +} -- 1.7.10.4