From caa7fe057dd8617129577a6725d2c4e3da066857 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Wed, 6 Sep 1995 16:11:15 +0000 Subject: [PATCH] Option: only one word key per file. --- index/Makefile | 4 +-- index/dir.c | 9 ++++--- index/extract.c | 40 ++++++++++++++++++++++----- index/index.h | 17 ++++++++++-- index/kcompare.c | 15 +++++++++-- index/kdump.c | 7 +++-- index/kinput.c | 9 ++++--- index/main.c | 15 ++++++----- index/symtab.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ index/trav.c | 7 +++-- index/zrpn.c | 7 +++-- index/zserver.c | 14 ++++++---- index/zserver.h | 7 +++-- index/zsets.c | 7 +++-- 14 files changed, 198 insertions(+), 39 deletions(-) create mode 100644 index/symtab.c diff --git a/index/Makefile b/index/Makefile index a30b8be..ec6c61b 100644 --- a/index/Makefile +++ b/index/Makefile @@ -1,7 +1,7 @@ # Copyright (C) 1995, Index Data I/S # All rights reserved. # Sebastian Hammer, Adam Dickmeiss -# $Id: Makefile,v 1.6 1995-09-05 15:28:39 adam Exp $ +# $Id: Makefile,v 1.7 1995-09-06 16:11:15 adam Exp $ SHELL=/bin/sh RANLIB=ranlib @@ -12,7 +12,7 @@ TPROG1=index TPROG2=kdump TPROG3=zserver DEFS=$(INCLUDE) -O1 = main.o dir.o trav.o extract.o kinput.o kcompare.o ksort.o +O1 = main.o dir.o trav.o extract.o kinput.o kcompare.o ksort.o symtab.o O2 = kdump.o O3 = zserver.o kcompare.o zrpn.o zsets.o CPP=cc -E diff --git a/index/dir.c b/index/dir.c index 6d0e11d..867f2b4 100644 --- a/index/dir.c +++ b/index/dir.c @@ -1,10 +1,13 @@ /* - * Copyright (C) 1995, Index Data I/S + * Copyright (C) 1994-1995, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: dir.c,v $ - * Revision 1.4 1995-09-04 12:33:41 adam + * Revision 1.5 1995-09-06 16:11:16 adam + * Option: only one word key per file. + * + * Revision 1.4 1995/09/04 12:33:41 adam * Various cleanup. YAZ util used instead. * * Revision 1.3 1995/09/01 14:06:35 adam @@ -37,7 +40,7 @@ struct dir_entry *dir_open (const char *rep) size_t idx = 0; struct dir_entry *entry; - logf (LOG_DEBUG, "dir_open %s", rep); + logf (LOG_LOG, "dir_open %s", rep); if (!(dir = opendir(rep))) { logf (LOG_WARN|LOG_ERRNO, "opendir %s", rep); diff --git a/index/extract.c b/index/extract.c index a763b7b..15e0873 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,10 +1,13 @@ /* - * Copyright (C) 1995, Index Data I/S + * Copyright (C) 1994-1995, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: extract.c,v $ - * Revision 1.4 1995-09-05 15:28:39 adam + * Revision 1.5 1995-09-06 16:11:16 adam + * Option: only one word key per file. + * + * Revision 1.4 1995/09/05 15:28:39 adam * More work on search engine. * * Revision 1.3 1995/09/04 12:33:41 adam @@ -54,7 +57,7 @@ void key_open (const char *fname) exit (1); } key_offset = 0; - if (!(file_idx = dict_open (FNAME_FILE_DICT, 10, 1))) + if (!(file_idx = dict_open (FNAME_FILE_DICT, 40, 1))) { logf (LOG_FATAL, "dict_open fail of %s", "fileidx"); exit (1); @@ -123,7 +126,27 @@ void key_write (int cmd, struct it_key *k, const char *str) key_offset += sizeof(*k); } -void text_extract (SYSNO sysno, int cmd, const char *fname) +void key_write_x (struct strtab *t, int cmd, struct it_key *k, const char *str) +{ + void **oldinfo; + + if (strtab_src (t, str, &oldinfo)) + ((struct it_key *) *oldinfo)->seqno++; + else + { + *oldinfo = xmalloc (sizeof(*k)); + memcpy (*oldinfo, k, sizeof(*k)); + ((struct it_key *) *oldinfo)->seqno = 1; + } +} + +void key_rec_flush (const char *str, void *info, void *data) +{ + key_write (*((int*) data), (struct it_key *)info, str); + xfree (info); +} + +void text_extract (struct strtab *t, SYSNO sysno, int cmd, const char *fname) { FILE *inf; struct it_key k; @@ -152,8 +175,10 @@ void text_extract (SYSNO sysno, int cmd, const char *fname) w[i] = 0; k.seqno = seqno++; +#if IT_KEY_HAVE_FIELD k.field = 0; - key_write (cmd, &k, w); +#endif + key_write_x (t, cmd, &k, w); } if (c == EOF) break; @@ -169,6 +194,7 @@ void file_extract (int cmd, const char *fname, const char *kname) char ext_res[128]; const char *file_type; void *file_info; + struct strtab *t; logf (LOG_DEBUG, "%c %s k=%s", cmd, fname, kname); for (i = strlen(fname); --i >= 0; ) @@ -196,8 +222,10 @@ void file_extract (int cmd, const char *fname, const char *kname) } else memcpy (&sysno, (char*) file_info+1, sizeof(sysno)); + t = strtab_mk (); if (!strcmp (file_type, "text")) - text_extract (sysno, cmd, fname); + text_extract (t, sysno, cmd, fname); + strtab_del (t, key_rec_flush, &cmd); } diff --git a/index/index.h b/index/index.h index d5d626a..4a95800 100644 --- a/index/index.h +++ b/index/index.h @@ -1,10 +1,13 @@ /* - * Copyright (C) 1995, Index Data I/S + * Copyright (C) 1994-1995, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: index.h,v $ - * Revision 1.6 1995-09-05 15:28:39 adam + * Revision 1.7 1995-09-06 16:11:16 adam + * Option: only one word key per file. + * + * Revision 1.6 1995/09/05 15:28:39 adam * More work on search engine. * * Revision 1.5 1995/09/04 12:33:42 adam @@ -30,10 +33,14 @@ #include #include +#define IT_KEY_HAVE_FIELD 0 + struct it_key { int sysno; int seqno; +#if IT_KEY_HAVE_FIELD int field; +#endif }; struct dir_entry { @@ -62,3 +69,9 @@ int key_sort (const char *key_fname, size_t mem); #define FNAME_FILE_DICT "filedict" #define FNAME_SYS_IDX "sysidx" #define SYS_IDX_ENTRY_LEN 120 + +struct strtab *strtab_mk (void); +int strtab_src (struct strtab *t, const char *name, void ***infop); +void strtab_del (struct strtab *t, + void (*func)(const char *name, void *info, void *data), + void *data); diff --git a/index/kcompare.c b/index/kcompare.c index 5217ed9..7ce3a24 100644 --- a/index/kcompare.c +++ b/index/kcompare.c @@ -1,10 +1,13 @@ /* - * Copyright (C) 1995, Index Data I/S + * Copyright (C) 1994-1995, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: kcompare.c,v $ - * Revision 1.1 1995-09-04 09:10:36 adam + * Revision 1.2 1995-09-06 16:11:17 adam + * Option: only one word key per file. + * + * Revision 1.1 1995/09/04 09:10:36 adam * More work on index add/del/update. * Merge sort implemented. * Initial work on z39 server. @@ -25,17 +28,25 @@ int key_compare (const void *p1, const void *p2) memcpy (&i2, p2, sizeof(i2)); if ( i1.sysno != i2.sysno) return i1.sysno - i2.sysno; +#if IT_KEY_HAVE_FIELD if ( i1.seqno != i2.seqno) return i1.seqno - i2.seqno; return i1.field - i2.field; +#else + return i1.seqno - i2.seqno; +#endif } int key_compare_x (const struct it_key *i1, const struct it_key *i2) { if ( i1->sysno != i2->sysno) return i1->sysno - i2->sysno; +#if IT_KEY_HAVE_FIELD if ( i1->seqno != i2->seqno) return i1->seqno - i2->seqno; return i1->field - i2->field; +#else + return i1->seqno - i2->seqno; +#endif } diff --git a/index/kdump.c b/index/kdump.c index b9f9ae0..a37d379 100644 --- a/index/kdump.c +++ b/index/kdump.c @@ -1,10 +1,13 @@ /* - * Copyright (C) 1995, Index Data I/S + * Copyright (C) 1994-1995, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: kdump.c,v $ - * Revision 1.2 1995-09-04 12:33:42 adam + * Revision 1.3 1995-09-06 16:11:17 adam + * Option: only one word key per file. + * + * Revision 1.2 1995/09/04 12:33:42 adam * Various cleanup. YAZ util used instead. * * Revision 1.1 1995/09/04 09:10:36 adam diff --git a/index/kinput.c b/index/kinput.c index 9e6a773..9f834a9 100644 --- a/index/kinput.c +++ b/index/kinput.c @@ -1,10 +1,13 @@ /* - * Copyright (C) 1995, Index Data I/S + * Copyright (C) 1994-1995, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: kinput.c,v $ - * Revision 1.2 1995-09-04 12:33:42 adam + * Revision 1.3 1995-09-06 16:11:17 adam + * Option: only one word key per file. + * + * Revision 1.2 1995/09/04 12:33:42 adam * Various cleanup. YAZ util used instead. * * Revision 1.1 1995/09/04 09:10:37 adam @@ -130,7 +133,7 @@ void key_input (const char *dict_fname, const char *isam_fname, logf (LOG_FATAL, "dict_open fail of `%s'", dict_fname); exit (1); } - isam = is_open (isam_fname, key_compare, 1); + isam = is_open (isam_fname, key_compare, 1, sizeof(struct it_key)); if (!isam) { logf (LOG_FATAL, "is_open fail of `%s'", isam_fname); diff --git a/index/main.c b/index/main.c index 71e8325..f8e1362 100644 --- a/index/main.c +++ b/index/main.c @@ -1,10 +1,13 @@ /* - * Copyright (C) 1995, Index Data I/S + * Copyright (C) 1994-1995, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: main.c,v $ - * Revision 1.7 1995-09-05 15:28:39 adam + * Revision 1.8 1995-09-06 16:11:18 adam + * Option: only one word key per file. + * + * Revision 1.7 1995/09/05 15:28:39 adam * More work on search engine. * * Revision 1.6 1995/09/04 12:33:43 adam @@ -108,11 +111,11 @@ int main (int argc, char **argv) key_flush (); if (!key_close ()) exit (0); - logf (LOG_DEBUG, "Sorting"); - if (!key_sort ("keys.tmp", 1000000)) + logf (LOG_LOG, "Sorting"); + if (!key_sort ("keys.tmp", 3000000)) exit (0); - logf (LOG_DEBUG, "Input"); - key_input (FNAME_WORD_DICT, FNAME_WORD_ISAM, "keys.tmp", 50); + logf (LOG_LOG, "Input"); + key_input (FNAME_WORD_DICT, FNAME_WORD_ISAM, "keys.tmp", 60); exit (0); } diff --git a/index/symtab.c b/index/symtab.c new file mode 100644 index 0000000..d656621 --- /dev/null +++ b/index/symtab.c @@ -0,0 +1,79 @@ +/* + * Copyright (C) 1994-1995, Index Data I/S + * All rights reserved. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: symtab.c,v $ + * Revision 1.1 1995-09-06 16:11:18 adam + * Option: only one word key per file. + * + */ +#include +#include +#include + +#include +#include "index.h" + +struct strentry { + char *name; + void *info; + struct strentry *next; +}; + +#define STR_HASH 401 + +struct strtab { + struct strentry *ar[STR_HASH]; +}; + +struct strtab *strtab_mk (void) +{ + int i; + struct strtab *p = xmalloc (sizeof (*p)); + for (i=0; iar[i] = NULL; + return p; +} + +int strtab_src (struct strtab *t, const char *name, void ***infop) +{ + unsigned hash = 0; + int i; + struct strentry *e; + + for (i=0; name[i]; i++) + hash += hash*65519 + name[i]; + hash = hash % STR_HASH; + for (e = t->ar[hash]; e; e = e->next) + if (!strcmp(e->name, name)) + { + *infop = &e->info; + return 1; + } + e = xmalloc (sizeof(*e)); + e->name = xmalloc (strlen(name)+1); + strcpy (e->name, name); + e->next = t->ar[hash]; + t->ar[hash] = e; + *infop = &e->info; + return 0; +} + +void strtab_del (struct strtab *t, + void (*func)(const char *name, void *info, void *data), + void *data) +{ + int i; + struct strentry *e, *e1; + + for (i = 0; iar[i]; e; e = e1) + { + e1 = e->next; + (*func)(e->name, e->info, data); + free (e->name); + free (e); + } + free (t); +} diff --git a/index/trav.c b/index/trav.c index 4accc88..b87dbab 100644 --- a/index/trav.c +++ b/index/trav.c @@ -1,10 +1,13 @@ /* - * Copyright (C) 1995, Index Data I/S + * Copyright (C) 1994-1995, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: trav.c,v $ - * Revision 1.2 1995-09-04 12:33:43 adam + * Revision 1.3 1995-09-06 16:11:18 adam + * Option: only one word key per file. + * + * Revision 1.2 1995/09/04 12:33:43 adam * Various cleanup. YAZ util used instead. * * Revision 1.1 1995/09/01 14:06:36 adam diff --git a/index/zrpn.c b/index/zrpn.c index 46d7afd..23b8243 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -1,10 +1,13 @@ /* - * Copyright (C) 1995, Index Data I/S + * Copyright (C) 1994-1995, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: zrpn.c,v $ - * Revision 1.5 1995-09-06 10:33:04 adam + * Revision 1.6 1995-09-06 16:11:18 adam + * Option: only one word key per file. + * + * Revision 1.5 1995/09/06 10:33:04 adam * More work on present. Some log messages removed. * * Revision 1.4 1995/09/05 15:28:40 adam diff --git a/index/zserver.c b/index/zserver.c index 8fa470f..204f5e4 100644 --- a/index/zserver.c +++ b/index/zserver.c @@ -1,10 +1,13 @@ /* - * Copyright (C) 1995, Index Data I/S + * Copyright (C) 1994-1995, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: zserver.c,v $ - * Revision 1.4 1995-09-06 10:33:04 adam + * Revision 1.5 1995-09-06 16:11:18 adam + * Option: only one word key per file. + * + * Revision 1.4 1995/09/06 10:33:04 adam * More work on present. Some log messages removed. * * Revision 1.3 1995/09/05 15:28:40 adam @@ -47,20 +50,21 @@ bend_initresult *bend_init (bend_initrequest *q) r.errstring = "dict_open fail: filedict"; return &r; } - if (!(server_info.fileDict = dict_open (FNAME_FILE_DICT, 5, 0))) + if (!(server_info.fileDict = dict_open (FNAME_FILE_DICT, 10, 0))) { r.errcode = 1; r.errstring = "dict_open fail: filedict"; return &r; } - if (!(server_info.wordDict = dict_open (FNAME_WORD_DICT, 20, 0))) + if (!(server_info.wordDict = dict_open (FNAME_WORD_DICT, 40, 0))) { dict_close (server_info.fileDict); r.errcode = 1; r.errstring = "dict_open fail: worddict"; return &r; } - if (!(server_info.wordIsam = is_open (FNAME_WORD_ISAM, key_compare, 0))) + if (!(server_info.wordIsam = is_open (FNAME_WORD_ISAM, key_compare, 0, + sizeof (struct it_key)))) { dict_close (server_info.wordDict); dict_close (server_info.fileDict); diff --git a/index/zserver.h b/index/zserver.h index 33e566b..5190026 100644 --- a/index/zserver.h +++ b/index/zserver.h @@ -1,10 +1,13 @@ /* - * Copyright (C) 1995, Index Data I/S + * Copyright (C) 1994-1995, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: zserver.h,v $ - * Revision 1.1 1995-09-05 15:28:40 adam + * Revision 1.2 1995-09-06 16:11:19 adam + * Option: only one word key per file. + * + * Revision 1.1 1995/09/05 15:28:40 adam * More work on search engine. * */ diff --git a/index/zsets.c b/index/zsets.c index 2963e4f..cc1c544 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -1,10 +1,13 @@ /* - * Copyright (C) 1995, Index Data I/S + * Copyright (C) 1994-1995, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: zsets.c,v $ - * Revision 1.2 1995-09-06 10:33:04 adam + * Revision 1.3 1995-09-06 16:11:19 adam + * Option: only one word key per file. + * + * Revision 1.2 1995/09/06 10:33:04 adam * More work on present. Some log messages removed. * * Revision 1.1 1995/09/05 15:28:40 adam -- 1.7.10.4