From fb99bfcb44d29215f411d82646c59f2f74f5a25c Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Tue, 5 Sep 1995 15:28:39 +0000 Subject: [PATCH] More work on search engine. --- index/Makefile | 4 +- index/extract.c | 18 +++++- index/index.h | 11 +++- index/main.c | 8 ++- index/zrpn.c | 126 ++++++++++++++++++++++++++++++---------- index/zserver.c | 172 +++++++++++++++++++++++++++++++++++++++++++------------ index/zserver.h | 45 +++++++++++++++ index/zsets.c | 131 ++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 440 insertions(+), 75 deletions(-) create mode 100644 index/zserver.h create mode 100644 index/zsets.c diff --git a/index/Makefile b/index/Makefile index aa0a2d6..a30b8be 100644 --- a/index/Makefile +++ b/index/Makefile @@ -1,7 +1,7 @@ # Copyright (C) 1995, Index Data I/S # All rights reserved. # Sebastian Hammer, Adam Dickmeiss -# $Id: Makefile,v 1.5 1995-09-04 12:33:40 adam Exp $ +# $Id: Makefile,v 1.6 1995-09-05 15:28:39 adam Exp $ SHELL=/bin/sh RANLIB=ranlib @@ -14,7 +14,7 @@ TPROG3=zserver DEFS=$(INCLUDE) O1 = main.o dir.o trav.o extract.o kinput.o kcompare.o ksort.o O2 = kdump.o -O3 = zserver.o kcompare.o zrpn.o +O3 = zserver.o kcompare.o zrpn.o zsets.o CPP=cc -E all: $(TPROG1) $(TPROG2) $(TPROG3) diff --git a/index/extract.c b/index/extract.c index 0d905f5..a763b7b 100644 --- a/index/extract.c +++ b/index/extract.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: extract.c,v $ - * Revision 1.3 1995-09-04 12:33:41 adam + * Revision 1.4 1995-09-05 15:28:39 adam + * More work on search engine. + * + * Revision 1.3 1995/09/04 12:33:41 adam * Various cleanup. YAZ util used instead. * * Revision 1.2 1995/09/04 09:10:34 adam @@ -30,6 +33,7 @@ static Dict file_idx; static SYSNO sysno_next; static int key_fd = -1; +static int sys_idx_fd = -1; static char *key_buf; static int key_offset; @@ -40,7 +44,7 @@ void key_open (const char *fname) return; if ((key_fd = open (fname, O_RDWR|O_CREAT, 0666)) == -1) { - logf (LOG_FATAL|LOG_ERRNO, "Creat %s", fname); + logf (LOG_FATAL|LOG_ERRNO, "open %s", fname); exit (1); } logf (LOG_DEBUG, "key_open of %s", fname); @@ -50,7 +54,7 @@ void key_open (const char *fname) exit (1); } key_offset = 0; - if (!(file_idx = dict_open ("fileidx", 10, 1))) + if (!(file_idx = dict_open (FNAME_FILE_DICT, 10, 1))) { logf (LOG_FATAL, "dict_open fail of %s", "fileidx"); exit (1); @@ -60,6 +64,11 @@ void key_open (const char *fname) memcpy (&sysno_next, (char*)file_key+1, sizeof(sysno_next)); else sysno_next = 1; + if ((sys_idx_fd = open (FNAME_SYS_IDX, O_RDWR|O_CREAT, 0666)) == -1) + { + logf (LOG_FATAL|LOG_ERRNO, "open %s", FNAME_SYS_IDX); + exit (1); + } } int key_close (void) @@ -70,6 +79,7 @@ int key_close (void) return 0; } close (key_fd); + close (sys_idx_fd); dict_insert (file_idx, ".", sizeof(sysno_next), &sysno_next); dict_close (file_idx); key_fd = -1; @@ -181,6 +191,8 @@ void file_extract (int cmd, const char *fname, const char *kname) { sysno = sysno_next++; dict_insert (file_idx, kname, sizeof(sysno), &sysno); + lseek (sys_idx_fd, sysno * SYS_IDX_ENTRY_LEN, SEEK_SET); + write (sys_idx_fd, kname, strlen(kname)+1); } else memcpy (&sysno, (char*) file_info+1, sizeof(sysno)); diff --git a/index/index.h b/index/index.h index 6e7144a..d5d626a 100644 --- a/index/index.h +++ b/index/index.h @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: index.h,v $ - * Revision 1.5 1995-09-04 12:33:42 adam + * Revision 1.6 1995-09-05 15:28:39 adam + * More work on search engine. + * + * Revision 1.5 1995/09/04 12:33:42 adam * Various cleanup. YAZ util used instead. * * Revision 1.4 1995/09/04 09:10:35 adam @@ -53,3 +56,9 @@ int key_compare_x (const struct it_key *i1, const struct it_key *i2); void key_input (const char *dict_fname, const char *isam_fname, const char *key_fname, int cache); int key_sort (const char *key_fname, size_t mem); + +#define FNAME_WORD_DICT "worddict" +#define FNAME_WORD_ISAM "wordisam" +#define FNAME_FILE_DICT "filedict" +#define FNAME_SYS_IDX "sysidx" +#define SYS_IDX_ENTRY_LEN 120 diff --git a/index/main.c b/index/main.c index 753ee50..71e8325 100644 --- a/index/main.c +++ b/index/main.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: main.c,v $ - * Revision 1.6 1995-09-04 12:33:43 adam + * Revision 1.7 1995-09-05 15:28:39 adam + * More work on search engine. + * + * Revision 1.6 1995/09/04 12:33:43 adam * Various cleanup. YAZ util used instead. * * Revision 1.5 1995/09/04 09:10:39 adam @@ -109,6 +112,7 @@ int main (int argc, char **argv) if (!key_sort ("keys.tmp", 1000000)) exit (0); logf (LOG_DEBUG, "Input"); - key_input ("dictinv", "isaminv", "keys.tmp", 50); + key_input (FNAME_WORD_DICT, FNAME_WORD_ISAM, "keys.tmp", 50); exit (0); } + diff --git a/index/zrpn.c b/index/zrpn.c index 4e30697..17c0dbc 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zrpn.c,v $ - * Revision 1.3 1995-09-04 15:20:22 adam + * Revision 1.4 1995-09-05 15:28:40 adam + * More work on search engine. + * + * Revision 1.3 1995/09/04 15:20:22 adam * Minor changes. * * Revision 1.2 1995/09/04 12:33:43 adam @@ -20,22 +23,12 @@ #include #include -#include -#include -#include +#include "zserver.h" + #include #include -#include - -#include "index.h" - -struct index_info { - Dict dict; - ISAM isam; -}; - -static RSET rpn_search_APT (struct index_info *ii, Z_AttributesPlusTerm *zapt) +static RSET rpn_search_APT (ZServerInfo *zi, Z_AttributesPlusTerm *zapt) { struct rset_isam_parms parms; const char *info; @@ -43,15 +36,22 @@ static RSET rpn_search_APT (struct index_info *ii, Z_AttributesPlusTerm *zapt) if (term->which != Z_Term_general) return NULL; - if (!(info = dict_lookup (ii->dict, term->u.general->buf))) - return NULL; + logf (LOG_DEBUG, "dict_lookup: %s", term->u.general->buf); + if (!(info = dict_lookup (zi->wordDict, term->u.general->buf))) + { + rset_temp_parms parms; + + parms.key_size = sizeof(struct it_key); + return rset_create (rset_kind_temp, &parms); + } assert (*info == sizeof(parms.pos)); memcpy (&parms.pos, info+1, sizeof(parms.pos)); - parms.is = ii->isam; + parms.is = zi->wordIsam; + logf (LOG_DEBUG, "rset_create isam"); return rset_create (rset_kind_isam, &parms); } -static RSET rpn_search_and (struct index_info *ii, RSET r_l, RSET r_r) +static RSET rpn_search_and (ZServerInfo *zi, RSET r_l, RSET r_r) { struct it_key k1, k2; RSET r_dst; @@ -90,54 +90,59 @@ static RSET rpn_search_and (struct index_info *ii, RSET r_l, RSET r_r) return r_dst; } -static RSET rpn_search_or (struct index_info *ii, RSET r_l, RSET r_r) +static RSET rpn_search_or (ZServerInfo *zi, RSET r_l, RSET r_r) { return r_l; } -static RSET rpn_search_not (struct index_info *ii, RSET r_l, RSET r_r) +static RSET rpn_search_not (ZServerInfo *zi, RSET r_l, RSET r_r) { return r_l; } -static RSET rpn_search_ref (struct index_info *ii, Z_ResultSetId *resultSetId) +static RSET rpn_search_ref (ZServerInfo *zi, Z_ResultSetId *resultSetId) { return NULL; } -static RSET rpn_search_structure (struct index_info *ii, Z_RPNStructure *zs) +static RSET rpn_search_structure (ZServerInfo *zi, Z_RPNStructure *zs) { RSET r; if (zs->which == Z_RPNStructure_complex) { RSET r_l, r_r; - r_l = rpn_search_structure (ii, zs->u.complex->s1); - r_r = rpn_search_structure (ii, zs->u.complex->s2); + r_l = rpn_search_structure (zi, zs->u.complex->s1); + r_r = rpn_search_structure (zi, zs->u.complex->s2); switch (zs->u.complex->operator->which) { case Z_Operator_and: - r = rpn_search_and (ii, r_l, r_r); + rset_delete (r_r); break; case Z_Operator_or: - r = rpn_search_or (ii, r_l, r_r); + rset_delete (r_r); break; case Z_Operator_and_not: - r = rpn_search_not (ii, r_l, r_r); + rset_delete (r_r); break; default: assert (0); } - rset_delete (r_l); - rset_delete (r_r); + r = r_l; } else if (zs->which == Z_RPNStructure_simple) { if (zs->u.simple->which == Z_Operand_APT) - r = rpn_search_APT (ii, zs->u.simple->u.attributesPlusTerm); + { + logf (LOG_DEBUG, "rpn_search_APT"); + r = rpn_search_APT (zi, zs->u.simple->u.attributesPlusTerm); + } else if (zs->u.simple->which == Z_Operand_resultSetId) - r = rpn_search_ref (ii, zs->u.simple->u.resultSetId); + { + logf (LOG_DEBUG, "rpn_search_ref"); + r = rpn_search_ref (zi, zs->u.simple->u.resultSetId); + } else { assert (0); @@ -149,3 +154,62 @@ static RSET rpn_search_structure (struct index_info *ii, Z_RPNStructure *zs) } return r; } + +static RSET rpn_save_set (RSET r, int *count) +{ +#if 0 + RSET d; + rset_temp_parms parms; +#endif + int psysno = 0; + struct it_key key; + + logf (LOG_DEBUG, "rpn_save_set"); + *count = 0; +#if 0 + parms.key_size = sizeof(struct it_key); + d = rset_create (rset_kind_temp, &parms); + rset_open (d, 1); +#endif + + rset_open (r, 0); + while (rset_read (r, &key)) + { + if (key.sysno != psysno) + { + psysno = key.sysno; + (*count)++; + } + logf (LOG_DEBUG, "lllllllllllllllll"); +#if 0 + rset_write (d, &key); +#endif + } + rset_close (r); +#if 0 + rset_close (d); +#endif + logf (LOG_DEBUG, "%d distinct sysnos", *count); +#if 0 + return d; +#endif +} + +int rpn_search (ZServerInfo *zi, + Z_RPNQuery *rpn, int num_bases, char **basenames, + const char *setname, int *hits) +{ + RSET rset, result_rset; + + rset = rpn_search_structure (zi, rpn->RPNStructure); + if (!rset) + return 0; + result_rset = rpn_save_set (rset, hits); +#if 0 + rset_delete (result_rset); +#endif + + resultSetAdd (zi, setname, 1, rset); + return 0; +} + diff --git a/index/zserver.c b/index/zserver.c index ea0eb6e..61e4bd6 100644 --- a/index/zserver.c +++ b/index/zserver.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zserver.c,v $ - * Revision 1.2 1995-09-04 12:33:43 adam + * Revision 1.3 1995-09-05 15:28:40 adam + * More work on search engine. + * + * Revision 1.2 1995/09/04 12:33:43 adam * Various cleanup. YAZ util used instead. * * Revision 1.1 1995/09/04 09:10:41 adam @@ -16,49 +19,146 @@ #include #include #include +#include -#include -#include "index.h" +#include "zserver.h" -char *prog; +#include +#include -int main (int argc, char **argv) +ZServerInfo server_info; + +bend_initresult *bend_init (bend_initrequest *q) +{ + static bend_initresult r; + static char *name = "zserver"; + + r.errcode = 0; + r.errstring = 0; + r.handle = name; + + server_info.sets = NULL; + if (!(server_info.sys_idx_fd = open (FNAME_SYS_IDX, O_RDONLY))) + { + r.errcode = 1; + r.errstring = "dict_open fail: filedict"; + return &r; + } + if (!(server_info.fileDict = dict_open (FNAME_FILE_DICT, 5, 0))) + { + r.errcode = 1; + r.errstring = "dict_open fail: filedict"; + return &r; + } + if (!(server_info.wordDict = dict_open (FNAME_WORD_DICT, 20, 0))) + { + dict_close (server_info.fileDict); + r.errcode = 1; + r.errstring = "dict_open fail: worddict"; + return &r; + } + if (!(server_info.wordIsam = is_open (FNAME_WORD_ISAM, key_compare, 0))) + { + dict_close (server_info.wordDict); + dict_close (server_info.fileDict); + r.errcode = 1; + r.errstring = "is_open fail: wordisam"; + return &r; + } + return &r; +} + +bend_searchresult *bend_search (void *handle, bend_searchrequest *q, int *fd) { - int ret; - char *arg; - char *base_name = NULL; + static bend_searchresult r; + + r.errcode = 0; + r.errstring = 0; + r.hits = 0; - prog = *argv; - while ((ret = options ("v:", argv, argc, &arg)) != -2) + switch (q->query->which) { - if (ret == 0) - { - if (!base_name) - { - base_name = arg; - - common_resource = res_open (base_name); - if (!common_resource) - { - logf (LOG_FATAL, "Cannot open resource `%s'", base_name); - exit (1); - } - } - } - else if (ret == 'v') - { - log_init (log_mask_str(arg), prog, NULL); - } - else - { - logf (LOG_FATAL, "Unknown option '-%s'", arg); - exit (1); - } + case Z_Query_type_1: + r.errcode = rpn_search (&server_info, q->query->u.type_1, + q->num_bases, q->basenames, q->setname, + &r.hits); + break; + default: + r.errcode = 107; } - if (!base_name) + return &r; +} + +bend_fetchresult *bend_fetch (void *handle, bend_fetchrequest *q, int *num) +{ + static bend_fetchresult r; + int positions[2]; + ZServerRecord *records; + + r.errstring = 0; + r.last_in_set = 0; + r.basename = "base"; + + positions[0] = q->number; + records = resultSetRecordGet (&server_info, q->setname, 1, positions); + if (!records) + { + logf (LOG_DEBUG, "resultSetRecordGet, error"); + r.errcode = 13; + return &r; + } + r.len = records[0].size; + r.record = malloc (r.len+1); + strcpy (r.record, records[0].buf); + resultSetRecordDel (&server_info, records, 1); + r.format = VAL_SUTRS; + r.errcode = 0; + return &r; +} + +bend_deleteresult *bend_delete (void *handle, bend_deleterequest *q, int *num) +{ + return 0; +} + +bend_scanresult *bend_scan (void *handle, bend_scanrequest *q, int *num) +{ + static struct scan_entry list[200]; + static char buf[200][200]; + static bend_scanresult r; + int i; + + r.term_position = q->term_position; + r.num_entries = q->num_entries; + r.entries = list; + for (i = 0; i < r.num_entries; i++) + { + list[i].term = buf[i]; + sprintf(list[i].term, "term-%d", i+1); + list[i].occurrences = rand() % 100000; + } + r.errcode = 0; + r.errstring = 0; + return &r; +} + +void bend_close (void *handle) +{ + dict_close (server_info.fileDict); + dict_close (server_info.wordDict); + is_close (server_info.wordIsam); + close (server_info.sys_idx_fd); + return; +} + +int main (int argc, char **argv) +{ + char *base_name = "base"; + + if (!(common_resource = res_open (base_name))) { - fprintf (stderr, "zserver [-v log] base ...\n"); + logf (LOG_FATAL, "Cannot open resource `%s'", base_name); exit (1); } - exit (0); + return statserv_main (argc, argv); } diff --git a/index/zserver.h b/index/zserver.h new file mode 100644 index 0000000..33e566b --- /dev/null +++ b/index/zserver.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) 1995, Index Data I/S + * All rights reserved. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: zserver.h,v $ + * Revision 1.1 1995-09-05 15:28:40 adam + * More work on search engine. + * + */ + +#include "index.h" +#include +#include + +typedef struct { + size_t size; + char *buf; +} ZServerRecord; + +typedef struct ZServerSet_ { + char *name; + RSET rset; + int size; + struct ZServerSet_ *next; +} ZServerSet; + +typedef struct { + ZServerSet *sets; + Dict wordDict; + ISAM wordIsam; + Dict fileDict; + int sys_idx_fd; +} ZServerInfo; + +int rpn_search (ZServerInfo *zi, + Z_RPNQuery *rpn, int num_bases, char **basenames, + const char *setname, int *hits); + +ZServerSet *resultSetAdd (ZServerInfo *zi, const char *name, + int ov, RSET rset); +ZServerSet *resultSetGet (ZServerInfo *zi, const char *name); +ZServerRecord *resultSetRecordGet (ZServerInfo *zi, const char *name, + int num, int *positions); +void resultSetRecordDel (ZServerInfo *zi, ZServerRecord *records, int num); diff --git a/index/zsets.c b/index/zsets.c new file mode 100644 index 0000000..223103a --- /dev/null +++ b/index/zsets.c @@ -0,0 +1,131 @@ +/* + * Copyright (C) 1995, Index Data I/S + * All rights reserved. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: zsets.c,v $ + * Revision 1.1 1995-09-05 15:28:40 adam + * More work on search engine. + * + */ +#include +#include +#include + +#include "zserver.h" +#include + +ZServerSet *resultSetAdd (ZServerInfo *zi, const char *name, int ov, RSET rset) +{ + ZServerSet *s; + + for (s = zi->sets; s; s = s->next) + if (!strcmp (s->name, name)) + { + if (!ov) + return NULL; + rset_delete (s->rset); + s->rset = rset; + return s; + } + s = xmalloc (sizeof(*s)); + s->next = zi->sets; + zi->sets = s; + s->name = xmalloc (strlen(name)+1); + strcpy (s->name, name); + s->rset = rset; + return s; +} + +ZServerSet *resultSetGet (ZServerInfo *zi, const char *name) +{ + ZServerSet *s; + + for (s = zi->sets; s; s = s->next) + if (!strcmp (s->name, name)) + return s; + return NULL; +} + +ZServerRecord *resultSetRecordGet (ZServerInfo *zi, const char *name, + int num, int *positions) +{ + ZServerSet *sset; + ZServerRecord *sr; + RSET rset; + int num_i = 0; + int position = 0; + int psysno = 0; + struct it_key key; + + if (!(sset = resultSetGet (zi, name))) + return NULL; + if (!(rset = sset->rset)) + return NULL; + logf (LOG_DEBUG, "resultSetRecordGet"); + sr = xmalloc (sizeof(*sr) * num); + rset_open (rset, 0); + while (rset_read (rset, &key)) + { + logf (LOG_DEBUG, "resultSetRecordGet: %d", key.sysno); + if (key.sysno != psysno) + { + psysno = key.sysno; + position++; + if (position == positions[num_i]) + { + FILE *inf; + char fname[SYS_IDX_ENTRY_LEN]; + + logf (LOG_DEBUG, "get sysno=%d", psysno); + sr[num_i].buf = NULL; + if (lseek (zi->sys_idx_fd, psysno * SYS_IDX_ENTRY_LEN, + SEEK_SET) == -1) + { + logf (LOG_FATAL|LOG_ERRNO, "lseek of sys_idx"); + exit (1); + } + if (read (zi->sys_idx_fd, fname, SYS_IDX_ENTRY_LEN) == -1) + { + logf (LOG_FATAL|LOG_ERRNO, "read of sys_idx"); + exit (1); + } + if (!(inf = fopen (fname, "r"))) + logf (LOG_WARN, "fopen: %s", fname); + else + { + long size; + + fseek (inf, 0L, SEEK_END); + size = ftell (inf); + fseek (inf, 0L, SEEK_SET); + logf (LOG_DEBUG, "get sysno=%d, fname=%s, size=%ld", + psysno, fname, (long) size); + sr[num_i].buf = xmalloc (size+1); + sr[num_i].size = size; + sr[num_i].buf[size] = '\0'; + if (fread (sr[num_i].buf, size, 1, inf) != 1) + { + logf (LOG_FATAL|LOG_ERRNO, "fread %s", fname); + exit (1); + } + fclose (inf); + } + num_i++; + if (num_i == num) + break; + } + } + } + rset_close (rset); + return sr; +} + +void resultSetRecordDel (ZServerInfo *zi, ZServerRecord *records, int num) +{ + int i; + + for (i = 0; i