From 4e45c8e2e01c52e8d03dbd9a0873a9e2ff7e5f91 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Tue, 14 May 1996 06:16:35 +0000 Subject: [PATCH] Compact use/set bytes used in search service. --- index/Makefile | 4 +- index/extract.c | 54 ++++++++------- index/zinfo.c | 34 +++++++--- index/zrpn.c | 196 +++++++++++++++++++++++++++++++++++++++++++++++++++---- index/zserver.c | 7 +- index/zserver.h | 8 ++- 6 files changed, 252 insertions(+), 51 deletions(-) diff --git a/index/Makefile b/index/Makefile index 9246d15..aa2bd28 100644 --- a/index/Makefile +++ b/index/Makefile @@ -1,7 +1,7 @@ # Copyright (C) 1995, Index Data I/S # All rights reserved. # Sebastian Hammer, Adam Dickmeiss -# $Id: Makefile,v 1.34 1996-05-13 14:23:03 adam Exp $ +# $Id: Makefile,v 1.35 1996-05-14 06:16:35 adam Exp $ SHELL=/bin/sh RANLIB=ranlib @@ -23,7 +23,7 @@ O1 = main.o dir.o dirs.o trav.o extract.o kinput.o kcompare.o \ lockutil.o lockidx.o zinfo.o O2 = kdump.o O3 = zserver.o kcompare.o zrpn.o zsets.o text.o recctrl.o structrec.o \ - attribute.o recindex.o zlogs.o regxread.o lockutil.o locksrv.o + attribute.o recindex.o zlogs.o regxread.o lockutil.o locksrv.o zinfo.o CPP=$(CC) -E all: $(TPROG1) $(TPROG2) $(TPROG3) diff --git a/index/extract.c b/index/extract.c index 62a7613..9a08b1b 100644 --- a/index/extract.c +++ b/index/extract.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: extract.c,v $ - * Revision 1.57 1996-05-13 14:23:04 adam + * Revision 1.58 1996-05-14 06:16:38 adam + * Compact use/set bytes used in search service. + * + * Revision 1.57 1996/05/13 14:23:04 adam * Work on compaction of set/use bytes in dictionary. * * Revision 1.56 1996/05/09 09:54:42 adam @@ -437,32 +440,35 @@ static void addRecordKey (const RecWord *p) else reckeys.prevAttrUse = attrUse; + *dst++ = lead; + + if (!(lead & 1)) + { + memcpy (dst, &attrSet, sizeof(attrSet)); + dst += sizeof(attrSet); + } + if (!(lead & 2)) + { + memcpy (dst, &attrUse, sizeof(attrUse)); + dst += sizeof(attrUse); + } switch (p->which) { - case Word_String: case Word_Phrase: - *dst++ = lead; - - if (!(lead & 1)) - { - memcpy (dst, &attrSet, sizeof(attrSet)); - dst += sizeof(attrSet); - } - if (!(lead & 2)) - { - memcpy (dst, &attrUse, sizeof(attrUse)); - dst += sizeof(attrUse); - } - for (i = 0; p->u.string[i]; i++) - *dst++ = p->u.string[i]; - *dst++ = '\0'; - - memcpy (dst, &p->seqno, sizeof(p->seqno)); - dst += sizeof(p->seqno); - - break; - default: - return; + case Word_String: + *dst++ = 'w'; + break; + case Word_Phrase: + *dst++ = 'p'; + break; + case Word_Numeric: + *dst++ = 'n'; } + for (i = 0; p->u.string[i]; i++) + *dst++ = p->u.string[i]; + *dst++ = '\0'; + + memcpy (dst, &p->seqno, sizeof(p->seqno)); + dst += sizeof(p->seqno); reckeys.buf_used = dst - reckeys.buf; } diff --git a/index/zinfo.c b/index/zinfo.c index 4f34867..b30a570 100644 --- a/index/zinfo.c +++ b/index/zinfo.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zinfo.c,v $ - * Revision 1.1 1996-05-13 14:23:07 adam + * Revision 1.2 1996-05-14 06:16:41 adam + * Compact use/set bytes used in search service. + * + * Revision 1.1 1996/05/13 14:23:07 adam * Work on compaction of set/use bytes in dictionary. * */ @@ -30,6 +33,7 @@ struct zebDatabaseInfo { char *databaseName; int sysno; int readFlag; + int dirty; struct zebDatabaseInfo *next; }; @@ -47,14 +51,13 @@ void zebTargetInfo_close (ZebTargetInfo *zti, int writeFlag) if (writeFlag) { - Record grec; char p0[4096], *p = p0; memcpy (p, &zti->dictNum, sizeof(zti->dictNum)); p += sizeof(zti->dictNum); for (zdi = zti->databaseInfo; zdi; zdi=zdi->next) { - if (zdi->readFlag || !zdi->sysno) + if (zdi->dirty) { char q0[4096], *q = q0; struct zebSUInfoB *zsui; @@ -89,13 +92,18 @@ void zebTargetInfo_close (ZebTargetInfo *zti, int writeFlag) memcpy (p, &zdi->sysno, sizeof(zdi->sysno)); p += sizeof(zdi->sysno); } - *p = '\0'; - grec = rec_get (zti->records, 1); - xfree (grec->info[0]); - grec->size[0] = p-p0; - grec->info[0] = xmalloc (grec->size[0]); - memcpy (grec->info[0], p0, grec->size[0]); - rec_put (zti->records, &grec); + *p++ = '\0'; + if (zti->dirty) + { + Record grec = rec_get (zti->records, 1); + + assert (grec); + xfree (grec->info[0]); + grec->size[0] = p-p0; + grec->info[0] = xmalloc (grec->size[0]); + memcpy (grec->info[0], p0, grec->size[0]); + rec_put (zti->records, &grec); + } } for (zdi = zti->databaseInfo; zdi; zdi = zdi1) { @@ -144,6 +152,7 @@ ZebTargetInfo *zebTargetInfo_open (Records records, int writeFlag) memcpy (&(*zdi)->sysno, p, sizeof((*zdi)->sysno)); p += sizeof((*zdi)->sysno); (*zdi)->readFlag = 1; + (*zdi)->dirty = 0; zdi = &(*zdi)->next; } assert (p - rec->info[0] == rec->size[0]-1); @@ -157,6 +166,7 @@ ZebTargetInfo *zebTargetInfo_open (Records records, int writeFlag) rec->info[0] = xmalloc (1+sizeof(zti->dictNum)); memcpy (rec->info[0], &zti->dictNum, sizeof(zti->dictNum)); rec->info[0][sizeof(zti->dictNum)] = '\0'; + rec->size[0] = sizeof(zti->dictNum)+1; rec_put (records, &rec); } } @@ -230,6 +240,8 @@ int zebTargetInfo_newDatabase (ZebTargetInfo *zti, const char *database) zdi->readFlag = 0; zdi->databaseName = xstrdup (database); zdi->SUInfo = NULL; + zdi->dirty = 1; + zti->dirty = 1; zti->curDatabaseInfo = zdi; return 0; } @@ -256,6 +268,8 @@ int zebTargetInfo_addSU (ZebTargetInfo *zti, int set, int use) zsui = xmalloc (sizeof(*zsui)); zsui->next = zti->curDatabaseInfo->SUInfo; zti->curDatabaseInfo->SUInfo = zsui; + zti->curDatabaseInfo->dirty = 1; + zti->dirty = 1; zsui->info.set = set; zsui->info.use = use; zsui->info.ordinal = (zti->dictNum)++; diff --git a/index/zrpn.c b/index/zrpn.c index 289adfa..409cea0 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zrpn.c,v $ - * Revision 1.43 1996-05-09 09:54:43 adam + * Revision 1.44 1996-05-14 06:16:44 adam + * Compact use/set bytes used in search service. + * + * Revision 1.43 1996/05/09 09:54:43 adam * Server supports maps from one logical attributes to a list of physical * attributes. * The extraction process doesn't make space consuming 'any' keys. @@ -686,6 +689,124 @@ static int field_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, for (base_no = 0; base_no < num_bases; base_no++) { +#if 1 + attent *attp; + data1_local_attribute *local_attr; + int max_pos, prefix_len = 0; + + attp = att_getentbyatt (curAttributeSet, use_value); + if (!attp) + { + zi->errCode = 114; + return -1; + } + if (zebTargetInfo_curDatabase (zi->zti, basenames[base_no])) + { + zi->errCode = 109; /* Database unavailable */ + zi->errString = basenames[base_no]; + } + for (local_attr = attp->local_attributes; local_attr; + local_attr = local_attr->next) + { + int ord; + + ord = zebTargetInfo_lookupSU (zi->zti, attp->attset_ordinal, + local_attr->local); + if (ord < 0) + continue; + if (prefix_len) + term_dict[prefix_len++] = '|'; + else + term_dict[prefix_len++] = '('; + if ((ord >= 'A' && ord <= 'Z') || (ord >= 'a' && ord <= 'z')) + term_dict[prefix_len++] = ord; + else + { + term_dict[prefix_len++] = '\\'; + term_dict[prefix_len++] = ord; + } + } + if (!prefix_len) + { + zi->errCode = 114; + return -1; + } + term_dict[prefix_len++] = ')'; + term_dict[prefix_len] = '\0'; + if (!relational_term (zi, zapt, term_sub, term_dict, + attributeSet, grep_info, &max_pos)) + { + const char *cp; + + j = prefix_len; + switch (truncation_value) + { + case -1: /* not specified */ + case 100: /* do not truncate */ + term_dict[j++] = '('; + for (i = 0; term_sub[i]; i++) + verbatim_char (term_sub[i], &j, term_dict); + strcpy (term_dict+j, ")"); + r = dict_lookup_grep (zi->wordDict, term_dict, 0, grep_info, + &max_pos, 0, grep_handle); + if (r) + logf (LOG_WARN, "dict_lookup_grep err, trunc=none:%d", r); + break; + case 1: /* right truncation */ + term_dict[j++] = '('; + for (i = 0; term_sub[i]; i++) + verbatim_char (term_sub[i], &j, term_dict); + strcpy (term_dict+j, ".*)"); + dict_lookup_grep (zi->wordDict, term_dict, 0, grep_info, + &max_pos, 0, grep_handle); + break; + case 2: /* left truncation */ + case 3: /* left&right truncation */ + zi->errCode = 120; + return -1; + case 101: /* process # in term */ + term_dict[j++] = '('; + for (i=0; term_sub[i]; i++) + if (term_sub[i] == '#' && i > 2) + { + term_dict[j++] = '.'; + term_dict[j++] = '*'; + } + else + verbatim_char (term_sub[i], &j, term_dict); + strcpy (term_dict+j, ")"); + r = dict_lookup_grep (zi->wordDict, term_dict, 0, grep_info, + &max_pos, 0, grep_handle); + if (r) + logf (LOG_WARN, "dict_lookup_grep err, trunc=#: %d", + r); + break; + case 102: /* regular expression */ + sprintf (term_dict + j, "(%s)", term_sub); + r = dict_lookup_grep (zi->wordDict, term_dict, 0, grep_info, + &max_pos, 0, grep_handle); + if (r) + logf (LOG_WARN, "dict_lookup_grep err, trunc=regular: %d", + r); + break; + case 103: /* regular expression with error correction */ + cp = term_sub; + r = 0; + if (*cp == '*' && cp[1] && cp[2]) + { + r = atoi (cp+1); + cp += 2; + } + sprintf (term_dict + j, "(%s)", cp); + r = dict_lookup_grep (zi->wordDict, term_dict, r, grep_info, + &max_pos, j, grep_handle); + if (r) + logf (LOG_WARN, "dict_lookup_grep err, trunc=eregular: %d", + r); + break; + } + } +#else int max_pos; #if 1 attent *attp; @@ -874,23 +995,25 @@ static int field_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, return -1; } #endif +#endif } logf (LOG_DEBUG, "%d positions", grep_info->isam_p_indx); return 0; } static void trans_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, - char *termz) + int regType, char *termz) { size_t i, sizez; Z_Term *term = zapt->term; sizez = term->u.general->len; - if (sizez > IT_MAX_WORD) - sizez = IT_MAX_WORD; + if (sizez > IT_MAX_WORD-1) + sizez = IT_MAX_WORD-1; + termz[0] = regType; for (i = 0; i < sizez; i++) - termz[i] = index_char_cvt (term->u.general->buf[i]); - termz[i] = '\0'; + termz[i+1] = index_char_cvt (term->u.general->buf[i]); + termz[i+1] = '\0'; } static RSET rpn_search_APT_relevance (ZServerInfo *zi, @@ -915,7 +1038,8 @@ static RSET rpn_search_APT_relevance (ZServerInfo *zi, zi->errCode = 124; return NULL; } - trans_term (zi, zapt, termz); + trans_term (zi, zapt, 'w', termz); + grep_info.isam_p_indx = 0; grep_info.isam_p_size = 0; grep_info.isam_p_buf = NULL; @@ -947,6 +1071,44 @@ static RSET rpn_search_APT_relevance (ZServerInfo *zi, return result; } +static RSET rpn_search_APT_cphrase (ZServerInfo *zi, + Z_AttributesPlusTerm *zapt, + oid_value attributeSet, + int num_bases, char **basenames) +{ + rset_isam_parms parms; + char termz[IT_MAX_WORD+1]; + struct grep_info grep_info; + RSET result; + + if (zapt->term->which != Z_Term_general) + { + zi->errCode = 124; + return NULL; + } + trans_term (zi, zapt, 'p', termz); + + grep_info.isam_p_indx = 0; + grep_info.isam_p_size = 0; + grep_info.isam_p_buf = NULL; + + if (field_term (zi, zapt, termz, attributeSet, &grep_info, + num_bases, basenames)) + return NULL; + if (grep_info.isam_p_indx < 1) + result = rset_create (rset_kind_null, NULL); + else if (grep_info.isam_p_indx == 1) + { + parms.is = zi->wordIsam; + parms.pos = *grep_info.isam_p_buf; + result = rset_create (rset_kind_isam, &parms); + } + else + result = rset_trunc (zi->wordIsam, grep_info.isam_p_buf, + grep_info.isam_p_indx); + xfree (grep_info.isam_p_buf); + return result; +} static RSET rpn_search_APT_word (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, oid_value attributeSet, @@ -962,7 +1124,7 @@ static RSET rpn_search_APT_word (ZServerInfo *zi, zi->errCode = 124; return NULL; } - trans_term (zi, zapt, termz); + trans_term (zi, zapt, 'w', termz); grep_info.isam_p_indx = 0; grep_info.isam_p_size = 0; @@ -1089,7 +1251,7 @@ static RSET rpn_search_APT_phrase (ZServerInfo *zi, zi->errCode = 124; return NULL; } - trans_term (zi, zapt, termz); + trans_term (zi, zapt, 'w', termz); grep_info.isam_p_size = 0; grep_info.isam_p_buf = NULL; @@ -1161,8 +1323,9 @@ static RSET rpn_search_APT_local (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, result = rset_create (rset_kind_temp, &parms); rsfd = rset_open (result, RSETF_WRITE|RSETF_SORT_SYSNO); - trans_term (zi, zapt, termz); - key.sysno = atoi (termz); + trans_term (zi, zapt, 'w', termz); + + key.sysno = atoi (termz+1); if (key.sysno <= 0) key.sysno = 1; rset_write (result, rsfd, &key); @@ -1176,25 +1339,34 @@ static RSET rpn_search_APT (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, { AttrType relation; AttrType structure; - int relation_value, structure_value; + AttrType completeness; + int relation_value, structure_value, completeness_value; attr_init (&relation, zapt, 2); attr_init (&structure, zapt, 4); + attr_init (&completeness, zapt, 6); relation_value = attr_find (&relation, NULL); structure_value = attr_find (&structure, NULL); + completeness_value = attr_find (&completeness, NULL); switch (structure_value) { case -1: if (relation_value == 102) /* relevance relation */ return rpn_search_APT_relevance (zi, zapt, attributeSet, num_bases, basenames); + if (completeness_value == 2 || completeness_value == 3) + return rpn_search_APT_cphrase (zi, zapt, attributeSet, + num_bases, basenames); return rpn_search_APT_phrase (zi, zapt, attributeSet, num_bases, basenames); case 1: /* phrase */ if (relation_value == 102) /* relevance relation */ return rpn_search_APT_relevance (zi, zapt, attributeSet, num_bases, basenames); + if (completeness_value == 2 || completeness_value == 3) + return rpn_search_APT_cphrase (zi, zapt, attributeSet, + num_bases, basenames); return rpn_search_APT_phrase (zi, zapt, attributeSet, num_bases, basenames); break; diff --git a/index/zserver.c b/index/zserver.c index c3ea6b4..b6b3d00 100644 --- a/index/zserver.c +++ b/index/zserver.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zserver.c,v $ - * Revision 1.36 1996-05-01 13:46:37 adam + * Revision 1.37 1996-05-14 06:16:48 adam + * Compact use/set bytes used in search service. + * + * Revision 1.36 1996/05/01 13:46:37 adam * First work on multiple records in one file. * New option, -offset, to the "unread" command in the filter module. * @@ -176,6 +179,7 @@ static int register_lock (ZServerInfo *zi) zi->registerChange = lastChange; if (zi->records) { + zebTargetInfo_close (zi->zti, 0); dict_close (zi->wordDict); is_close (zi->wordIsam); rec_close (&zi->records); @@ -188,6 +192,7 @@ static int register_lock (ZServerInfo *zi) if (!(zi->wordIsam = is_open (FNAME_WORD_ISAM, key_compare, 0, sizeof (struct it_key)))) return -1; + zi->zti = zebTargetInfo_open (zi->records, 0); return 0; } diff --git a/index/zserver.h b/index/zserver.h index 9ee5afc..447d6af 100644 --- a/index/zserver.h +++ b/index/zserver.h @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zserver.h,v $ - * Revision 1.17 1995-12-08 16:22:57 adam + * Revision 1.18 1996-05-14 06:16:50 adam + * Compact use/set bytes used in search service. + * + * Revision 1.17 1995/12/08 16:22:57 adam * Work on update while servers are running. Three lock files introduced. * The servers reload their registers when necessary, but they don't * reestablish result sets yet. @@ -65,7 +68,7 @@ #include #include "index.h" -#include "recindex.h" +#include "zinfo.h" typedef struct { int sysno; @@ -89,6 +92,7 @@ typedef struct { int errCode; char *errString; ODR odr; + ZebTargetInfo *zti; } ZServerInfo; int rpn_search (ZServerInfo *zi, -- 1.7.10.4