From ef9dd3a80ee0fa95abeeb0a0e2d66856b6dd2d05 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Thu, 14 Sep 1995 11:53:27 +0000 Subject: [PATCH] First work on regular expressions/truncations. --- index/Makefile | 7 +- index/zrpn.c | 359 +++++++++++++++++++++++++++++++++++++++++++------------ index/zserver.h | 7 +- 3 files changed, 295 insertions(+), 78 deletions(-) diff --git a/index/Makefile b/index/Makefile index 97f5505..d7c5d00 100644 --- a/index/Makefile +++ b/index/Makefile @@ -1,12 +1,13 @@ # Copyright (C) 1995, Index Data I/S # All rights reserved. # Sebastian Hammer, Adam Dickmeiss -# $Id: Makefile,v 1.9 1995-09-14 07:48:20 adam Exp $ +# $Id: Makefile,v 1.10 1995-09-14 11:53:27 adam Exp $ SHELL=/bin/sh RANLIB=ranlib YAZ=../../yaz YAZLIB=$(YAZ)/lib/libyaz.a +OSILIB=../../xtimosi/src/libmosi.a $(YAZ)/lib/librfc.a INCLUDE=-I../include -I$(YAZ)/include TPROG1=index TPROG2=kdump @@ -22,7 +23,7 @@ all: $(TPROG1) $(TPROG2) $(TPROG3) $(TPROG1): $(O1) ../lib/dict.a \ ../lib/isam.a ../lib/bfile.a ../lib/alexutil.a $(YAZLIB) $(CC) $(CFLAGS) -o $(TPROG1) $(O1) ../lib/dict.a \ - ../lib/isam.a ../lib/bfile.a ../lib/alexutil.a $(YAZLIB) + ../lib/isam.a ../lib/bfile.a ../lib/alexutil.a $(YAZLIB) $(OSILIB) $(TPROG2): $(O2) $(YAZLIB) $(CC) $(CFLAGS) -o $(TPROG2) $(O2) $(YAZLIB) @@ -32,7 +33,7 @@ $(TPROG3): $(O3) \ ../lib/dfa.a ../lib/alexutil.a $(YAZLIB) $(CC) $(CFLAGS) -o $(TPROG3) $(O3) \ ../lib/rset.a ../lib/dict.a ../lib/isam.a ../lib/bfile.a \ - ../lib/dfa.a ../lib/alexutil.a $(YAZLIB) -lm + ../lib/dfa.a ../lib/alexutil.a $(YAZLIB) $(OSILIB) -lm .c.o: $(CC) -c $(DEFS) $(CFLAGS) $< diff --git a/index/zrpn.c b/index/zrpn.c index 3a2491a..c0e9b06 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zrpn.c,v $ - * Revision 1.10 1995-09-11 15:23:26 adam + * Revision 1.11 1995-09-14 11:53:27 adam + * First work on regular expressions/truncations. 
+ * + * Revision 1.10 1995/09/11 15:23:26 adam * More work on relevance search. * * Revision 1.9 1995/09/11 13:09:35 adam @@ -52,18 +55,149 @@ #include #include -int split_term (ZServerInfo *zi, Z_Term *term, ISAM_P **isam_ps, int *no) +/* + * attr_print: log attributes + */ +static void attr_print (Z_AttributesPlusTerm *t) +{ + int of, i; + for (of = 0; of < t->num_attributes; of++) + { + Z_AttributeElement *element; + element = t->attributeList[of]; + + switch (element->which) + { + case Z_AttributeValue_numeric: + logf (LOG_DEBUG, "attributeType=%d value=%d", + *element->attributeType, + *element->value.numeric); + break; + case Z_AttributeValue_complex: + logf (LOG_DEBUG, "attributeType=%d complex", + *element->attributeType); + for (i = 0; i<element->value.complex->num_list; i++) + { + if (element->value.complex->list[i]->which == + Z_StringOrNumeric_string) + logf (LOG_DEBUG, " string: '%s'", + element->value.complex->list[i]->u.string); + else if (element->value.complex->list[i]->which == + Z_StringOrNumeric_numeric) + logf (LOG_DEBUG, " numeric: '%d'", + *element->value.complex->list[i]->u.numeric); + } + break; + default: + assert (0); + } + } +} + +typedef struct { + int type; + int major; + int minor; + Z_AttributesPlusTerm *zapt; +} AttrType; + +static int attr_find (AttrType *src) +{ + while (src->major < src->zapt->num_attributes) + { + Z_AttributeElement *element; + element = src->zapt->attributeList[src->major]; + + if (src->type == *element->attributeType) + { + switch (element->which) + { + case Z_AttributeValue_numeric: + ++(src->major); + return *element->value.numeric; + break; + case Z_AttributeValue_complex: + if (src->minor >= element->value.complex->num_list || + element->value.complex->list[src->minor]->which != + Z_StringOrNumeric_numeric) + break; + ++(src->minor); + return *element->value.complex->list[src->minor-1]->u.numeric; + default: + assert (0); + } + } + ++(src->major); + } + return -1; +} + +static void attr_init (AttrType *src, 
Z_AttributesPlusTerm *zapt, + int type) +{ + src->zapt = zapt; + src->type = type; + src->major = 0; + src->minor = 0; +} + +static ISAM_P *isam_p_buf = NULL; +static int isam_p_size = 0; +static int isam_p_indx; + +static void add_isam_p (const char *info) +{ + if (isam_p_indx == isam_p_size) + { + ISAM_P *new_isam_p_buf; + + isam_p_size = 2*isam_p_size + 100; + new_isam_p_buf = xmalloc (sizeof(*new_isam_p_buf) * + isam_p_size); + if (isam_p_buf) + { + memcpy (new_isam_p_buf, isam_p_buf, + isam_p_indx * sizeof(*isam_p_buf)); + xfree (isam_p_buf); + } + isam_p_buf = new_isam_p_buf; + } + assert (*info == sizeof(*isam_p_buf)); + memcpy (isam_p_buf + isam_p_indx, info+1, sizeof(*isam_p_buf)); + isam_p_indx++; +} + +static int grep_handle (Dict_char *name, const char *info) +{ + logf (LOG_DEBUG, "dict name: %s", name); + add_isam_p (info); + return 0; +} + +static int trunc_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, + ISAM_P **isam_ps, + int *no, int split_flag) { - static ISAM_P isam_p[16]; - int isam_p_indx = 0; char termz[IT_MAX_WORD+1]; char term_sub[IT_MAX_WORD+1]; - int sizez, i; - char *p0, *p1; - const char *info; - + char term_dict[2*IT_MAX_WORD+2]; + int sizez, i, j; + char *p0 = termz, *p1 = NULL; + const char *info; + AttrType truncation; + int truncation_value; + Z_Term *term = zapt->term; + + isam_p_indx = 0; + attr_init (&truncation, zapt, 5); + truncation_value = attr_find (&truncation); + logf (LOG_DEBUG, "truncation value %d", truncation_value); + *no = 0; if (term->which != Z_Term_general) - return 0; + { + zi->errCode = 124; + return -1; + } sizez = term->u.general->len; if (sizez > IT_MAX_WORD) sizez = IT_MAX_WORD; @@ -71,32 +205,60 @@ int split_term (ZServerInfo *zi, Z_Term *term, ISAM_P **isam_ps, int *no) termz[i] = index_char_cvt (term->u.general->buf[i]); termz[i] = '\0'; - p0 = termz; while (1) { - if ((p1 = strchr (p0, ' '))) + if (split_flag && (p1 = strchr (p0, ' '))) { memcpy (term_sub, p0, p1-p0); term_sub[p1-p0] = '\0'; } else 
strcpy (term_sub, p0); - logf (LOG_DEBUG, "dict_lookup: %s", term_sub); - if ((info = dict_lookup (zi->wordDict, term_sub))) + switch (truncation_value) { - logf (LOG_DEBUG, " found"); - assert (*info == sizeof(*isam_p)); - memcpy (isam_p + isam_p_indx, info+1, sizeof(*isam_p)); - isam_p_indx++; + case -1: /* not specified */ + case 100: /* do not truncate */ + strcpy (term_dict, term_sub); + logf (LOG_DEBUG, "dict_lookup: %s", term_dict); + if ((info = dict_lookup (zi->wordDict, term_dict))) + add_isam_p (info); + break; + case 1: /* right truncation */ + strcpy (term_dict, term_sub); + strcat (term_dict, ".*"); + dict_lookup_grep (zi->wordDict, term_dict, 0, grep_handle); + break; + case 2: /* left truncation */ + case 3: /* left&right truncation */ + zi->errCode = 120; + return -1; + case 101: /* process # in term */ + for (j = 0, i = 0; term_sub[i] && i < 3; i++) + term_dict[j++] = term_sub[i]; + for (; term_sub[i]; i++) + if (term_sub[i] == '#') + { + term_dict[j++] = '.'; + term_dict[j++] = '*'; + } + else + term_dict[j++] = term_sub[i]; + term_dict[j] = '\0'; + dict_lookup_grep (zi->wordDict, term_dict, 0, grep_handle); + break; + case 102: /* regular expression */ + strcpy (term_dict, term_sub); + dict_lookup_grep (zi->wordDict, term_dict, 0, grep_handle); + break; } if (!p1) break; p0 = p1+1; } - *isam_ps = isam_p; + *isam_ps = isam_p_buf; *no = isam_p_indx; logf (LOG_DEBUG, "%d positions", *no); - return 1; + return 0; } static RSET rpn_search_APT_relevance (ZServerInfo *zi, @@ -105,46 +267,109 @@ static RSET rpn_search_APT_relevance (ZServerInfo *zi, rset_relevance_parms parms; parms.key_size = sizeof(struct it_key); - parms.max_rec = 10; + parms.max_rec = 100; parms.cmp = key_compare; parms.is = zi->wordIsam; - split_term (zi, zapt->term, &parms.isam_positions, - &parms.no_isam_positions); + if (trunc_term (zi, zapt, &parms.isam_positions, + &parms.no_isam_positions, 1)) + return NULL; if (parms.no_isam_positions > 0) return rset_create 
(rset_kind_relevance, &parms); else return rset_create (rset_kind_null, NULL); } -static RSET rpn_search_APT (ZServerInfo *zi, Z_AttributesPlusTerm *zapt) +static RSET rpn_search_APT_word (ZServerInfo *zi, + Z_AttributesPlusTerm *zapt) { -#if 0 - Z_Term *term = zapt->term; - char termz[IT_MAX_WORD+1]; - size_t sizez; - struct rset_isam_parms parms; - const char *info; - int i; + ISAM_P *isam_positions; + int no_isam_positions; + rset_isam_parms parms; - if (term->which != Z_Term_general) - return NULL; - sizez = term->u.general->len; - if (sizez > IT_MAX_WORD) - sizez = IT_MAX_WORD; - for (i = 0; i<sizez; i++) - termz[i] = index_char_cvt (term->u.general->buf[i]); - termz[i] = '\0'; - logf (LOG_DEBUG, "dict_lookup: %s", termz); - if (!(info = dict_lookup (zi->wordDict, termz))) + if (trunc_term (zi, zapt, &isam_positions, + &no_isam_positions, 0)) + return NULL; + if (no_isam_positions != 1) + return rset_create (rset_kind_null, NULL); + parms.is = zi->wordIsam; + parms.pos = *isam_positions; + return rset_create (rset_kind_isam, &parms); +} + +static RSET rpn_search_APT_phrase (ZServerInfo *zi, + Z_AttributesPlusTerm *zapt) +{ + ISAM_P *isam_positions; + int no_isam_positions; + rset_isam_parms parms; + + if (trunc_term (zi, zapt, &isam_positions, + &no_isam_positions, 1)) + return NULL; + if (no_isam_positions != 1) return rset_create (rset_kind_null, NULL); - assert (*info == sizeof(parms.pos)); - memcpy (&parms.pos, info+1, sizeof(parms.pos)); parms.is = zi->wordIsam; - logf (LOG_DEBUG, "rset_create isam"); + parms.pos = *isam_positions; return rset_create (rset_kind_isam, &parms); -#else - return rpn_search_APT_relevance (zi, zapt); -#endif +} + +static RSET rpn_search_APT (ZServerInfo *zi, Z_AttributesPlusTerm *zapt) +{ + AttrType relation; + AttrType structure; + int relation_value, structure_value; + + attr_init (&relation, zapt, 2); + attr_init (&structure, zapt, 4); + + relation_value = attr_find (&relation); + structure_value = attr_find (&structure); + switch (structure_value) + { + case -1: + if 
(relation_value == 102) /* relevance relation */ + return rpn_search_APT_relevance (zi, zapt); + return rpn_search_APT_word (zi, zapt); + case 1: /* phrase */ + if (relation_value == 102) /* relevance relation */ + return rpn_search_APT_relevance (zi, zapt); + return rpn_search_APT_phrase (zi, zapt); + break; + case 2: /* word */ + if (relation_value == 102) /* relevance relation */ + return rpn_search_APT_relevance (zi, zapt); + return rpn_search_APT_word (zi, zapt); + case 3: /* key */ + break; + case 4: /* year */ + break; + case 5: /* date - normalized */ + break; + case 6: /* word list */ + return rpn_search_APT_relevance (zi, zapt); + case 100: /* date - un-normalized */ + break; + case 101: /* name - normalized */ + break; + case 102: /* date - un-normalized */ + break; + case 103: /* structure */ + break; + case 104: /* urx */ + break; + case 105: /* free-form-text */ + return rpn_search_APT_relevance (zi, zapt); + case 106: /* document-text */ + return rpn_search_APT_relevance (zi, zapt); + case 107: /* local-number */ + break; + case 108: /* string */ + return rpn_search_APT_word (zi, zapt); + case 109: /* numeric string */ + break; + } + zi->errCode = 118; + return NULL; } static RSET rpn_search_ref (ZServerInfo *zi, Z_ResultSetId *resultSetId) @@ -164,7 +389,14 @@ static RSET rpn_search_structure (ZServerInfo *zi, Z_RPNStructure *zs) rset_bool_parms bool_parms; bool_parms.rset_l = rpn_search_structure (zi, zs->u.complex->s1); + if (bool_parms.rset_l == NULL) + return NULL; bool_parms.rset_r = rpn_search_structure (zi, zs->u.complex->s2); + if (bool_parms.rset_r == NULL) + { + rset_delete (bool_parms.rset_l); + return NULL; + } bool_parms.key_size = sizeof(struct it_key); bool_parms.cmp = key_compare; @@ -207,24 +439,14 @@ static RSET rpn_search_structure (ZServerInfo *zi, Z_RPNStructure *zs) return r; } -static RSET rpn_save_set (RSET r, int *count) +static void count_set (RSET r, int *count) { -#if 0 - RSET d; - rset_temp_parms parms; -#endif int 
psysno = 0; struct it_key key; RSFD rfd; logf (LOG_DEBUG, "rpn_save_set"); *count = 0; -#if 0 - parms.key_size = sizeof(struct it_key); - d = rset_create (rset_kind_temp, &parms); - rset_open (d, 1); -#endif - rfd = rset_open (r, 0); while (rset_read (r, rfd, &key)) { @@ -233,35 +455,24 @@ static RSET rpn_save_set (RSET r, int *count) psysno = key.sysno; (*count)++; } -#if 0 - rset_write (d, &key); -#endif } rset_close (r, rfd); -#if 0 - rset_close (d); -#endif logf (LOG_DEBUG, "%d distinct sysnos", *count); -#if 0 - return d; -#endif } int rpn_search (ZServerInfo *zi, Z_RPNQuery *rpn, int num_bases, char **basenames, const char *setname, int *hits) { - RSET rset, result_rset; + RSET rset; + zi->errCode = 0; + zi->errString = NULL; rset = rpn_search_structure (zi, rpn->RPNStructure); if (!rset) - return 0; - result_rset = rpn_save_set (rset, hits); -#if 0 - rset_delete (result_rset); -#endif - + return zi->errCode; + count_set (rset, hits); resultSetAdd (zi, setname, 1, rset); - return 0; + return zi->errCode; } diff --git a/index/zserver.h b/index/zserver.h index 250b188..c686aae 100644 --- a/index/zserver.h +++ b/index/zserver.h @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zserver.h,v $ - * Revision 1.3 1995-09-08 08:53:23 adam + * Revision 1.4 1995-09-14 11:53:28 adam + * First work on regular expressions/truncations. + * + * Revision 1.3 1995/09/08 08:53:23 adam * Record buffer maintained in server_info. * * Revision 1.2 1995/09/06 16:11:19 adam @@ -38,6 +41,8 @@ typedef struct { Dict fileDict; int sys_idx_fd; char *recordBuf; + int errCode; + char *errString; } ZServerInfo; int rpn_search (ZServerInfo *zi, -- 1.7.10.4