From 0b5d38bc84d5261aaed3bce3be748c5d7008f2ff Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Fri, 8 Nov 1996 11:10:13 +0000 Subject: [PATCH] Buffers used during file match got bigger. Compressed ISAM support everywhere. Bug fixes regarding masking characters in queries. Redesigned Regexp-2 queries. --- index/dirs.c | 24 +++-- index/extract.c | 63 +++++++++++- index/index.h | 10 +- index/invstat.c | 71 ++++++++++--- index/kinput.c | 14 ++- index/main.c | 10 +- index/trunc.c | 14 ++- index/zrpn.c | 306 +++++++++++++++++++++++++++++++++++-------------------- index/zserver.c | 18 +++- 9 files changed, 379 insertions(+), 151 deletions(-) diff --git a/index/dirs.c b/index/dirs.c index 559ff97..c5636ee 100644 --- a/index/dirs.c +++ b/index/dirs.c @@ -4,7 +4,13 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: dirs.c,v $ - * Revision 1.11 1996-10-29 14:06:47 adam + * Revision 1.12 1996-11-08 11:10:13 adam + * Buffers used during file match got bigger. + * Compressed ISAM support everywhere. + * Bug fixes regarding masking characters in queries. + * Redesigned Regexp-2 queries. + * + * Revision 1.11 1996/10/29 14:06:47 adam * Include zebrautl.h instead of alexutil.h. * * Revision 1.10 1996/06/04 10:18:58 adam @@ -47,14 +53,16 @@ #include "index.h" +#define DIRS_MAX_PATH 1024 + struct dirs_info { Dict dict; int no_read; int no_cur; int no_max; struct dirs_entry *entries; - char nextpath[256]; - char prefix[256]; + char nextpath[DIRS_MAX_PATH]; + char prefix[DIRS_MAX_PATH]; int prelen; struct dirs_entry *last_entry; }; @@ -108,7 +116,7 @@ struct dirs_info *dirs_open (Dict dict, const char *rep) p->prelen = strlen(p->prefix); strcpy (p->nextpath, rep); p->no_read = p->no_cur = 0; - after = p->no_max = 400; + after = p->no_max = 100; p->entries = xmalloc (sizeof(*p->entries) * (p->no_max)); logf (LOG_DEBUG, "dirs_open first scan"); dict_scan (p->dict, p->nextpath, &before, &after, p, dirs_client_proc); @@ -171,7 +179,7 @@ struct dirs_entry *dirs_last (struct dirs_info *p) void dirs_mkdir (struct dirs_info *p, const char *src, time_t mtime) { - char path[256]; + char path[DIRS_MAX_PATH]; sprintf (path, "%s%s", p->prefix, src); logf (LOG_DEBUG, "dirs_mkdir %s", path); @@ -180,7 +188,7 @@ void dirs_mkdir (struct dirs_info *p, const char *src, time_t mtime) void dirs_rmdir (struct dirs_info *p, const char *src) { - char path[256]; + char path[DIRS_MAX_PATH]; sprintf (path, "%s%s", p->prefix, src); logf (LOG_DEBUG, "dirs_rmdir %s", path); @@ -189,7 +197,7 @@ void dirs_rmdir (struct dirs_info *p, const char *src) void dirs_add (struct dirs_info *p, const char *src, int sysno, time_t mtime) { - char path[256]; + char path[DIRS_MAX_PATH]; char info[16]; sprintf (path, "%s%s", p->prefix, src); @@ -201,7 +209,7 @@ void dirs_add (struct dirs_info *p, const char *src, int sysno, time_t mtime) void dirs_del (struct dirs_info *p, const char *src) { - char path[256]; + char path[DIRS_MAX_PATH]; sprintf (path, "%s%s", p->prefix, src); logf (LOG_DEBUG, "dirs_del %s", path); diff --git a/index/extract.c b/index/extract.c index 743d174..53dc896 100644 --- a/index/extract.c +++ b/index/extract.c @@ -4,7 +4,13 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: extract.c,v $ - * Revision 1.63 1996-10-29 14:09:39 adam + * Revision 1.64 1996-11-08 11:10:16 adam + * Buffers used during file match got bigger. + * Compressed ISAM support everywhere. + * Bug fixes regarding masking characters in queries. + * Redesigned Regexp-2 queries. + * + * Revision 1.63 1996/10/29 14:09:39 adam * Use of cisam system - enabled if setting isamc is 1. * * Revision 1.62 1996/10/11 10:57:01 adam @@ -349,18 +355,37 @@ void encode_key_write (char *k, struct encode_info *i, FILE *outf) } } +static int key_y_len; + +static int key_y_compare (const void *p1, const void *p2) +{ + int r; + + if ((r = key_compare (*(char**) p1 + key_y_len + 1, + *(char**) p2 + key_y_len + 1))) + return r; + return *(*(char**) p1 + key_y_len) - *(*(char**) p2 + key_y_len); +} + +static int key_x_compare (const void *p1, const void *p2) +{ + return strcmp (*(char**) p1, *(char**) p2); +} + void key_flush (void) { FILE *outf; char out_fname[200]; char *prevcp, *cp; struct encode_info encode_info; + int i; if (ptr_i <= 0) return; key_file_no++; logf (LOG_LOG, "sorting section %d", key_file_no); +#if 1 qsort (key_buf + ptr_top-ptr_i, ptr_i, sizeof(char*), key_qsort_compare); getFnameTmp (out_fname, key_file_no); @@ -386,6 +411,42 @@ void key_flush (void) else encode_key_write (cp + strlen(cp), &encode_info, outf); } +#else + qsort (key_buf + ptr_top-ptr_i, ptr_i, sizeof(char*), key_x_compare); + getFnameTmp (out_fname, key_file_no); + + if (!(outf = fopen (out_fname, "w"))) + { + logf (LOG_FATAL|LOG_ERRNO, "fopen (4) %s", out_fname); + exit (1); + } + logf (LOG_LOG, "writing section %d", key_file_no); + i = ptr_i; + prevcp = key_buf[ptr_top-i]; + while (1) + if (!--i || strcmp (prevcp, key_buf[ptr_top-i])) + { + key_y_len = strlen(prevcp)+1; +#if 0 + logf (LOG_LOG, "key_y_len: %2d %02x %02x %s", + key_y_len, prevcp[0], prevcp[1], 2+prevcp); +#endif + qsort (key_buf + ptr_top-ptr_i, ptr_i - i, + sizeof(char*), key_y_compare); + cp = key_buf[ptr_top-ptr_i]; + --key_y_len; + encode_key_init (&encode_info); + encode_key_write (cp, &encode_info, outf); + while (--ptr_i > i) + { + cp = key_buf[ptr_top-ptr_i]; + encode_key_write (cp+key_y_len, &encode_info, outf); + } + if (!i) + break; + prevcp = key_buf[ptr_top-ptr_i]; + } +#endif if (fclose (outf)) { logf (LOG_FATAL|LOG_ERRNO, "fclose %s", out_fname); diff --git a/index/index.h b/index/index.h index a90bf03..164df5d 100644 --- a/index/index.h +++ b/index/index.h @@ -4,7 +4,13 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: index.h,v $ - * Revision 1.45 1996-10-29 14:09:42 adam + * Revision 1.46 1996-11-08 11:10:19 adam + * Buffers used during file match got bigger. + * Compressed ISAM support everywhere. + * Bug fixes regarding masking characters in queries. + * Redesigned Regexp-2 queries. + * + * Revision 1.45 1996/10/29 14:09:42 adam * Use of cisam system - enabled if setting isamc is 1. * * Revision 1.44 1996/06/06 12:08:40 quinn @@ -229,7 +235,7 @@ int key_close (void); int key_compare (const void *p1, const void *p2); int key_qsort_compare (const void *p1, const void *p2); void key_logdump (int mask, const void *p); -void inv_prstat (const char *dict_fname, const char *isam_fname); +void inv_prstat (void); void key_input (int nkeys, int cache); ISAMC_M key_isamc_m (void); int merge_sort (char **buf, int from, int to); diff --git a/index/invstat.c b/index/invstat.c index 75ff272..8929c44 100644 --- a/index/invstat.c +++ b/index/invstat.c @@ -4,7 +4,13 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: invstat.c,v $ - * Revision 1.3 1996-06-04 10:18:58 adam + * Revision 1.4 1996-11-08 11:10:21 adam + * Buffers used during file match got bigger. + * Compressed ISAM support everywhere. + * Bug fixes regarding masking characters in queries. + * Redesigned Regexp-2 queries. + * + * Revision 1.3 1996/06/04 10:18:58 adam * Minor changes - removed include of ctype.h. * * Revision 1.2 1996/05/22 08:25:56 adam @@ -24,6 +30,7 @@ struct inv_stat_info { ISAM isam; + ISAMC isamc; int no_dict_entries; int no_dict_bytes; int isam_bounds[20]; @@ -37,7 +44,6 @@ static int inv_stat_handle (char *name, const char *info, int pos, int occur; int i = 0; struct inv_stat_info *stat_info = (struct inv_stat_info*) client; - ISPT ispt; ISAM_P isam_p; stat_info->no_dict_entries++; @@ -46,11 +52,29 @@ static int inv_stat_handle (char *name, const char *info, int pos, assert (*info == sizeof(ISAM_P)); memcpy (&isam_p, info+1, sizeof(ISAM_P)); - ispt = is_position (stat_info->isam, isam_p); - - occur = is_numkeys (ispt); + if (stat_info->isam) + { + ISPT ispt; - is_pt_free (ispt); + ispt = is_position (stat_info->isam, isam_p); + occur = is_numkeys (ispt); + is_pt_free (ispt); + } + if (stat_info->isamc) + { + ISAMC_PP pp; + int occurx = 0; + char buf[128]; + + pp = isc_pp_open (stat_info->isamc, isam_p); + occur = isc_pp_num (pp); +#if 1 + while (isc_pp_read(pp, buf)) + occurx++; + assert (occurx == occur); +#endif + isc_pp_close (pp); + } while (occur > stat_info->isam_bounds[i] && stat_info->isam_bounds[i]) i++; @@ -59,10 +83,11 @@ static int inv_stat_handle (char *name, const char *info, int pos, return 0; } -void inv_prstat (const char *dict_fname, const char *isam_fname) +void inv_prstat (void) { Dict dict; - ISAM isam; + ISAM isam = NULL; + ISAMC isamc = NULL; Records records; int i, prev; int before = 0; @@ -73,23 +98,36 @@ void inv_prstat (const char *dict_fname, const char *isam_fname) term_dict[0] = 1; term_dict[1] = 0; - dict = dict_open (dict_fname, 100, 0); + dict = dict_open (FNAME_DICT, 100, 0); if (!dict) { - logf (LOG_FATAL, "dict_open fail of `%s'", dict_fname); + logf (LOG_FATAL, "dict_open fail"); exit (1); } - isam = is_open (isam_fname, key_compare, 0, sizeof(struct it_key)); - if (!isam) + if (res_get_match (common_resource, "isam", "c", NULL)) { - logf (LOG_FATAL, "is_open fail of `%s'", isam_fname); - exit (1); + isamc = isc_open (FNAME_ISAMC, 0, key_isamc_m ()); + if (!isamc) + { + logf (LOG_FATAL, "isc_open fail"); + exit (1); + } + } + else + { + isam = is_open (FNAME_ISAM, key_compare, 0, sizeof(struct it_key)); + if (!isam) + { + logf (LOG_FATAL, "is_open fail"); + exit (1); + } } records = rec_open (0); stat_info.no_dict_entries = 0; stat_info.no_dict_bytes = 0; stat_info.isam = isam; + stat_info.isamc = isamc; stat_info.isam_bounds[0] = 1; stat_info.isam_bounds[1] = 2; stat_info.isam_bounds[2] = 3; @@ -117,7 +155,10 @@ void inv_prstat (const char *dict_fname, const char *isam_fname) rec_close (&records); dict_close (dict); - is_close (isam); + if (isam) + is_close (isam); + if (isamc) + isc_close (isamc); fprintf (stderr, "%d dictionary entries. %d bytes for strings\n", stat_info.no_dict_entries, stat_info.no_dict_bytes); diff --git a/index/kinput.c b/index/kinput.c index ac03e13..8a9fb0e 100644 --- a/index/kinput.c +++ b/index/kinput.c @@ -4,7 +4,13 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: kinput.c,v $ - * Revision 1.20 1996-11-01 08:58:41 adam + * Revision 1.21 1996-11-08 11:10:23 adam + * Buffers used during file match got bigger. + * Compressed ISAM support everywhere. + * Bug fixes regarding masking characters in queries. + * Redesigned Regexp-2 queries. + * + * Revision 1.20 1996/11/01 08:58:41 adam * Interface to isamc system now includes update and delete. * * Revision 1.19 1996/10/29 14:09:46 adam @@ -421,6 +427,7 @@ int heap_inpc (struct heap_info *hi) strcpy (this_name, hci.cur_name); logf (LOG_DEBUG, "inserting %s", 1+hci.cur_name); + no_diffs++; if ((dict_info = dict_lookup (hi->dict, hci.cur_name))) { memcpy (&isamc_p, dict_info+1, sizeof(ISAMC_P)); @@ -437,7 +444,6 @@ int heap_inpc (struct heap_info *hi) if (isamc_p2 != isamc_p) dict_insert (hi->dict, this_name, sizeof(ISAMC_P), &isamc_p2); - } } else @@ -591,8 +597,8 @@ void key_input (int nkeys, int cache) isamc = isc_open (FNAME_ISAMC, 1, key_isamc_m ()); if (!isamc) { - logf (LOG_FATAL, "isc_open fail"); - exit (1); + logf (LOG_FATAL, "isc_open fail"); + exit (1); } } else diff --git a/index/main.c b/index/main.c index dcb89a6..c586a29 100644 --- a/index/main.c +++ b/index/main.c @@ -4,7 +4,13 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: main.c,v $ - * Revision 1.44 1996-10-29 14:09:48 adam + * Revision 1.45 1996-11-08 11:10:26 adam + * Buffers used during file match got bigger. + * Compressed ISAM support everywhere. + * Bug fixes regarding masking characters in queries. + * Redesigned Regexp-2 queries. + * + * Revision 1.44 1996/10/29 14:09:48 adam * Use of cisam system - enabled if setting isamc is 1. * * Revision 1.43 1996/06/06 12:08:42 quinn @@ -305,7 +311,7 @@ int main (int argc, char **argv) zebraIndexLockMsg ("r"); } rec_prstat (); - inv_prstat (FNAME_DICT, FNAME_ISAM); + inv_prstat (); } else { diff --git a/index/trunc.c b/index/trunc.c index 4a0de39..9ef5ed8 100644 --- a/index/trunc.c +++ b/index/trunc.c @@ -4,7 +4,13 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: trunc.c,v $ - * Revision 1.1 1996-11-04 14:07:40 adam + * Revision 1.2 1996-11-08 11:10:28 adam + * Buffers used during file match got bigger. + * Compressed ISAM support everywhere. + * Bug fixes regarding masking characters in queries. + * Redesigned Regexp-2 queries. + * + * Revision 1.1 1996/11/04 14:07:40 adam * Moved truncation code to trunc.c. * */ @@ -207,7 +213,7 @@ static RSET rset_trunc_r (ZServerInfo *zi, ISAM_P *isam_p, int from, int to, int n = ti->indx[ti->ptr[1]]; rset_write (result, result_rsfd, ti->heap[ti->ptr[1]]); -#if 0 +#if 1 /* section that preserve all keys */ heap_delete (ti); if (is_readkey (ispt[n], ti->tmpbuf)) @@ -249,7 +255,7 @@ static RSET rset_trunc_r (ZServerInfo *zi, ISAM_P *isam_p, int from, int to, for (i = to-from; --i >= 0; ) { ispt[i] = isc_pp_open (zi->isamc, isam_p[from+i]); - if (isc_read_key (ispt[i], ti->tmpbuf)) + if (isc_pp_read (ispt[i], ti->tmpbuf)) heap_insert (ti, ti->tmpbuf, i); else isc_pp_close (ispt[i]); @@ -270,7 +276,7 @@ static RSET rset_trunc_r (ZServerInfo *zi, ISAM_P *isam_p, int from, int to, /* section that preserve all keys with unique sysnos */ while (1) { - if (!isc_read_key (ispt[n], ti->tmpbuf)) + if (!isc_pp_read (ispt[n], ti->tmpbuf)) { heap_delete (ti); isc_pp_close (ispt[n]); diff --git a/index/zrpn.c b/index/zrpn.c index 8d5da19..7388b13 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -4,7 +4,13 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zrpn.c,v $ - * Revision 1.55 1996-11-04 14:07:44 adam + * Revision 1.56 1996-11-08 11:10:32 adam + * Buffers used during file match got bigger. + * Compressed ISAM support everywhere. + * Bug fixes regarding masking characters in queries. + * Redesigned Regexp-2 queries. + * + * Revision 1.55 1996/11/04 14:07:44 adam * Moved truncation code to trunc.c. * * Revision 1.54 1996/10/29 14:09:52 adam @@ -317,6 +323,134 @@ static int grep_handle (char *name, const char *info, void *p) return 0; } +static int term_pre (char **src, const char *ct1, const char *ct2) +{ + char *s1, *s0 = *src; + char **map; + + /* skip white space */ + while (*s0) + { + if (ct1 && strchr (ct1, *s0)) + break; + if (ct2 && strchr (ct2, *s0)) + break; + s1 = s0; + map = map_chrs_input (&s1, strlen(s1)); + if (**map != *CHR_SPACE) + break; + s0 = s1; + } + *src = s0; + return *s0; +} + +static int term_100 (char **src, char *dst) +{ + char *s0, *s1, **map; + int i = 0; + + if (!term_pre (src, NULL, NULL)) + return 0; + s0 = *src; + while (*s0) + { + s1 = s0; + map = map_chrs_input (&s0, strlen(s0)); + if (**map == *CHR_SPACE) + break; + while (s1 < s0) + { + if (!isalnum (*s1)) + dst[i++] = '\\'; + dst[i++] = *s1++; + } + } + dst[i] = '\0'; + *src = s0; + return i; +} + +static int term_101 (char **src, char *dst) +{ + char *s0, *s1, **map; + int i = 0; + + if (!term_pre (src, "#", "#")) + return 0; + s0 = *src; + while (*s0) + { + if (*s0 == '#') + { + dst[i++] = '.'; + dst[i++] = '*'; + s0++; + } + else + { + s1 = s0; + map = map_chrs_input (&s0, strlen(s0)); + if (**map == *CHR_SPACE) + break; + while (s1 < s0) + { + if (!isalnum (*s1)) + dst[i++] = '\\'; + dst[i++] = *s1++; + } + } + } + dst[i] = '\0'; + *src = s0; + return i; +} + + +static int term_103 (char **src, char *dst, int *errors) +{ + int i = 0; + char *s0, *s1, **map; + + if (!term_pre (src, "\\()[].*+?|", "(")) + return 0; + s0 = *src; + if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] && + isdigit (s0[1])) + { + *errors = s0[1] - '0'; + s0 += 3; + if (*errors > 3) + *errors = 3; + } + while (*s0) + { + if (strchr ("\\()[].*+?|-", *s0)) + dst[i++] = *s0++; + else + { + s1 = s0; + map = map_chrs_input (&s0, strlen(s0)); + if (**map == *CHR_SPACE) + break; + while (s1 < s0) + { + if (!isalnum (*s1)) + dst[i++] = '\\'; + dst[i++] = *s1++; + } + } + } + dst[i] = '\0'; + *src = s0; + return i; +} + +static int term_102 (char **src, char *dst) +{ + return term_103 (src, dst, NULL); +} + /* gen_regular_rel - generate regular expression from relation * val: border value (inclusive) * islt: 1 if <=; 0 if >=. @@ -427,7 +561,7 @@ static void gen_regular_rel (char *dst, int val, int islt) } static int relational_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, - const char *term_sub, + char **term_sub, char *term_dict, oid_value attributeSet, struct grep_info *grep_info, @@ -440,29 +574,40 @@ static int relational_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, attr_init (&relation, zapt, 2); relation_value = attr_find (&relation, NULL); - term_value = atoi (term_sub); switch (relation_value) { case 1: + if (!term_100 (term_sub, term_dict)) + return 0; + term_value = atoi (term_dict); if (term_value <= 0) return 1; logf (LOG_DEBUG, "Relation <"); gen_regular_rel (term_dict + strlen(term_dict), term_value-1, 1); break; case 2: + if (!term_100 (term_sub, term_dict)) + return 0; + term_value = atoi (term_dict); if (term_value < 0) return 1; logf (LOG_DEBUG, "Relation <="); gen_regular_rel (term_dict + strlen(term_dict), term_value, 1); break; case 4: + if (!term_100 (term_sub, term_dict)) + return 0; + term_value = atoi (term_dict); if (term_value < 0) term_value = 0; logf (LOG_DEBUG, "Relation >="); gen_regular_rel (term_dict + strlen(term_dict), term_value, 0); break; case 5: + if (!term_100 (term_sub, term_dict)) + return 0; + term_value = atoi (term_dict); if (term_value < 0) term_value = 0; logf (LOG_DEBUG, "Relation >"); @@ -480,25 +625,19 @@ static int relational_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, return 1; } -static void verbatim_char (int ch, int *indx, char *dst) -{ - if (!isalnum (ch)) - dst[(*indx)++] = '\\'; - dst[(*indx)++] = ch; -} - static int field_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, - const char *term_sub, int regType, + char **term_sub, int regType, oid_value attributeSet, struct grep_info *grep_info, int num_bases, char **basenames) { char term_dict[2*IT_MAX_WORD+2]; - int i, j, r, base_no; + int j, r, base_no; AttrType truncation; int truncation_value; AttrType use; int use_value; oid_value curAttributeSet = attributeSet; + char *termp; attr_init (&use, zapt, 1); use_value = attr_find (&use, &curAttributeSet); @@ -516,6 +655,7 @@ static int field_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, data1_local_attribute *local_attr; int max_pos, prefix_len = 0; + termp = *term_sub; attp = att_getentbyatt (curAttributeSet, use_value); if (!attp) { @@ -555,20 +695,18 @@ static int field_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, term_dict[prefix_len++] = 1; term_dict[prefix_len++] = regType; term_dict[prefix_len] = '\0'; - if (!relational_term (zi, zapt, term_sub, term_dict, + if (!relational_term (zi, zapt, &termp, term_dict, attributeSet, grep_info, &max_pos)) { - const char *cp; - j = prefix_len; switch (truncation_value) { case -1: /* not specified */ case 100: /* do not truncate */ term_dict[j++] = '('; - for (i = 0; term_sub[i]; i++) - verbatim_char (term_sub[i], &j, term_dict); - strcpy (term_dict+j, ")"); + if (!term_100 (&termp, term_dict + j)) + return 0; + strcat (term_dict, ")"); r = dict_lookup_grep (zi->dict, term_dict, 0, grep_info, &max_pos, 0, grep_handle); if (r) @@ -576,9 +714,9 @@ static int field_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, break; case 1: /* right truncation */ term_dict[j++] = '('; - for (i = 0; term_sub[i]; i++) - verbatim_char (term_sub[i], &j, term_dict); - strcpy (term_dict+j, ".*)"); + if (!term_100 (&termp, term_dict + j)) + return 0; + strcat (term_dict, ".*)"); dict_lookup_grep (zi->dict, term_dict, 0, grep_info, &max_pos, 0, grep_handle); break; @@ -588,40 +726,35 @@ static int field_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, return -1; case 101: /* process # in term */ term_dict[j++] = '('; - for (i=0; term_sub[i]; i++) - if (term_sub[i] == '#' && i > 2) - { - term_dict[j++] = '.'; - term_dict[j++] = '*'; - } - else - verbatim_char (term_sub[i], &j, term_dict); - strcpy (term_dict+j, ")"); + if (!term_101 (&termp, term_dict + j)) + return 0; + strcat (term_dict, ")"); r = dict_lookup_grep (zi->dict, term_dict, 0, grep_info, &max_pos, 0, grep_handle); if (r) - logf (LOG_WARN, "dict_lookup_grep err, trunc=#: %d", - r); + logf (LOG_WARN, "dict_lookup_grep err, trunc=#: %d", r); break; - case 102: /* regular expression */ - sprintf (term_dict + j, "(%s)", term_sub); + case 102: /* Regexp-1 */ + term_dict[j++] = '('; + if (!term_102 (&termp, term_dict + j)) + return 0; + strcat (term_dict, ")"); + logf (LOG_DEBUG, "Regexp-1 tolerance=%d", r); r = dict_lookup_grep (zi->dict, term_dict, 0, grep_info, &max_pos, 0, grep_handle); if (r) logf (LOG_WARN, "dict_lookup_grep err, trunc=regular: %d", r); break; - case 103: /* regular expression with error correction */ - cp = term_sub; - r = 0; - if (*cp == '*' && cp[1] && cp[2]) - { - r = atoi (cp+1); - cp += 2; - } - sprintf (term_dict + j, "(%s)", cp); + case 103: /* Regexp-1 */ + r = 1; + term_dict[j++] = '('; + if (!term_103 (&termp, term_dict + j, &r)) + return 0; + strcat (term_dict, ")"); + logf (LOG_DEBUG, "Regexp-2 tolerance=%d", r); r = dict_lookup_grep (zi->dict, term_dict, r, grep_info, - &max_pos, j, grep_handle); + &max_pos, 2, grep_handle); if (r) logf (LOG_WARN, "dict_lookup_grep err, trunc=eregular: %d", r); @@ -629,8 +762,9 @@ static int field_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, } } } + *term_sub = termp; logf (LOG_DEBUG, "%d positions", grep_info->isam_p_indx); - return 0; + return 1; } static void trans_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, @@ -682,16 +816,17 @@ static RSET rpn_search_APT_relevance (ZServerInfo *zi, { rset_relevance_parms parms; char termz[IT_MAX_WORD+1]; - char term_sub[IT_MAX_WORD+1]; + char *termp = termz; struct grep_info grep_info; - char *p0 = termz; RSET result; int term_index = 0; + int r; parms.key_size = sizeof(struct it_key); parms.max_rec = 100; parms.cmp = key_compare; parms.is = zi->isam; + parms.isc = zi->isamc; parms.no_terms = 0; if (zapt->term->which != Z_Term_general) @@ -709,34 +844,10 @@ static RSET rpn_search_APT_relevance (ZServerInfo *zi, grep_info.isam_p_buf = NULL; while (1) { - char **map; - char *p2, *p1; - - p1 = p0; - while (*(p0 = p1)) - { - map = map_chrs_input (&p1, strlen(p1)); - if (**map != *CHR_SPACE) - break; - } - if (!*p0) + r = field_term (zi, zapt, &termp, 'w', attributeSet, &grep_info, + num_bases, basenames); + if (r <= 0) break; - - p1 = p0; - while (*(p2 = p1)) - { - map = map_chrs_input (&p1, strlen(p1)); - if (**map == *CHR_SPACE) - break; - } - if (p2 == p0) - break; - memcpy (term_sub, p0, p2-p0); - term_sub[p2-p0] = '\0'; - p0 = p2; - if (field_term (zi, zapt, term_sub, 'w', attributeSet, &grep_info, - num_bases, basenames)) - return NULL; #ifdef TERM_COUNT for (; term_index < grep_info.isam_p_indx; term_index++) grep_info.term_no[term_index] = parms.no_terms; @@ -765,6 +876,8 @@ static RSET rpn_search_APT_cphrase (ZServerInfo *zi, char termz[IT_MAX_WORD+1]; struct grep_info grep_info; RSET result; + char *termp = termz; + int r; if (zapt->term->which != Z_Term_general) { @@ -780,9 +893,8 @@ static RSET rpn_search_APT_cphrase (ZServerInfo *zi, grep_info.isam_p_size = 0; grep_info.isam_p_buf = NULL; - if (field_term (zi, zapt, termz, 'p', attributeSet, &grep_info, - num_bases, basenames)) - return NULL; + r = field_term (zi, zapt, &termp, 'p', attributeSet, &grep_info, + num_bases, basenames); result = rset_trunc (zi, grep_info.isam_p_buf, grep_info.isam_p_indx); #ifdef TERM_COUNT xfree(grep_info.term_no); @@ -883,10 +995,9 @@ static RSET rpn_search_APT_phrase (ZServerInfo *zi, int num_bases, char **basenames) { char termz[IT_MAX_WORD+1]; - char term_sub[IT_MAX_WORD+1]; - char *p0 = termz; + char *termp = termz; RSET rset[60], result; - int i, rset_no = 0; + int i, r, rset_no = 0; struct grep_info grep_info; if (zapt->term->which != Z_Term_general) @@ -904,37 +1015,11 @@ static RSET rpn_search_APT_phrase (ZServerInfo *zi, while (1) { - char **map; - char *p2, *p1; - - p1 = p0; - while (*(p0 = p1)) - { - map = map_chrs_input (&p1, strlen(p1)); - if (**map != *CHR_SPACE) - break; - } - if (!*p0) - break; - - p1 = p0; - while (*(p2 = p1)) - { - map = map_chrs_input (&p1, strlen(p1)); - if (**map == *CHR_SPACE) - break; - } - if (p2 == p0) - break; - - memcpy (term_sub, p0, p2-p0); - term_sub[p2-p0] = '\0'; - p0 = p2; - grep_info.isam_p_indx = 0; - if (field_term (zi, zapt, term_sub, 'w', attributeSet, &grep_info, - num_bases, basenames)) - return NULL; + r = field_term (zi, zapt, &termp, 'w', attributeSet, &grep_info, + num_bases, basenames); + if (r < 1) + break; rset[rset_no] = rset_trunc (zi, grep_info.isam_p_buf, grep_info.isam_p_indx); assert (rset[rset_no]); @@ -949,7 +1034,6 @@ static RSET rpn_search_APT_phrase (ZServerInfo *zi, return rset_create (rset_kind_null, NULL); else if (rset_no == 1) return (rset[0]); - result = rpn_prox (rset, rset_no); for (i = 0; iinfo[recInfo_fileType]; fname = rec->info[recInfo_filename]; + basename = rec->info[recInfo_databaseName]; + *basenamep = odr_malloc (stream, strlen(basename)+1); + strcpy (*basenamep, basename); if (!(rt = recType_byName (file_type, subType))) { @@ -450,7 +459,8 @@ bend_fetchresult *bend_fetch (void *handle, bend_fetchrequest *q, int *num) } r.errcode = record_fetch (&server_info, records[0].sysno, records[0].score, q->stream, q->format, - q->comp, &r.format, &r.record, &r.len); + q->comp, &r.format, &r.record, &r.len, + &r.basename); resultSetSysnoDel (&server_info, records, 1); register_unlock (&server_info); return &r; -- 1.7.10.4