X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fzrpn.c;h=03e0f5eb3fb0f7a8781e105fad067e1aa3da86c8;hb=83533000f9456dcab2fc171abafd84d6104d4087;hp=ca8e014dc0547624f126e7e002881ac1e1a1be84;hpb=ef696645cc3b7e0f4027008d1dc589c0f0f90c1f;p=idzebra-moved-to-github.git diff --git a/index/zrpn.c b/index/zrpn.c index ca8e014..03e0f5e 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -4,7 +4,34 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zrpn.c,v $ - * Revision 1.92 1999-05-26 07:49:13 adam + * Revision 1.101 2000-03-02 14:35:03 adam + * Fixed proximity handling. + * + * Revision 1.100 1999/12/28 15:48:12 adam + * Minor Fix. + * + * Revision 1.99 1999/12/23 09:03:32 adam + * Changed behaviour of trunc=105 so that * is regular .* and ! is regular . + * + * Revision 1.98 1999/11/30 13:48:04 adam + * Improved installation. Updated for inclusion of YAZ header files. + * + * Revision 1.97 1999/10/14 14:33:50 adam + * Added truncation 5=106. + * + * Revision 1.96 1999/09/23 10:05:05 adam + * Implemented structure=105 searching. + * + * Revision 1.95 1999/09/07 07:19:21 adam + * Work on character mapping. Implemented replace rules. + * + * Revision 1.94 1999/07/20 13:59:18 adam + * Fixed bug that occurred when phrases had 0 hits. + * + * Revision 1.93 1999/06/17 14:38:40 adam + * Bug fix: Scan SEGV'ed when getting unknown use attribute. + * + * Revision 1.92 1999/05/26 07:49:13 adam * C++ compilation. 
* * Revision 1.91 1999/02/02 14:51:13 adam @@ -433,7 +460,7 @@ struct grep_info { #ifdef TERM_COUNT int *term_no; #endif - ISAM_P *isam_p_buf; + ISAMS_P *isam_p_buf; int isam_p_size; int isam_p_indx; ZebraHandle zh; @@ -446,8 +473,11 @@ static void term_untrans (ZebraHandle zh, int reg_type, while (*src) { const char *cp = zebra_maps_output (zh->zebra_maps, reg_type, &src); - while (*cp) - *dst++ = *cp++; + if (!cp) + *dst++ = *src++; + else + while (*cp) + *dst++ = *cp++; } *dst = '\0'; } @@ -457,12 +487,12 @@ static void add_isam_p (const char *name, const char *info, { if (p->isam_p_indx == p->isam_p_size) { - ISAM_P *new_isam_p_buf; + ISAMS_P *new_isam_p_buf; #ifdef TERM_COUNT int *new_term_no; #endif p->isam_p_size = 2*p->isam_p_size + 100; - new_isam_p_buf = (ISAM_P *) xmalloc (sizeof(*new_isam_p_buf) * + new_isam_p_buf = (ISAMS_P *) xmalloc (sizeof(*new_isam_p_buf) * p->isam_p_size); if (p->isam_p_buf) { @@ -523,6 +553,7 @@ static int term_pre (ZebraMaps zebra_maps, int reg_type, const char **src, return *s0; } +/* term_100: handle term, where trunc=none (no operators at all) */ static int term_100 (ZebraMaps zebra_maps, int reg_type, const char **src, char *dst, int space_split, char *dst_term) @@ -555,6 +586,7 @@ static int term_100 (ZebraMaps zebra_maps, int reg_type, return i; } +/* term_101: handle term, where trunc=Process # */ static int term_101 (ZebraMaps zebra_maps, int reg_type, const char **src, char *dst, int space_split, char *dst_term) @@ -596,7 +628,7 @@ static int term_101 (ZebraMaps zebra_maps, int reg_type, return i; } - +/* term_103: handle term, where trunc=re-2 (regular expressions) */ static int term_103 (ZebraMaps zebra_maps, int reg_type, const char **src, char *dst, int *errors, int space_split, char *dst_term) @@ -645,6 +677,7 @@ static int term_103 (ZebraMaps zebra_maps, int reg_type, const char **src, return i; } +/* term_102: handle term, where trunc=re-1 (regular expressions) */ static int term_102 (ZebraMaps zebra_maps, int 
reg_type, const char **src, char *dst, int space_split, char *dst_term) { @@ -652,6 +685,106 @@ static int term_102 (ZebraMaps zebra_maps, int reg_type, const char **src, dst_term); } + +/* term_104: handle term, where trunc=Process # and ! */ +static int term_104 (ZebraMaps zebra_maps, int reg_type, + const char **src, char *dst, int space_split, + char *dst_term) +{ + const char *s0, *s1; + const char **map; + int i = 0; + int j = 0; + + if (!term_pre (zebra_maps, reg_type, src, "#!", "#!")) + return 0; + s0 = *src; + while (*s0) + { + if (*s0 == '#') + { + dst[i++] = '.'; + dst[i++] = '*'; + dst_term[j++] = *s0++; + } + else if (*s0 == '!') + { + dst[i++] = '.'; + dst_term[j++] = *s0++; + } + { + s1 = s0; + map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0)); + if (space_split && **map == *CHR_SPACE) + break; + while (s1 < s0) + { + if (!isalnum (*s1)) + dst[i++] = '\\'; + dst_term[j++] = *s1; + dst[i++] = *s1++; + } + } + } + dst[i] = '\0'; + dst_term[j++] = '\0'; + *src = s0; + return i; +} + +/* term_105/106: handle term, where trunc=Process * and ! 
and right trunc */ +static int term_105 (ZebraMaps zebra_maps, int reg_type, + const char **src, char *dst, int space_split, + char *dst_term, int right_truncate) +{ + const char *s0, *s1; + const char **map; + int i = 0; + int j = 0; + + if (!term_pre (zebra_maps, reg_type, src, "*!", "*!")) + return 0; + s0 = *src; + while (*s0) + { + if (*s0 == '*') + { + dst[i++] = '.'; + dst[i++] = '*'; + dst_term[j++] = *s0++; + } + else if (*s0 == '!') + { + dst[i++] = '.'; + dst_term[j++] = *s0++; + } + { + s1 = s0; + map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0)); + if (space_split && **map == *CHR_SPACE) + break; + while (s1 < s0) + { + if (!isalnum (*s1)) + dst[i++] = '\\'; + dst_term[j++] = *s1; + dst[i++] = *s1++; + } + } + } + if (right_truncate) + { + dst[i++] = '.'; + dst[i++] = '*'; + } + dst[i] = '\0'; + + dst_term[j++] = '\0'; + *src = s0; + return i; +} + + /* gen_regular_rel - generate regular expression from relation * val: border value (inclusive) * islt: 1 if <=; 0 if >=. @@ -749,11 +882,14 @@ static void gen_regular_rel (char *dst, int val, int islt) dst[dst_p] = '\0'; if (islt) { - for (i=1; izebra_maps, reg_type, + &termp, term_dict + j, space_split, term_dst)) + return 0; + strcat (term_dict, ")"); + r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info, + &max_pos, 0, grep_handle); + if (r) + logf (LOG_WARN, "dict_lookup_grep err, trunc=#/!: %d", r); + break; + case 105: /* process * and ! in term, then append right truncation (.*) */ + term_dict[j++] = '('; + if (!term_105 (zh->zebra_maps, reg_type, + &termp, term_dict + j, space_split, term_dst, 1)) + return 0; + strcat (term_dict, ")"); + r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info, + &max_pos, 0, grep_handle); + if (r) + logf (LOG_WARN, "dict_lookup_grep err, trunc=*/!: %d", r); + break; + case 106: /* no appended right truncation: process * and ! 
in term */ + term_dict[j++] = '('; + if (!term_105 (zh->zebra_maps, reg_type, + &termp, term_dict + j, space_split, term_dst, 0)) + return 0; + strcat (term_dict, ")"); + r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info, + &max_pos, 0, grep_handle); + if (r) + logf (LOG_WARN, "dict_lookup_grep err, trunc=*/!: %d", r); + break; } } *term_sub = termp; @@ -1171,99 +1340,8 @@ static void trans_scan_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, termz[i] = '\0'; } -static RSET rpn_proximity (ZebraHandle zh, RSET rset1, RSET rset2, - int ordered, - int exclusion, int relation, int distance) -{ - int i; - RSFD rsfd1, rsfd2; - int more1, more2; - struct it_key buf1, buf2; - RSFD rsfd_result; - RSET result; - rset_temp_parms parms; - int term_index; - - rsfd1 = rset_open (rset1, RSETF_READ); - more1 = rset_read (rset1, rsfd1, &buf1, &term_index); - - rsfd2 = rset_open (rset2, RSETF_READ); - more2 = rset_read (rset2, rsfd2, &buf2, &term_index); - - parms.key_size = sizeof (struct it_key); - parms.temp_path = res_get (zh->res, "setTmpDir"); - result = rset_create (rset_kind_temp, &parms); - rsfd_result = rset_open (result, RSETF_WRITE); - - logf (LOG_DEBUG, "rpn_proximity excl=%d ord=%d rel=%d dis=%d", - exclusion, ordered, relation, distance); - while (more1 && more2) - { - int cmp = key_compare_it (&buf1, &buf2); - if (cmp < -1) - more1 = rset_read (rset1, rsfd1, &buf1, &term_index); - else if (cmp > 1) - more2 = rset_read (rset2, rsfd2, &buf2, &term_index); - else - { - int sysno = buf1.sysno; - int seqno[500]; - int n = 0; - - seqno[n++] = buf1.seqno; - while ((more1 = rset_read (rset1, rsfd1, &buf1, &term_index)) && - sysno == buf1.sysno) - if (n < 500) - seqno[n++] = buf1.seqno; - do - { - for (i = 0; i= */ - if (diff >= distance) - excl = !excl; - break; - case 5: /* > */ - if (diff > distance) - excl = !excl; - break; - case 6: /* != */ - if (diff != distance) - excl = !excl; - break; - } - if (excl) - rset_write (result, rsfd_result, &buf2); - } - } while 
((more2 = rset_read (rset2, rsfd2, &buf2, &term_index)) && - sysno == buf2.sysno); - } - } - rset_close (result, rsfd_result); - rset_close (rset1, rsfd1); - rset_close (rset2, rsfd2); - return result; -} - -static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no) +static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no, + int ordered, int exclusion, int relation, int distance) { int i; RSFD *rsfd; @@ -1280,47 +1358,57 @@ static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no) more = (int *) xmalloc (sizeof(*more)*rset_no); buf = (struct it_key **) xmalloc (sizeof(*buf)*rset_no); + *prox_term = '\0'; for (i = 0; ino_rset_terms; j++) { const char *nflags = rset[i]->rset_terms[j]->flags; char *term = rset[i]->rset_terms[j]->name; int lterm = strlen(term); - if (length_prox_term) - prox_term[length_prox_term++] = ' '; - strcpy (prox_term + length_prox_term, term); - length_prox_term += lterm; + if (lterm + length_prox_term < sizeof(prox_term)-1) + { + if (length_prox_term) + prox_term[length_prox_term++] = ' '; + strcpy (prox_term + length_prox_term, term); + length_prox_term += lterm; + } if (min_nn > rset[i]->rset_terms[j]->nn) min_nn = rset[i]->rset_terms[j]->nn; flags = nflags; } } + for (i = 0; i= 0) - { - rset_close (rset[i], rsfd[i]); - xfree (buf[i]); - --i; - } - parms.rset_term = rset_term_create (prox_term, -1, flags); + + parms.rset_term = rset_term_create (prox_term, length_prox_term, + flags); parms.rset_term->nn = 0; result = rset_create (rset_kind_null, &parms); } - else + else if (ordered && relation == 3 && exclusion == 0 && distance == 1) { + /* special proximity case = phrase search ... 
*/ rset_temp_parms parms; RSFD rsfd_result; - parms.rset_term = rset_term_create (prox_term, -1, flags); + parms.rset_term = rset_term_create (prox_term, length_prox_term, + flags); parms.rset_term->nn = min_nn; parms.key_size = sizeof (struct it_key); parms.temp_path = res_get (zh->res, "setTmpDir"); @@ -1367,23 +1455,160 @@ static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no) more[0] = rset_read (*rset, *rsfd, *buf, &term_index); } } - - for (i = 0; inn = min_nn; + parms.key_size = sizeof (struct it_key); + parms.temp_path = res_get (zh->res, "setTmpDir"); + result = rset_create (rset_kind_temp, &parms); + rsfd_result = rset_open (result, RSETF_WRITE); + + while (more[0] && more[1]) { - rset_close (rset[i], rsfd[i]); - xfree (buf[i]); + int cmp = key_compare_it (buf[0], buf[1]); + if (cmp < -1) + more[0] = rset_read (rset[0], rsfd[0], buf[0], &term_index); + else if (cmp > 1) + more[1] = rset_read (rset[1], rsfd[1], buf[1], &term_index); + else + { + int sysno = buf[0]->sysno; + int seqno[500]; + int n = 0; + + seqno[n++] = buf[0]->seqno; + while ((more[0] = rset_read (rset[0], rsfd[0], buf[0], + &term_index)) && + sysno == buf[0]->sysno) + if (n < 500) + seqno[n++] = buf[0]->seqno; + do + { + for (i = 0; iseqno - seqno[i]; + int excl = exclusion; + if (!ordered && diff < 0) + diff = -diff; + switch (relation) + { + case 1: /* < */ + if (diff < distance && diff >= 0) + excl = !excl; + break; + case 2: /* <= */ + if (diff <= distance && diff >= 0) + excl = !excl; + break; + case 3: /* == */ + if (diff == distance && diff >= 0) + excl = !excl; + break; + case 4: /* >= */ + if (diff >= distance && diff >= 0) + excl = !excl; + break; + case 5: /* > */ + if (diff > distance && diff >= 0) + excl = !excl; + break; + case 6: /* != */ + if (diff != distance && diff >= 0) + excl = !excl; + break; + } + if (excl) + { + rset_write (result, rsfd_result, buf[1]); + break; + } + } + } while ((more[1] = rset_read (rset[1], rsfd[1], buf[1], + &term_index)) && + sysno == 
buf[1]->sysno); + } } rset_close (result, rsfd_result); } + else + { + rset_null_parms parms; + + parms.rset_term = rset_term_create (prox_term, length_prox_term, + flags); + parms.rset_term->nn = 0; + result = rset_create (rset_kind_null, &parms); + } + for (i = 0; izebra_maps, reg_id, ex_list, + termz, strlen(termz)); + if (!wrbuf) + return nmem_strdup(stream, termz); + else + { + char *buf = (char*) nmem_malloc (stream, wrbuf_len(wrbuf)+1); + memcpy (buf, wrbuf_buf(wrbuf), wrbuf_len(wrbuf)); + buf[wrbuf_len(wrbuf)] = '\0'; + return buf; + } +} + static RSET rpn_search_APT_phrase (ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const char *termz, + const char *termz_org, oid_value attributeSet, NMEM stream, int reg_type, int complete_flag, @@ -1391,10 +1616,11 @@ static RSET rpn_search_APT_phrase (ZebraHandle zh, int num_bases, char **basenames) { char term_dst[IT_MAX_WORD+1]; - const char *termp = termz; RSET rset[60], result; int i, r, rset_no = 0; struct grep_info grep_info; + char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type); + const char *termp = termz; #ifdef TERM_COUNT grep_info.term_no = 0; @@ -1434,7 +1660,7 @@ static RSET rpn_search_APT_phrase (ZebraHandle zh, } else if (rset_no == 1) return (rset[0]); - result = rpn_prox (zh, rset, rset_no); + result = rpn_prox (zh, rset, rset_no, 1, 0, 3, 1); for (i = 0; iu.prox->ordered, - (!zop->u.prox->exclusion ? 0 : - *zop->u.prox->exclusion), - *zop->u.prox->relationType, - *zop->u.prox->distance); + else + { + RSET rsets[2]; + + rsets[0] = bool_parms.rset_l; + rsets[1] = bool_parms.rset_r; + + r = rpn_prox (zh, rsets, 2, + *zop->u.prox->ordered, + (!zop->u.prox->exclusion ? 
0 : + *zop->u.prox->exclusion), + *zop->u.prox->relationType, + *zop->u.prox->distance); + rset_delete (rsets[0]); + rset_delete (rsets[1]); + } break; default: zh->errCode = 110; @@ -2141,7 +2378,7 @@ RSET rpn_search (ZebraHandle zh, NMEM nmem, struct scan_info_entry { char *term; - ISAM_P isam_p; + ISAMS_P isam_p; }; struct scan_info { @@ -2159,15 +2396,14 @@ static int scan_handle (char *name, const char *info, int pos, void *client) len_prefix = strlen(scan_info->prefix); if (memcmp (name, scan_info->prefix, len_prefix)) return 1; - if (pos > 0) - idx = scan_info->after - pos + scan_info->before; + if (pos > 0) idx = scan_info->after - pos + scan_info->before; else idx = - pos - 1; scan_info->list[idx].term = (char *) odr_malloc (scan_info->odr, strlen(name + len_prefix)+1); strcpy (scan_info->list[idx].term, name + len_prefix); - assert (*info == sizeof(ISAM_P)); - memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P)); + assert (*info == sizeof(ISAMS_P)); + memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAMS_P)); return 0; } @@ -2232,6 +2468,7 @@ void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, char *rank_type = NULL; int complete_flag; int sort_flag; + *list = 0; if (attributeset == VAL_NONE) attributeset = VAL_BIB1; @@ -2264,6 +2501,8 @@ void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, zh->errCode = 114; else zh->errCode = 121; + *num_entries = 0; + return; } if (zebraExplain_curDatabase (zh->zei, basenames[base_no])) {