X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fzrpn.c;h=e87cf464d07f2051bb55b24711e85e6969cc9e1e;hb=28f3461a79a4569d736f4ffc66c6ae207e38c2ab;hp=766df2a261f8b74ff92827f09f8b7ad5f3c3fdf2;hpb=543ab71dfcf03778b00faa164f94552cf11cab79;p=idzebra-moved-to-github.git diff --git a/index/zrpn.c b/index/zrpn.c index 766df2a..e87cf46 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -4,7 +4,19 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zrpn.c,v $ - * Revision 1.94 1999-07-20 13:59:18 adam + * Revision 1.98 1999-11-30 13:48:04 adam + * Improved installation. Updated for inclusion of YAZ header files. + * + * Revision 1.97 1999/10/14 14:33:50 adam + * Added truncation 5=106. + * + * Revision 1.96 1999/09/23 10:05:05 adam + * Implemented structure=105 searching. + * + * Revision 1.95 1999/09/07 07:19:21 adam + * Work on character mapping. Implemented replace rules. + * + * Revision 1.94 1999/07/20 13:59:18 adam * Fixed bug that occurred when phrases had 0 hits. * * Revision 1.93 1999/06/17 14:38:40 adam @@ -439,7 +451,7 @@ struct grep_info { #ifdef TERM_COUNT int *term_no; #endif - ISAM_P *isam_p_buf; + ISAMS_P *isam_p_buf; int isam_p_size; int isam_p_indx; ZebraHandle zh; @@ -452,8 +464,11 @@ static void term_untrans (ZebraHandle zh, int reg_type, while (*src) { const char *cp = zebra_maps_output (zh->zebra_maps, reg_type, &src); - while (*cp) - *dst++ = *cp++; + if (!cp) + *dst++ = *src++; + else + while (*cp) + *dst++ = *cp++; } *dst = '\0'; } @@ -463,12 +478,12 @@ static void add_isam_p (const char *name, const char *info, { if (p->isam_p_indx == p->isam_p_size) { - ISAM_P *new_isam_p_buf; + ISAMS_P *new_isam_p_buf; #ifdef TERM_COUNT int *new_term_no; #endif p->isam_p_size = 2*p->isam_p_size + 100; - new_isam_p_buf = (ISAM_P *) xmalloc (sizeof(*new_isam_p_buf) * + new_isam_p_buf = (ISAMS_P *) xmalloc (sizeof(*new_isam_p_buf) * p->isam_p_size); if (p->isam_p_buf) { @@ -661,6 +676,106 @@ static int term_102 (ZebraMaps zebra_maps, int reg_type, const char **src, dst_term); } + +/* term_104: handle term, where trunc=Process # and ! */ +static int term_104 (ZebraMaps zebra_maps, int reg_type, + const char **src, char *dst, int space_split, + char *dst_term) +{ + const char *s0, *s1; + const char **map; + int i = 0; + int j = 0; + + if (!term_pre (zebra_maps, reg_type, src, "#!", "#!")) + return 0; + s0 = *src; + while (*s0) + { + if (*s0 == '#') + { + dst[i++] = '.'; + dst[i++] = '*'; + dst_term[j++] = *s0++; + } + else if (*s0 == '!') + { + dst[i++] = '.'; + dst_term[j++] = *s0++; + } + { + s1 = s0; + map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0)); + if (space_split && **map == *CHR_SPACE) + break; + while (s1 < s0) + { + if (!isalnum (*s1)) + dst[i++] = '\\'; + dst_term[j++] = *s1; + dst[i++] = *s1++; + } + } + } + dst[i] = '\0'; + dst_term[j++] = '\0'; + *src = s0; + return i; +} + +/* term_105/106: handle term, where trunc=Process # and ! and right trunc */ +static int term_105 (ZebraMaps zebra_maps, int reg_type, + const char **src, char *dst, int space_split, + char *dst_term, int right_truncate) +{ + const char *s0, *s1; + const char **map; + int i = 0; + int j = 0; + + if (!term_pre (zebra_maps, reg_type, src, "*!", "*!")) + return 0; + s0 = *src; + while (*s0) + { + if (*s0 == '*') + { + dst[i++] = '.'; + dst[i++] = '+'; + dst_term[j++] = *s0++; + } + else if (*s0 == '!') + { + dst[i++] = '.'; + dst_term[j++] = *s0++; + } + { + s1 = s0; + map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0)); + if (space_split && **map == *CHR_SPACE) + break; + while (s1 < s0) + { + if (!isalnum (*s1)) + dst[i++] = '\\'; + dst_term[j++] = *s1; + dst[i++] = *s1++; + } + } + } + if (right_truncate) + { + dst[i++] = '.'; + dst[i++] = '*'; + } + dst[i] = '\0'; + + dst_term[j++] = '\0'; + *src = s0; + return i; +} + + /* gen_regular_rel - generate regular expression from relation * val: border value (inclusive) * islt: 1 if <=; 0 if >=. @@ -758,11 +873,14 @@ static void gen_regular_rel (char *dst, int val, int islt) dst[dst_p] = '\0'; if (islt) { - for (i=1; izebra_maps, reg_type, + &termp, term_dict + j, space_split, term_dst)) + return 0; + strcat (term_dict, ")"); + r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info, + &max_pos, 0, grep_handle); + if (r) + logf (LOG_WARN, "dict_lookup_grep err, trunc=#/!: %d", r); + break; + case 105: /* process * and ! in term */ + term_dict[j++] = '('; + if (!term_105 (zh->zebra_maps, reg_type, + &termp, term_dict + j, space_split, term_dst, 1)) + return 0; + strcat (term_dict, ")"); + r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info, + &max_pos, 0, grep_handle); + if (r) + logf (LOG_WARN, "dict_lookup_grep err, trunc=*/!: %d", r); + break; + case 106: /* process * and ! in term */ + term_dict[j++] = '('; + if (!term_105 (zh->zebra_maps, reg_type, + &termp, term_dict + j, space_split, term_dst, 0)) + return 0; + strcat (term_dict, ")"); + r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info, + &max_pos, 0, grep_handle); + if (r) + logf (LOG_WARN, "dict_lookup_grep err, trunc=*/!: %d", r); + break; } } *term_sub = termp; @@ -1399,9 +1550,54 @@ static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no) return result; } + +char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, + const char *termz, NMEM stream, unsigned reg_id) +{ + WRBUF wrbuf = 0; + AttrType truncation; + int truncation_value; + char *ex_list = 0; + + attr_init (&truncation, zapt, 5); + truncation_value = attr_find (&truncation, NULL); + + switch (truncation_value) + { + default: + ex_list = ""; + break; + case 101: + ex_list = "#"; + break; + case 102: + case 103: + ex_list = 0; + break; + case 104: + ex_list = "!#"; + break; + case 105: + ex_list = "!*"; + break; + } + if (ex_list) + wrbuf = zebra_replace(zh->zebra_maps, reg_id, ex_list, + termz, strlen(termz)); + if (!wrbuf) + return nmem_strdup(stream, termz); + else + { + char *buf = (char*) nmem_malloc (stream, wrbuf_len(wrbuf)+1); + memcpy (buf, wrbuf_buf(wrbuf), wrbuf_len(wrbuf)); + buf[wrbuf_len(wrbuf)] = '\0'; + return buf; + } +} + static RSET rpn_search_APT_phrase (ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const char *termz, + const char *termz_org, oid_value attributeSet, NMEM stream, int reg_type, int complete_flag, @@ -1409,10 +1605,11 @@ static RSET rpn_search_APT_phrase (ZebraHandle zh, int num_bases, char **basenames) { char term_dst[IT_MAX_WORD+1]; - const char *termp = termz; RSET rset[60], result; int i, r, rset_no = 0; struct grep_info grep_info; + char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type); + const char *termp = termz; #ifdef TERM_COUNT grep_info.term_no = 0; @@ -1460,7 +1657,7 @@ static RSET rpn_search_APT_phrase (ZebraHandle zh, static RSET rpn_search_APT_or_list (ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const char *termz, + const char *termz_org, oid_value attributeSet, NMEM stream, int reg_type, int complete_flag, @@ -1468,11 +1665,11 @@ static RSET rpn_search_APT_or_list (ZebraHandle zh, int num_bases, char **basenames) { char term_dst[IT_MAX_WORD+1]; - const char *termp = termz; RSET rset[60], result; int i, r, rset_no = 0; struct grep_info grep_info; - + char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type); + const char *termp = termz; #ifdef TERM_COUNT grep_info.term_no = 0; #endif @@ -1525,7 +1722,7 @@ static RSET rpn_search_APT_or_list (ZebraHandle zh, static RSET rpn_search_APT_and_list (ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const char *termz, + const char *termz_org, oid_value attributeSet, NMEM stream, int reg_type, int complete_flag, @@ -1533,10 +1730,11 @@ static RSET rpn_search_APT_and_list (ZebraHandle zh, int num_bases, char **basenames) { char term_dst[IT_MAX_WORD+1]; - const char *termp = termz; RSET rset[60], result; int i, r, rset_no = 0; struct grep_info grep_info; + char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type); + const char *termp = termz; #ifdef TERM_COUNT grep_info.term_no = 0; @@ -2159,7 +2357,7 @@ RSET rpn_search (ZebraHandle zh, NMEM nmem, struct scan_info_entry { char *term; - ISAM_P isam_p; + ISAMS_P isam_p; }; struct scan_info { @@ -2183,8 +2381,8 @@ static int scan_handle (char *name, const char *info, int pos, void *client) scan_info->list[idx].term = (char *) odr_malloc (scan_info->odr, strlen(name + len_prefix)+1); strcpy (scan_info->list[idx].term, name + len_prefix); - assert (*info == sizeof(ISAM_P)); - memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P)); + assert (*info == sizeof(ISAMS_P)); + memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAMS_P)); return 0; }