X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fzrpn.c;h=88dbefcdcbc924e2ebe3ee245ff900128d990759;hb=47ea1fc957c7b97bb30a26698f072109cae275e4;hp=78f61803ff74713094ad87292a241e024f7003f3;hpb=15ded665c8ca37e3c7a99ce48c2e78acdb727bd6;p=idzebra-moved-to-github.git diff --git a/index/zrpn.c b/index/zrpn.c index 78f6180..88dbefc 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -3,7 +3,7 @@ * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * - * $Id: zrpn.c,v 1.115 2002-04-13 18:16:43 adam Exp $ + * $Id: zrpn.c,v 1.117 2002-07-25 13:06:43 adam Exp $ */ #include #include @@ -250,6 +250,8 @@ static int term_pre (ZebraMaps zebra_maps, int reg_type, const char **src, return *s0; } +#define REGEX_CHARS "[]()|.*+!" + /* term_100: handle term, where trunc=none (no operators at all) */ static int term_100 (ZebraMaps zebra_maps, int reg_type, const char **src, char *dst, int space_split, @@ -287,7 +289,7 @@ static int term_100 (ZebraMaps zebra_maps, int reg_type, { /* reload last space */ while (space_start < space_end) { - if (!isalnum (*space_start) && *space_start != '-') + if (strchr (REGEX_CHARS, *space_start)) dst[i++] = '\\'; dst_term[j++] = *space_start; dst[i++] = *space_start++; @@ -299,7 +301,7 @@ static int term_100 (ZebraMaps zebra_maps, int reg_type, /* add non-space char */ while (s1 < s0) { - if (!isalnum (*s1) && *s1 != '-') + if (strchr(REGEX_CHARS, *s1)) dst[i++] = '\\'; dst_term[j++] = *s1; dst[i++] = *s1++; @@ -340,7 +342,7 @@ static int term_101 (ZebraMaps zebra_maps, int reg_type, break; while (s1 < s0) { - if (!isalnum (*s1)) + if (strchr(REGEX_CHARS, *s1)) dst[i++] = '\\'; dst_term[j++] = *s1; dst[i++] = *s1++; @@ -389,7 +391,7 @@ static int term_103 (ZebraMaps zebra_maps, int reg_type, const char **src, break; while (s1 < s0) { - if (!isalnum (*s1)) + if (strchr(REGEX_CHARS, *s1)) dst[i++] = '\\'; dst_term[j++] = *s1; dst[i++] = *s1++; @@ -444,7 +446,7 @@ static int term_104 (ZebraMaps zebra_maps, int reg_type, break; while (s1 < s0) { - if (!isalnum (*s1)) + if (strchr(REGEX_CHARS, *s1)) dst[i++] = '\\'; dst_term[j++] = *s1; dst[i++] = *s1++; @@ -490,7 +492,7 @@ static int term_105 (ZebraMaps zebra_maps, int reg_type, break; while (s1 < s0) { - if (!isalnum (*s1)) + if (strchr(REGEX_CHARS, *s1)) dst[i++] = '\\'; dst_term[j++] = *s1; dst[i++] = *s1++; @@ -824,7 +826,8 @@ static RSET term_trunc (ZebraHandle zh, Z_AttributesPlusTerm *zapt, logf (LOG_DEBUG, "term: %s", term_dst); return rset_trunc (zh, grep_info->isam_p_buf, grep_info->isam_p_indx, term_dst, - strlen(term_dst), rank_type, 1 /* preserve pos */); + strlen(term_dst), rank_type, 1 /* preserve pos */, + zapt->term->which); } @@ -891,6 +894,7 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, curAttributeSet, use_value, r); if (r == -1) { + /* set was found, but value wasn't defined */ char val_str[32]; sprintf (val_str, "%d", use_value); zh->errCode = 114; @@ -1070,17 +1074,55 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, return 1; } -static void trans_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, - char *termz) + +static int trans_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, + char *termz) { size_t sizez; Z_Term *term = zapt->term; - sizez = term->u.general->len; - if (sizez > IT_MAX_WORD-1) - sizez = IT_MAX_WORD-1; - memcpy (termz, term->u.general->buf, sizez); - termz[sizez] = '\0'; + switch (term->which) + { + case Z_Term_general: +#if HAVE_ICONV_H + if (zh->iconv_to_utf8 != (iconv_t)(-1)) + { + char *inbuf = term->u.general->buf; + size_t inleft = term->u.general->len; + char *outbuf = termz; + size_t outleft = IT_MAX_WORD-1; + size_t ret; + + yaz_log (LOG_DEBUG, "converting general from ISO-8859-1"); + ret = iconv(zh->iconv_to_utf8, &inbuf, &inleft, + &outbuf, &outleft); + if (ret == (size_t)(-1)) + { + ret = iconv(zh->iconv_to_utf8, 0, 0, 0, 0); + zh->errCode = 125; + return -1; + } + *outbuf = 0; + return 0; + } +#endif + sizez = term->u.general->len; + if (sizez > IT_MAX_WORD-1) + sizez = IT_MAX_WORD-1; + memcpy (termz, term->u.general->buf, sizez); + termz[sizez] = '\0'; + break; + case Z_Term_characterString: + sizez = strlen(term->u.characterString); + if (sizez > IT_MAX_WORD-1) + sizez = IT_MAX_WORD-1; + memcpy (termz, term->u.characterString, sizez); + termz[sizez] = '\0'; + break; + default: + zh->errCode = 124; + } + return 0; } static void trans_scan_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, @@ -1125,6 +1167,7 @@ static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no, int length_prox_term = 0; int min_nn = 10000000; int term_index; + int term_type = Z_Term_characterString; const char *flags = NULL; rsfd = (RSFD *) xmalloc (sizeof(*rsfd)*rset_no); @@ -1150,6 +1193,12 @@ static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no, if (min_nn > rset[i]->rset_terms[j]->nn) min_nn = rset[i]->rset_terms[j]->nn; flags = nflags; + term_type = rset[i]->rset_terms[j]->type; + + /* only if all term types are of type characterString .. */ + /* the resulting term is of that type */ + if (term_type != Z_Term_characterString) + term_type = Z_Term_general; } } for (i = 0; inn = 0; result = rset_create (rset_kind_null, &parms); } @@ -1181,7 +1230,7 @@ static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no, RSFD rsfd_result; parms.rset_term = rset_term_create (prox_term, length_prox_term, - flags); + flags, term_type); parms.rset_term->nn = min_nn; parms.cmp = key_compare_it; parms.key_size = sizeof (struct it_key); @@ -1240,7 +1289,7 @@ static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no, logf (LOG_LOG, "generic prox, dist = %d, relation = %d, ordered =%d, exclusion=%d", distance, relation, ordered, exclusion); parms.rset_term = rset_term_create (prox_term, length_prox_term, - flags); + flags, term_type); parms.rset_term->nn = min_nn; parms.cmp = key_compare_it; parms.key_size = sizeof (struct it_key); @@ -1320,7 +1369,7 @@ static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no, rset_null_parms parms; parms.rset_term = rset_term_create (prox_term, length_prox_term, - flags); + flags, term_type); parms.rset_term->nn = 0; result = rset_create (rset_kind_null, &parms); } @@ -1473,7 +1522,8 @@ static RSET rpn_search_APT_phrase (ZebraHandle zh, { rset_null_parms parms; - parms.rset_term = rset_term_create (termz, -1, rank_type); + parms.rset_term = rset_term_create (termz, -1, rank_type, + zapt->term->which); return rset_create (rset_kind_null, &parms); } else if (rset_no == 1) @@ -1520,7 +1570,8 @@ static RSET rpn_search_APT_or_list (ZebraHandle zh, { rset_null_parms parms; - parms.rset_term = rset_term_create (termz, -1, rank_type); + parms.rset_term = rset_term_create (termz, -1, rank_type, + zapt->term->which); return rset_create (rset_kind_null, &parms); } result = rset[0]; @@ -1574,7 +1625,8 @@ static RSET rpn_search_APT_and_list (ZebraHandle zh, { rset_null_parms parms; - parms.rset_term = rset_term_create (termz, -1, rank_type); + parms.rset_term = rset_term_create (termz, -1, rank_type, + zapt->term->which); return rset_create (rset_kind_null, &parms); } result = rset[0]; @@ -1766,7 +1818,8 @@ static RSET rpn_search_APT_numeric (ZebraHandle zh, rset[rset_no] = rset_trunc (zh, grep_info.isam_p_buf, grep_info.isam_p_indx, term_dst, strlen(term_dst), rank_type, - 0 /* preserve position */); + 0 /* preserve position */, + zapt->term->which); assert (rset[rset_no]); if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset))) break; @@ -1776,7 +1829,8 @@ static RSET rpn_search_APT_numeric (ZebraHandle zh, { rset_null_parms parms; - parms.rset_term = rset_term_create (term_dst, -1, rank_type); + parms.rset_term = rset_term_create (term_dst, -1, rank_type, + zapt->term->which); return rset_create (rset_kind_null, &parms); } result = rset[0]; @@ -1804,7 +1858,8 @@ static RSET rpn_search_APT_local (ZebraHandle zh, Z_AttributesPlusTerm *zapt, struct it_key key; rset_temp_parms parms; - parms.rset_term = rset_term_create (termz, -1, rank_type); + parms.rset_term = rset_term_create (termz, -1, rank_type, + zapt->term->which); parms.cmp = key_compare_it; parms.key_size = sizeof (struct it_key); parms.temp_path = res_get (zh->res, "setTmpDir"); @@ -1917,7 +1972,8 @@ static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt, sort_sequence->specs[i] = sks; - parms.rset_term = rset_term_create (termz, -1, rank_type); + parms.rset_term = rset_term_create (termz, -1, rank_type, + zapt->term->which); return rset_create (rset_kind_null, &parms); } @@ -2000,7 +2056,7 @@ static RSET rpn_search_xpath (ZebraHandle zh, Z_AttributesPlusTerm *zapt, rset_start_tag = rset_trunc (zh, grep_info.isam_p_buf, grep_info.isam_p_indx, use_string, strlen(use_string), - rank_type, 1); + rank_type, 1, zapt->term->which); prefix_len = 0; ord = zebraExplain_lookupSU (zh->reg->zei, curAttributeSet, 2); @@ -2034,7 +2090,7 @@ static RSET rpn_search_xpath (ZebraHandle zh, Z_AttributesPlusTerm *zapt, rset_end_tag = rset_trunc (zh, grep_info.isam_p_buf, grep_info.isam_p_indx, use_string, strlen(use_string), - rank_type, 1); + rank_type, 1, zapt->term->which); parms.key_size = sizeof(struct it_key); parms.cmp = key_compare_it; @@ -2073,12 +2129,8 @@ static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt, logf (LOG_DEBUG, "search_type=%s", search_type); logf (LOG_DEBUG, "rank_type=%s", rank_type); - if (zapt->term->which != Z_Term_general) - { - zh->errCode = 124; - return NULL; - } - trans_term (zh, zapt, termz); + if (trans_term (zh, zapt, termz)) + return 0; if (sort_flag) return rpn_sort_spec (zh, zapt, attributeSet, stream, sort_sequence, @@ -2523,7 +2575,7 @@ void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, &glist[i+before].term, mterm); rset = rset_trunc (zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1, glist[i+before].term, strlen(glist[i+before].term), - NULL, 0); + NULL, 0, zapt->term->which); ptr[j0]++; for (j = j0+1; jterm->which); bool_parms.key_size = sizeof(struct it_key); bool_parms.cmp = key_compare_it; @@ -2589,7 +2642,7 @@ void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, rset = rset_trunc (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1, glist[before-1-i].term, strlen(glist[before-1-i].term), - NULL, 0); + NULL, 0, zapt->term->which); ptr[j0]++; @@ -2605,7 +2658,8 @@ void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, rset2 = rset_trunc (zh, &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1, glist[before-1-i].term, - strlen(glist[before-1-i].term), NULL, 0); + strlen(glist[before-1-i].term), NULL, 0, + zapt->term->which); bool_parms.key_size = sizeof(struct it_key); bool_parms.cmp = key_compare_it;