X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fzrpn.c;h=88dbefcdcbc924e2ebe3ee245ff900128d990759;hb=47ea1fc957c7b97bb30a26698f072109cae275e4;hp=06f42599cbe57bf97b6199f75a93e0d303e76ce3;hpb=85728191a4dc93d31e3d1bd0ddc2ae60381ae6ba;p=idzebra-moved-to-github.git diff --git a/index/zrpn.c b/index/zrpn.c index 06f4259..88dbefc 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -3,7 +3,7 @@ * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * - * $Id: zrpn.c,v 1.113 2002-04-12 14:40:42 adam Exp $ + * $Id: zrpn.c,v 1.117 2002-07-25 13:06:43 adam Exp $ */ #include #include @@ -85,17 +85,17 @@ static int attr_find_ex (AttrType *src, oid_value *attributeSetP, case Z_AttributeValue_complex: if (src->minor >= element->value.complex->num_list) break; + if (element->attributeSet && attributeSetP) + { + oident *attrset; + + attrset = oid_getentbyoid (element->attributeSet); + *attributeSetP = attrset->value; + } if (element->value.complex->list[src->minor]->which == Z_StringOrNumeric_numeric) { ++(src->minor); - if (element->attributeSet && attributeSetP) - { - oident *attrset; - - attrset = oid_getentbyoid (element->attributeSet); - *attributeSetP = attrset->value; - } return *element->value.complex->list[src->minor-1]->u.numeric; } @@ -250,6 +250,8 @@ static int term_pre (ZebraMaps zebra_maps, int reg_type, const char **src, return *s0; } +#define REGEX_CHARS "[]()|.*+!" + /* term_100: handle term, where trunc=none (no operators at all) */ static int term_100 (ZebraMaps zebra_maps, int reg_type, const char **src, char *dst, int space_split, @@ -287,7 +289,7 @@ static int term_100 (ZebraMaps zebra_maps, int reg_type, { /* reload last space */ while (space_start < space_end) { - if (!isalnum (*space_start) && *space_start != '-') + if (strchr (REGEX_CHARS, *space_start)) dst[i++] = '\\'; dst_term[j++] = *space_start; dst[i++] = *space_start++; @@ -299,7 +301,7 @@ static int term_100 (ZebraMaps zebra_maps, int reg_type, /* add non-space char */ while (s1 < s0) { - if (!isalnum (*s1) && *s1 != '-') + if (strchr(REGEX_CHARS, *s1)) dst[i++] = '\\'; dst_term[j++] = *s1; dst[i++] = *s1++; @@ -340,7 +342,7 @@ static int term_101 (ZebraMaps zebra_maps, int reg_type, break; while (s1 < s0) { - if (!isalnum (*s1)) + if (strchr(REGEX_CHARS, *s1)) dst[i++] = '\\'; dst_term[j++] = *s1; dst[i++] = *s1++; @@ -389,7 +391,7 @@ static int term_103 (ZebraMaps zebra_maps, int reg_type, const char **src, break; while (s1 < s0) { - if (!isalnum (*s1)) + if (strchr(REGEX_CHARS, *s1)) dst[i++] = '\\'; dst_term[j++] = *s1; dst[i++] = *s1++; @@ -444,7 +446,7 @@ static int term_104 (ZebraMaps zebra_maps, int reg_type, break; while (s1 < s0) { - if (!isalnum (*s1)) + if (strchr(REGEX_CHARS, *s1)) dst[i++] = '\\'; dst_term[j++] = *s1; dst[i++] = *s1++; @@ -490,7 +492,7 @@ static int term_105 (ZebraMaps zebra_maps, int reg_type, break; while (s1 < s0) { - if (!isalnum (*s1)) + if (strchr(REGEX_CHARS, *s1)) dst[i++] = '\\'; dst_term[j++] = *s1; dst[i++] = *s1++; @@ -824,7 +826,8 @@ static RSET term_trunc (ZebraHandle zh, Z_AttributesPlusTerm *zapt, logf (LOG_DEBUG, "term: %s", term_dst); return rset_trunc (zh, grep_info->isam_p_buf, grep_info->isam_p_indx, term_dst, - strlen(term_dst), rank_type, 1 /* preserve pos */); + strlen(term_dst), rank_type, 1 /* preserve pos */, + zapt->term->which); } @@ -856,9 +859,9 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, truncation_value = attr_find (&truncation, NULL); logf (LOG_DEBUG, "truncation value %d", truncation_value); - if (use_value == -1) + if (use_value == -1) /* no attribute - assumy "any" */ use_value = 1016; - if (use_value == -2) + if (use_value == -2) /* string attribute - assumy "any" */ use_value = 1016; for (base_no = 0; base_no < num_bases; base_no++) @@ -891,6 +894,7 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, curAttributeSet, use_value, r); if (r == -1) { + /* set was found, but value wasn't defined */ char val_str[32]; sprintf (val_str, "%d", use_value); zh->errCode = 114; @@ -1070,17 +1074,55 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, return 1; } -static void trans_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, - char *termz) + +static int trans_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, + char *termz) { size_t sizez; Z_Term *term = zapt->term; - sizez = term->u.general->len; - if (sizez > IT_MAX_WORD-1) - sizez = IT_MAX_WORD-1; - memcpy (termz, term->u.general->buf, sizez); - termz[sizez] = '\0'; + switch (term->which) + { + case Z_Term_general: +#if HAVE_ICONV_H + if (zh->iconv_to_utf8 != (iconv_t)(-1)) + { + char *inbuf = term->u.general->buf; + size_t inleft = term->u.general->len; + char *outbuf = termz; + size_t outleft = IT_MAX_WORD-1; + size_t ret; + + yaz_log (LOG_DEBUG, "converting general from ISO-8859-1"); + ret = iconv(zh->iconv_to_utf8, &inbuf, &inleft, + &outbuf, &outleft); + if (ret == (size_t)(-1)) + { + ret = iconv(zh->iconv_to_utf8, 0, 0, 0, 0); + zh->errCode = 125; + return -1; + } + *outbuf = 0; + return 0; + } +#endif + sizez = term->u.general->len; + if (sizez > IT_MAX_WORD-1) + sizez = IT_MAX_WORD-1; + memcpy (termz, term->u.general->buf, sizez); + termz[sizez] = '\0'; + break; + case Z_Term_characterString: + sizez = strlen(term->u.characterString); + if (sizez > IT_MAX_WORD-1) + sizez = IT_MAX_WORD-1; + memcpy (termz, term->u.characterString, sizez); + termz[sizez] = '\0'; + break; + default: + zh->errCode = 124; + } + return 0; } static void trans_scan_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, @@ -1125,6 +1167,7 @@ static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no, int length_prox_term = 0; int min_nn = 10000000; int term_index; + int term_type = Z_Term_characterString; const char *flags = NULL; rsfd = (RSFD *) xmalloc (sizeof(*rsfd)*rset_no); @@ -1150,6 +1193,12 @@ static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no, if (min_nn > rset[i]->rset_terms[j]->nn) min_nn = rset[i]->rset_terms[j]->nn; flags = nflags; + term_type = rset[i]->rset_terms[j]->type; + + /* only if all term types are of type characterString .. */ + /* the resulting term is of that type */ + if (term_type != Z_Term_characterString) + term_type = Z_Term_general; } } for (i = 0; inn = 0; result = rset_create (rset_kind_null, &parms); } @@ -1181,7 +1230,7 @@ static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no, RSFD rsfd_result; parms.rset_term = rset_term_create (prox_term, length_prox_term, - flags); + flags, term_type); parms.rset_term->nn = min_nn; parms.cmp = key_compare_it; parms.key_size = sizeof (struct it_key); @@ -1240,7 +1289,7 @@ static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no, logf (LOG_LOG, "generic prox, dist = %d, relation = %d, ordered =%d, exclusion=%d", distance, relation, ordered, exclusion); parms.rset_term = rset_term_create (prox_term, length_prox_term, - flags); + flags, term_type); parms.rset_term->nn = min_nn; parms.cmp = key_compare_it; parms.key_size = sizeof (struct it_key); @@ -1320,7 +1369,7 @@ static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no, rset_null_parms parms; parms.rset_term = rset_term_create (prox_term, length_prox_term, - flags); + flags, term_type); parms.rset_term->nn = 0; result = rset_create (rset_kind_null, &parms); } @@ -1473,7 +1522,8 @@ static RSET rpn_search_APT_phrase (ZebraHandle zh, { rset_null_parms parms; - parms.rset_term = rset_term_create (termz, -1, rank_type); + parms.rset_term = rset_term_create (termz, -1, rank_type, + zapt->term->which); return rset_create (rset_kind_null, &parms); } else if (rset_no == 1) @@ -1520,7 +1570,8 @@ static RSET rpn_search_APT_or_list (ZebraHandle zh, { rset_null_parms parms; - parms.rset_term = rset_term_create (termz, -1, rank_type); + parms.rset_term = rset_term_create (termz, -1, rank_type, + zapt->term->which); return rset_create (rset_kind_null, &parms); } result = rset[0]; @@ -1574,7 +1625,8 @@ static RSET rpn_search_APT_and_list (ZebraHandle zh, { rset_null_parms parms; - parms.rset_term = rset_term_create (termz, -1, rank_type); + parms.rset_term = rset_term_create (termz, -1, rank_type, + zapt->term->which); return rset_create (rset_kind_null, &parms); } result = rset[0]; @@ -1766,7 +1818,8 @@ static RSET rpn_search_APT_numeric (ZebraHandle zh, rset[rset_no] = rset_trunc (zh, grep_info.isam_p_buf, grep_info.isam_p_indx, term_dst, strlen(term_dst), rank_type, - 0 /* preserve position */); + 0 /* preserve position */, + zapt->term->which); assert (rset[rset_no]); if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset))) break; @@ -1776,7 +1829,8 @@ static RSET rpn_search_APT_numeric (ZebraHandle zh, { rset_null_parms parms; - parms.rset_term = rset_term_create (term_dst, -1, rank_type); + parms.rset_term = rset_term_create (term_dst, -1, rank_type, + zapt->term->which); return rset_create (rset_kind_null, &parms); } result = rset[0]; @@ -1804,7 +1858,8 @@ static RSET rpn_search_APT_local (ZebraHandle zh, Z_AttributesPlusTerm *zapt, struct it_key key; rset_temp_parms parms; - parms.rset_term = rset_term_create (termz, -1, rank_type); + parms.rset_term = rset_term_create (termz, -1, rank_type, + zapt->term->which); parms.cmp = key_compare_it; parms.key_size = sizeof (struct it_key); parms.temp_path = res_get (zh->res, "setTmpDir"); @@ -1917,7 +1972,8 @@ static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt, sort_sequence->specs[i] = sks; - parms.rset_term = rset_term_create (termz, -1, rank_type); + parms.rset_term = rset_term_create (termz, -1, rank_type, + zapt->term->which); return rset_create (rset_kind_null, &parms); } @@ -1931,8 +1987,8 @@ static RSET rpn_search_xpath (ZebraHandle zh, Z_AttributesPlusTerm *zapt, oid_value curAttributeSet = attributeSet; char term_dict[2048]; int base_no; + int reg_type = '0'; struct grep_info grep_info; - struct rpn_char_map_info rcmi; yaz_log (LOG_LOG, "rpn_search_xpath 1"); attr_init (&use, zapt, 1); @@ -1949,14 +2005,14 @@ static RSET rpn_search_xpath (ZebraHandle zh, Z_AttributesPlusTerm *zapt, return rset; } - rpn_char_map_prepare (zh->reg, '0', &rcmi); - - if (grep_info_prepare (zh, zapt, &grep_info, '0', stream)) + dict_grep_cmap (zh->reg->dict, 0, 0); + if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream)) return 0; yaz_log (LOG_LOG, "rpn_search_xpath 2"); for (base_no = 0; base_no < num_bases; base_no++) { + const char *termp = use_string; rset_between_parms parms; RSET rset_start_tag, rset_end_tag; int ord, ord_len, i, r, max_pos; @@ -1986,8 +2042,11 @@ static RSET rpn_search_xpath (ZebraHandle zh, Z_AttributesPlusTerm *zapt, } term_dict[prefix_len++] = ')'; term_dict[prefix_len++] = 1; - term_dict[prefix_len++] = '0'; + term_dict[prefix_len++] = reg_type; + + termp = use_string; strcpy (term_dict+prefix_len, use_string); + grep_info.isam_p_indx = 0; yaz_log (LOG_LOG, "rpn_search_xpath 3 %s", term_dict+prefix_len); r = dict_lookup_grep (zh->reg->dict, term_dict, 0, @@ -1997,7 +2056,7 @@ static RSET rpn_search_xpath (ZebraHandle zh, Z_AttributesPlusTerm *zapt, rset_start_tag = rset_trunc (zh, grep_info.isam_p_buf, grep_info.isam_p_indx, use_string, strlen(use_string), - rank_type, 1); + rank_type, 1, zapt->term->which); prefix_len = 0; ord = zebraExplain_lookupSU (zh->reg->zei, curAttributeSet, 2); @@ -2016,8 +2075,12 @@ static RSET rpn_search_xpath (ZebraHandle zh, Z_AttributesPlusTerm *zapt, } term_dict[prefix_len++] = ')'; term_dict[prefix_len++] = 1; - term_dict[prefix_len++] = '0'; + term_dict[prefix_len++] = reg_type; + + termp = use_string; + strcpy (term_dict+prefix_len, use_string); + grep_info.isam_p_indx = 0; r = dict_lookup_grep (zh->reg->dict, term_dict, 0, &grep_info, &max_pos, 0, grep_handle); @@ -2027,13 +2090,14 @@ static RSET rpn_search_xpath (ZebraHandle zh, Z_AttributesPlusTerm *zapt, rset_end_tag = rset_trunc (zh, grep_info.isam_p_buf, grep_info.isam_p_indx, use_string, strlen(use_string), - rank_type, 1); + rank_type, 1, zapt->term->which); parms.key_size = sizeof(struct it_key); parms.cmp = key_compare_it; parms.rset_l = rset_start_tag; parms.rset_m = rset; parms.rset_r = rset_end_tag; + parms.printer = key_print_it; yaz_log (LOG_LOG, "rpn_search_xpath 4"); rset = rset_create (rset_kind_between, &parms); } @@ -2065,12 +2129,8 @@ static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt, logf (LOG_DEBUG, "search_type=%s", search_type); logf (LOG_DEBUG, "rank_type=%s", rank_type); - if (zapt->term->which != Z_Term_general) - { - zh->errCode = 124; - return NULL; - } - trans_term (zh, zapt, termz); + if (trans_term (zh, zapt, termz)) + return 0; if (sort_flag) return rpn_sort_spec (zh, zapt, attributeSet, stream, sort_sequence, @@ -2515,7 +2575,7 @@ void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, &glist[i+before].term, mterm); rset = rset_trunc (zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1, glist[i+before].term, strlen(glist[i+before].term), - NULL, 0); + NULL, 0, zapt->term->which); ptr[j0]++; for (j = j0+1; jterm->which); bool_parms.key_size = sizeof(struct it_key); bool_parms.cmp = key_compare_it; @@ -2581,7 +2642,7 @@ void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, rset = rset_trunc (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1, glist[before-1-i].term, strlen(glist[before-1-i].term), - NULL, 0); + NULL, 0, zapt->term->which); ptr[j0]++; @@ -2597,7 +2658,8 @@ void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, rset2 = rset_trunc (zh, &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1, glist[before-1-i].term, - strlen(glist[before-1-i].term), NULL, 0); + strlen(glist[before-1-i].term), NULL, 0, + zapt->term->which); bool_parms.key_size = sizeof(struct it_key); bool_parms.cmp = key_compare_it;