X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fzrpn.c;h=d3a12bdb25a1793446532433bdaa1219f47a3455;hb=b48a14d0d5fafde2fbbcc3c45451fc6f6de93c00;hp=1b74b9a1be64fdd8462a0fceda83029982bfc149;hpb=7a2d0f25682890bde5d8f2883d6020df2ed0b365;p=idzebra-moved-to-github.git diff --git a/index/zrpn.c b/index/zrpn.c index 1b74b9a..d3a12bd 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -1,4 +1,4 @@ -/* $Id: zrpn.c,v 1.197 2005-06-07 14:53:39 adam Exp $ +/* $Id: zrpn.c,v 1.203 2005-08-08 12:04:02 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -24,7 +24,8 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include #ifdef WIN32 #include -#else +#endif +#if HAVE_UNISTD_H #include #endif #include @@ -989,6 +990,43 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, char *term_dst, int xpath_use, struct ord_list **ol); +static ZEBRA_RES term_limits_APT(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + zint *hits_limit_value, + const char **term_ref_id_str) +{ + AttrType term_ref_id_attr; + AttrType hits_limit_attr; + + attr_init(&hits_limit_attr, zapt, 9); + *hits_limit_value = attr_find(&hits_limit_attr, NULL); + + attr_init(&term_ref_id_attr, zapt, 10); + attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str); + + /* no limit given ? */ + if (*hits_limit_value == -1) + if (*term_ref_id_str) + { + /* use global if term_ref is present */ + *hits_limit_value = zh->approx_limit; + } + else + { + /* no counting if term_ref is not present */ + *hits_limit_value = 0; + } + else if (*hits_limit_value == 0) + { + /* 0 is the same as global limit */ + *hits_limit_value = zh->approx_limit; + } + yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT, + *term_ref_id_str ? *term_ref_id_str : "none", + *hits_limit_value); + return ZEBRA_OK; +} + static ZEBRA_RES term_trunc(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, @@ -1004,7 +1042,11 @@ static ZEBRA_RES term_trunc(ZebraHandle zh, { ZEBRA_RES res; struct ord_list *ol; + zint hits_limit_value; + const char *term_ref_id_str = 0; *rset = 0; + + term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str); grep_info->isam_p_indx = 0; res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info, reg_type, complete_flag, num_bases, basenames, @@ -1018,19 +1060,13 @@ static ZEBRA_RES term_trunc(ZebraHandle zh, grep_info->isam_p_indx, term_dst, strlen(term_dst), rank_type, 1 /* preserve pos */, zapt->term->which, rset_nmem, - kc, kc->scope, ol, reg_type); + kc, kc->scope, ol, reg_type, hits_limit_value, + term_ref_id_str); if (!*rset) return ZEBRA_FAIL; return ZEBRA_OK; } -static char *nmem_strdup_i(NMEM nmem, int v) -{ - char val_str[64]; - sprintf(val_str, "%d", v); - return nmem_strdup(nmem, val_str); -} - static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, oid_value attributeSet, NMEM stream, @@ -1053,9 +1089,6 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, int space_split = complete_flag ? 0 : 1; int bases_ok = 0; /* no of databases with OK attribute */ - int errCode = 0; /* err code (if any is not OK) */ - char *errString = 0; /* addinfo */ - *ol = ord_list_create(stream); @@ -1109,6 +1142,7 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, } else if (use_string && (ord = zebraExplain_lookup_attr_str(zh->reg->zei, + reg_type, use_string)) >= 0) { /* we have a match for a raw string attribute */ @@ -1140,11 +1174,13 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, if (r == -1) { /* set was found, but value wasn't defined */ - errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE; if (use_string) - errString = nmem_strdup(stream, use_string); + zebra_setError(zh, + YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, + use_string); else - errString = nmem_strdup_i (stream, use_value); + zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, + use_value); } else { @@ -1156,8 +1192,10 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, oident.value = curAttributeSet; oid_ent_to_oid (&oident, oid); - errCode = YAZ_BIB1_UNSUPP_ATTRIBUTE_SET; - errString = nmem_strdup(stream, oident.desc); + zebra_setError(zh, + YAZ_BIB1_UNSUPP_ATTRIBUTE_SET, + oident.desc); + } continue; } @@ -1169,6 +1207,7 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, int i, ord_len; ord = zebraExplain_lookup_attr_su(zh->reg->zei, + reg_type, attp.attset_ordinal, local_attr->local); if (ord < 0) @@ -1191,9 +1230,11 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, attr_ok = 1; term_dict[prefix_len++] = ')'; +#if REG_TYPE_PREFIX term_dict[prefix_len++] = 1; term_dict[prefix_len++] = reg_type; yaz_log(log_level_rpn, "reg_type = %d", term_dict[prefix_len-1]); +#endif term_dict[prefix_len] = '\0'; j = prefix_len; switch (truncation_value) @@ -1330,10 +1371,7 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, } } if (!bases_ok) - { - zebra_setError(zh, errCode, errString); return ZEBRA_FAIL; - } *term_sub = termp; yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx); return ZEBRA_OK; @@ -1432,50 +1470,6 @@ static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, return ZEBRA_OK; } -char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const char *termz, NMEM stream, unsigned reg_id) -{ - WRBUF wrbuf = 0; - AttrType truncation; - int truncation_value; - char *ex_list = 0; - - attr_init(&truncation, zapt, 5); - truncation_value = attr_find(&truncation, NULL); - - switch (truncation_value) - { - default: - ex_list = ""; - break; - case 101: - ex_list = "#"; - break; - case 102: - case 103: - ex_list = 0; - break; - case 104: - ex_list = "!#"; - break; - case 105: - ex_list = "!*"; - break; - } - if (ex_list) - wrbuf = zebra_replace(zh->reg->zebra_maps, reg_id, ex_list, - termz, strlen(termz)); - if (!wrbuf) - return nmem_strdup(stream, termz); - else - { - char *buf = (char*) nmem_malloc(stream, wrbuf_len(wrbuf)+1); - memcpy (buf, wrbuf_buf(wrbuf), wrbuf_len(wrbuf)); - buf[wrbuf_len(wrbuf)] = '\0'; - return buf; - } -} - static void grep_info_delete(struct grep_info *grep_info) { #ifdef TERM_COUNT @@ -1533,7 +1527,7 @@ static ZEBRA_RES grep_info_prepare(ZebraHandle zh, /** \brief Create result set(s) for list of terms \param zh Zebra Handle - \param termz_org term as used in query but converted to UTF-8 + \param termz term as used in query but converted to UTF-8 \param attributeSet default attribute set \param stream memory for result \param reg_type register type ('w', 'p',..) @@ -1549,7 +1543,7 @@ static ZEBRA_RES grep_info_prepare(ZebraHandle zh, */ static ZEBRA_RES term_list_trunc(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const char *termz_org, + const char *termz, oid_value attributeSet, NMEM stream, int reg_type, int complete_flag, @@ -1561,7 +1555,6 @@ static ZEBRA_RES term_list_trunc(ZebraHandle zh, { char term_dst[IT_MAX_WORD+1]; struct grep_info grep_info; - char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type); const char *termp = termz; int alloc_sets = 0; @@ -1797,8 +1790,6 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, struct rpn_char_map_info rcmi; int bases_ok = 0; /* no of databases with OK attribute */ - int errCode = 0; /* err code (if any is not OK) */ - char *errString = 0; /* addinfo */ rpn_char_map_prepare (zh->reg, reg_type, &rcmi); attr_init(&use, zapt, 1); @@ -1834,20 +1825,22 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, else { if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value, - use_string))) + use_string))) { yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d", curAttributeSet, use_value, r); if (r == -1) { - errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE; if (use_string) - errString = nmem_strdup(stream, use_string); + zebra_setError(zh, + YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, + use_string); else - errString = nmem_strdup_i (stream, use_value); + zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, + use_value); } else - errCode = YAZ_BIB1_UNSUPP_ATTRIBUTE_SET; + zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_SET, 0); continue; } } @@ -1865,6 +1858,7 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, int i, ord_len; ord = zebraExplain_lookup_attr_su(zh->reg->zei, + reg_type, attp.attset_ordinal, local_attr->local); if (ord < 0) @@ -1887,10 +1881,12 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, continue; } bases_ok++; - term_dict[prefix_len++] = ')'; + term_dict[prefix_len++] = ')'; +#if REG_TYPE_PREFIX term_dict[prefix_len++] = 1; term_dict[prefix_len++] = reg_type; yaz_log(YLOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]); +#endif term_dict[prefix_len] = '\0'; if (!numeric_relation(zh, zapt, &termp, term_dict, attributeSet, grep_info, &max_pos, reg_type, @@ -1906,15 +1902,13 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, } } if (!bases_ok) - { - zebra_setError(zh, errCode, errString); return ZEBRA_FAIL; - } *term_sub = termp; yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx); return ZEBRA_OK; } + static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz, @@ -1934,6 +1928,10 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, ZEBRA_RES res; struct grep_info grep_info; int alloc_sets = 0; + zint hits_limit_value; + const char *term_ref_id_str = 0; + + term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str); yaz_log(log_level_rpn, "APT_numeric t='%s'", termz); if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL) @@ -1965,7 +1963,9 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, strlen(term_dst), rank_type, 0 /* preserve position */, zapt->term->which, rset_nmem, - kc, kc->scope, 0, reg_type); + kc, kc->scope, 0, reg_type, + hits_limit_value, + term_ref_id_str); if (!result_sets[num_result_sets]) break; num_result_sets++; @@ -2124,7 +2124,8 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream, char term_dict[2048]; char ord_buf[32]; int prefix_len = 0; - int ord = zebraExplain_lookup_attr_su(zh->reg->zei, curAttributeSet, use); + int ord = zebraExplain_lookup_attr_su(zh->reg->zei, reg_type, + curAttributeSet, use); int ord_len, i, r, max_pos; int term_type = Z_Term_characterString; const char *flags = "void"; @@ -2146,9 +2147,10 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream, term_dict[prefix_len++] = ord_buf[i]; } term_dict[prefix_len++] = ')'; +#if REG_TYPE_PREFIX term_dict[prefix_len++] = 1; term_dict[prefix_len++] = reg_type; - +#endif strcpy(term_dict+prefix_len, term); grep_info.isam_p_indx = 0; @@ -2159,7 +2161,8 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream, rset = rset_trunc(zh, grep_info.isam_p_buf, grep_info.isam_p_indx, term, strlen(term), flags, 1, term_type,rset_nmem, - kc, kc->scope, 0, reg_type); + kc, kc->scope, 0, reg_type, 0 /* hits_limit */, + 0 /* term_ref_id_str */); grep_info_delete(&grep_info); return rset; } @@ -2698,28 +2701,29 @@ void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type, *dst = nmem_strdup(stream, term_src); } -static void count_set (RSET r, int *count) +static void count_set(ZebraHandle zh, RSET rset, zint *count) { zint psysno = 0; - int kno = 0; struct it_key key; RSFD rfd; yaz_log(YLOG_DEBUG, "count_set"); + rset->hits_limit = zh->approx_limit; + *count = 0; - rfd = rset_open (r, RSETF_READ); - while (rset_read (rfd, &key,0 /* never mind terms */)) + rfd = rset_open(rset, RSETF_READ); + while (rset_read(rfd, &key,0 /* never mind terms */)) { if (key.mem[0] != psysno) { psysno = key.mem[0]; - (*count)++; + if (rfd->counted_items >= rset->hits_limit) + break; } - kno++; } rset_close (rfd); - yaz_log(YLOG_DEBUG, "%d keys, %d records", kno, *count); + *count = rset->hits_count; } ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, @@ -2819,7 +2823,7 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, } if (use_string && - (ord = zebraExplain_lookup_attr_str(zh->reg->zei, + (ord = zebraExplain_lookup_attr_str(zh->reg->zei, reg_id, use_string)) >= 0) { /* we have a match for a raw string attribute */ @@ -2857,7 +2861,7 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, for (local_attr = attp.local_attributes; local_attr && ord_no < 32; local_attr = local_attr->next) { - ord = zebraExplain_lookup_attr_su(zh->reg->zei, + ord = zebraExplain_lookup_attr_su(zh->reg->zei, reg_id, attp.attset_ordinal, local_attr->local); if (ord > 0) @@ -2911,7 +2915,9 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, scan_info->list[j].term = NULL; prefix_len += key_SU_encode (ords[i], termz + prefix_len); +#if REG_TYPE_PREFIX termz[prefix_len++] = reg_id; +#endif termz[prefix_len] = 0; strcpy(scan_info->prefix, termz); @@ -2963,7 +2969,8 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1, glist[lo].term, strlen(glist[lo].term), NULL, 0, zapt->term->which, rset_nmem, - kc, kc->scope, 0, reg_id); + kc, kc->scope, 0, reg_id, 0 /* hits_limit */, + 0 /* term_ref_id_str */); } ptr[j0]++; /* move index for this set .. */ /* get result set for remaining scan terms */ @@ -2984,7 +2991,8 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, glist[lo].term, strlen(glist[lo].term), NULL, 0, zapt->term->which,rset_nmem, - kc, kc->scope, 0, reg_id); + kc, kc->scope, 0, reg_id, 0 /* hits_limit */, + 0 /* term_ref_id_str */ ); rset = rsmulti_or_create(rset_nmem, kc, kc->scope, 0 /* termid */, 2, rsets); @@ -2994,6 +3002,7 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, } if (lo >= 0) { + zint count; /* merge with limit_set if given */ if (limit_set) { @@ -3006,7 +3015,8 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, 2, rsets); } /* count it */ - count_set(rset, &glist[lo].occurrences); + count_set(zh, rset, &count); + glist[lo].occurrences = count; rset_delete(rset); } } @@ -3033,6 +3043,7 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, const char *tst; RSET rset; int lo = before-1-i; /* offset in result list */ + zint count; for (j = 0; j term->which, rset_nmem, - kc, kc->scope, 0, reg_id); + kc, kc->scope, 0, reg_id, 0 /* hits_limit */, + 0 /* term_ref_id_str */); ptr[j0]++; @@ -3073,7 +3085,8 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, glist[lo].term, strlen(glist[lo].term), NULL, 0, zapt->term->which, rset_nmem, - kc, kc->scope, 0, reg_id); + kc, kc->scope, 0, reg_id, 0 /* hits_limit */, + 0 /* term_ref_id_str */); rset = rsmulti_or_create(rset_nmem, kc, kc->scope, 0 /* termid */, 2, rsets); @@ -3089,7 +3102,8 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, rset = rsmulti_and_create(rset_nmem, kc, kc->scope, 2, rsets); } - count_set (rset, &glist[lo].occurrences); + count_set(zh, rset, &count); + glist[lo].occurrences = count; rset_delete (rset); } (*kc->dec)(kc);