X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Frpnsearch.c;h=28a6670aecabf8e6af277f65abc4b0e7c31d0985;hp=909e9cc7bcdd7066082dc3220cec6f8ec134b3e5;hb=1872e3fc60b482771bbd1cb4b0290b8d6a9ef5d0;hpb=1ab2b4589da1d33372cc1f9a87afdac160ca11de diff --git a/index/rpnsearch.c b/index/rpnsearch.c index 909e9cc..28a6670 100644 --- a/index/rpnsearch.c +++ b/index/rpnsearch.c @@ -1,5 +1,5 @@ -/* $Id: rpnsearch.c,v 1.1 2006-09-21 08:56:52 adam Exp $ - Copyright (C) 1995-2006 +/* $Id: rpnsearch.c,v 1.11 2007-04-16 08:44:32 adam Exp $ + Copyright (C) 1995-2007 Index Data ApS This file is part of the Zebra server. @@ -78,19 +78,25 @@ struct grep_info { ISAM_P *isam_p_buf; int isam_p_size; int isam_p_indx; + int trunc_max; ZebraHandle zh; int reg_type; ZebraSet termset; }; -static void add_isam_p(const char *name, const char *info, - struct grep_info *p) +static int add_isam_p(const char *name, const char *info, + struct grep_info *p) { if (!log_level_set) { log_level_rpn = yaz_log_module_level("rpn"); log_level_set = 1; } + /* we may have to stop this madness.. NOTE: -1 so that if + truncmax == trunxlimit we do *not* generate result sets */ + if (p->isam_p_indx >= p->trunc_max - 1) + return 1; + if (p->isam_p_indx == p->isam_p_size) { ISAM_P *new_isam_p_buf; @@ -140,12 +146,12 @@ static void add_isam_p(const char *name, const char *info, index_name, term_tmp); } (p->isam_p_indx)++; + return 0; } static int grep_handle(char *name, const char *info, void *p) { - add_isam_p(name, info, (struct grep_info *) p); - return 0; + return add_isam_p(name, info, (struct grep_info *) p); } static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src, @@ -697,7 +703,7 @@ void string_rel_add_char(char **term_p, const char *src, int *indx) */ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, char *term_dict, - oid_value attributeSet, + const int *attributeSet, int reg_type, int space_split, char *term_dst, int *error_code) { @@ -734,6 +740,10 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, *term_tmp++ = '['; *term_tmp++ = '^'; + + *term_tmp++ = 1; + *term_tmp++ = FIRST_IN_FIELD_CHAR; + string_rel_add_char(&term_tmp, term_component, &i); *term_tmp++ = '-'; @@ -746,6 +756,7 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, } *term_tmp++ = ')'; *term_tmp = '\0'; + yaz_log(YLOG_LOG, "term_dict=%s", term_dict); break; case 2: if (!term_100(zh->reg->zebra_maps, reg_type, @@ -764,6 +775,10 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, *term_tmp++ = '['; *term_tmp++ = '^'; + + *term_tmp++ = 1; + *term_tmp++ = FIRST_IN_FIELD_CHAR; + string_rel_add_char(&term_tmp, term_component, &i); *term_tmp++ = '-'; @@ -882,7 +897,7 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, - oid_value attributeSet, NMEM stream, + const int *attributeSet, NMEM stream, struct grep_info *grep_info, int reg_type, int complete_flag, int num_bases, char **basenames, @@ -940,7 +955,7 @@ static ZEBRA_RES term_limits_APT(ZebraHandle zh, static ZEBRA_RES term_trunc(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, - oid_value attributeSet, NMEM stream, + const int *attributeSet, NMEM stream, struct grep_info *grep_info, int reg_type, int complete_flag, int num_bases, char **basenames, @@ -980,7 +995,7 @@ static ZEBRA_RES term_trunc(ZebraHandle zh, static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, - oid_value attributeSet, NMEM stream, + const int *attributeSet, NMEM stream, struct grep_info *grep_info, int reg_type, int complete_flag, int num_bases, char **basenames, @@ -1168,7 +1183,9 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, grep_info, &max_pos, ord_len /* number of "exact" chars */, grep_handle); - if (r) + if (r == 1) + zebra_set_partial_result(zh); + else if (r) yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r); } if (!bases_ok) @@ -1193,47 +1210,59 @@ static ZEBRA_RES grep_info_prepare(ZebraHandle zh, struct grep_info *grep_info, int reg_type) { - AttrType termset; - int termset_value_numeric; - const char *termset_value_string; - #ifdef TERM_COUNT grep_info->term_no = 0; #endif + grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000")); grep_info->isam_p_size = 0; grep_info->isam_p_buf = NULL; grep_info->zh = zh; grep_info->reg_type = reg_type; grep_info->termset = 0; - if (!zapt) - return ZEBRA_OK; - attr_init_APT(&termset, zapt, 8); - termset_value_numeric = - attr_find_ex(&termset, NULL, &termset_value_string); - if (termset_value_numeric != -1) + if (zapt) { + AttrType truncmax; + int truncmax_value; + + attr_init_APT(&truncmax, zapt, 13); + truncmax_value = attr_find(&truncmax, NULL); + if (truncmax_value != -1) + grep_info->trunc_max = truncmax_value; + } + if (zapt) + { + AttrType termset; + int termset_value_numeric; + const char *termset_value_string; + + attr_init_APT(&termset, zapt, 8); + termset_value_numeric = + attr_find_ex(&termset, NULL, &termset_value_string); + if (termset_value_numeric != -1) + { #if TERMSET_DISABLE - zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset"); - return ZEBRA_FAIL; + zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset"); + return ZEBRA_FAIL; #else - char resname[32]; - const char *termset_name = 0; - if (termset_value_numeric != -2) - { - - sprintf(resname, "%d", termset_value_numeric); - termset_name = resname; - } - else + char resname[32]; + const char *termset_name = 0; + if (termset_value_numeric != -2) + { + + sprintf(resname, "%d", termset_value_numeric); + termset_name = resname; + } + else termset_name = termset_value_string; - yaz_log(log_level_rpn, "creating termset set %s", termset_name); - grep_info->termset = resultSetAdd(zh, termset_name, 1); - if (!grep_info->termset) - { - zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name); - return ZEBRA_FAIL; - } + yaz_log(log_level_rpn, "creating termset set %s", termset_name); + grep_info->termset = resultSetAdd(zh, termset_name, 1); + if (!grep_info->termset) + { + zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name); + return ZEBRA_FAIL; + } #endif + } } return ZEBRA_OK; } @@ -1259,7 +1288,7 @@ static ZEBRA_RES grep_info_prepare(ZebraHandle zh, static ZEBRA_RES term_list_trunc(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz, - oid_value attributeSet, + const int *attributeSet, NMEM stream, int reg_type, int complete_flag, const char *rank_type, @@ -1321,7 +1350,7 @@ static ZEBRA_RES term_list_trunc(ZebraHandle zh, static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - oid_value attributeSet, + const int *attributeSet, int reg_type, int num_bases, char **basenames, NMEM rset_nmem, @@ -1413,7 +1442,7 @@ static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh, static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, - oid_value attributeSet, + const int *attributeSet, NMEM stream, int reg_type, int complete_flag, const char *rank_type, @@ -1473,7 +1502,7 @@ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, - oid_value attributeSet, + const int *attributeSet, NMEM stream, int reg_type, int complete_flag, const char *rank_type, @@ -1540,7 +1569,7 @@ static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh, static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, - oid_value attributeSet, + const int *attributeSet, NMEM stream, int reg_type, int complete_flag, const char *rank_type, @@ -1609,7 +1638,7 @@ static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh, static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, char *term_dict, - oid_value attributeSet, + const int *attributeSet, struct grep_info *grep_info, int *max_pos, int reg_type, @@ -1683,7 +1712,10 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp); r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos, 0, grep_handle); - if (r) + + if (r == 1) + zebra_set_partial_result(zh); + else if (r) yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r); yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx); return 1; @@ -1691,7 +1723,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, - oid_value attributeSet, NMEM stream, + const int *attributeSet, NMEM stream, struct grep_info *grep_info, int reg_type, int complete_flag, int num_bases, char **basenames, @@ -1768,7 +1800,7 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz, - oid_value attributeSet, + const int *attributeSet, NMEM stream, int reg_type, int complete_flag, const char *rank_type, @@ -1848,32 +1880,46 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz, - oid_value attributeSet, + const int *attributeSet, NMEM stream, const char *rank_type, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc) { - RSFD rsfd; - struct it_key key; - int sys; - *rset = rset_create_temp(rset_nmem, kc, kc->scope, - res_get (zh->res, "setTmpDir"),0 ); - rsfd = rset_open(*rset, RSETF_WRITE); + Record rec; + zint sysno = atozint(termz); - sys = atoi(termz); - if (sys <= 0) - sys = 1; - key.mem[0] = sys; - key.mem[1] = 1; - key.len = 2; - rset_write (rsfd, &key); - rset_close (rsfd); + if (sysno <= 0) + sysno = 0; + rec = rec_get(zh->reg->records, sysno); + if (!rec) + sysno = 0; + + rec_free(&rec); + + if (sysno <= 0) + { + *rset = rset_create_null(rset_nmem, kc, 0); + } + else + { + RSFD rsfd; + struct it_key key; + *rset = rset_create_temp(rset_nmem, kc, kc->scope, + res_get(zh->res, "setTmpDir"), 0); + rsfd = rset_open(*rset, RSETF_WRITE); + + key.mem[0] = sysno; + key.mem[1] = 1; + key.len = 2; + rset_write(rsfd, &key); + rset_close(rsfd); + } return ZEBRA_OK; } static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - oid_value attributeSet, NMEM stream, + const int *attributeSet, NMEM stream, Z_SortKeySpecList *sort_sequence, const char *rank_type, NMEM rset_nmem, @@ -1885,8 +1931,6 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, AttrType sort_relation_type; Z_SortKeySpec *sks; Z_SortKey *sk; - int oid[OID_SIZE]; - oident oe; char termz[20]; attr_init_APT(&sort_relation_type, zapt, 7); @@ -1910,12 +1954,6 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, i = 0; sprintf(termz, "%d", i); - oe.proto = PROTO_Z3950; - oe.oclass = CLASS_ATTSET; - oe.value = attributeSet; - if (!oid_ent_to_oid (&oe, oid)) - return ZEBRA_FAIL; - sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks)); sks->sortElement = (Z_SortElement *) nmem_malloc(stream, sizeof(*sks->sortElement)); @@ -1926,7 +1964,7 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, sk->u.sortAttributes = (Z_SortAttributes *) nmem_malloc(stream, sizeof(*sk->u.sortAttributes)); - sk->u.sortAttributes->id = oid; + sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet); sk->u.sortAttributes->list = zapt->attributes; sks->sortRelation = (int *) @@ -1951,11 +1989,11 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - oid_value attributeSet, + const int *attributeSet, struct xpath_location_step *xpath, int max, NMEM mem) { - oid_value curAttributeSet = attributeSet; + const int *curAttributeSet = attributeSet; AttrType use; const char *use_string = 0; @@ -2129,26 +2167,25 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, cp++; } } - wrbuf_puts(wbuf, ""); rset_attr = xpath_trunc( - zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME, + zh, stream, '0', wrbuf_cstr(wbuf), ZEBRA_XPATH_ATTR_NAME, rset_nmem, kc); - wrbuf_free(wbuf, 1); + wrbuf_destroy(wbuf); } else { if (!first_path) { - wrbuf_free(xpath_rev, 1); + wrbuf_destroy(xpath_rev); continue; } } - yaz_log(log_level_rpn, "xpath_rev (%d) = %.*s", level, - wrbuf_len(xpath_rev), wrbuf_buf(xpath_rev)); + yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, + wrbuf_cstr(xpath_rev)); if (wrbuf_len(xpath_rev)) { rset_start_tag = xpath_trunc(zh, stream, '0', - wrbuf_buf(xpath_rev), + wrbuf_cstr(xpath_rev), ZEBRA_XPATH_ELM_BEGIN, rset_nmem, kc); if (always_matches) @@ -2156,7 +2193,7 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, else { rset_end_tag = xpath_trunc(zh, stream, '0', - wrbuf_buf(xpath_rev), + wrbuf_cstr(xpath_rev), ZEBRA_XPATH_ELM_END, rset_nmem, kc); @@ -2165,7 +2202,7 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, rset_end_tag, rset_attr); } } - wrbuf_free(xpath_rev, 1); + wrbuf_destroy(xpath_rev); first_path = 0; } } @@ -2176,7 +2213,7 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, #define MAX_XPATH_STEPS 10 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - oid_value attributeSet, NMEM stream, + const int *attributeSet, NMEM stream, Z_SortKeySpecList *sort_sequence, int num_bases, char **basenames, NMEM rset_nmem, @@ -2301,7 +2338,7 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, } static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, - oid_value attributeSet, + const int *attributeSet, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, int num_bases, char **basenames, @@ -2309,8 +2346,39 @@ static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, Z_Operator *parent_op, struct rset_key_control *kc); +ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs, + zint *approx_limit) +{ + ZEBRA_RES res = ZEBRA_OK; + if (zs->which == Z_RPNStructure_complex) + { + if (res == ZEBRA_OK) + res = rpn_get_top_approx_limit(zh, zs->u.complex->s1, + approx_limit); + if (res == ZEBRA_OK) + res = rpn_get_top_approx_limit(zh, zs->u.complex->s2, + approx_limit); + } + else if (zs->which == Z_RPNStructure_simple) + { + if (zs->u.simple->which == Z_Operand_APT) + { + Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm; + AttrType global_hits_limit_attr; + int l; + + attr_init_APT(&global_hits_limit_attr, zapt, 12); + + l = attr_find(&global_hits_limit_attr, NULL); + if (l != -1) + *approx_limit = l; + } + } + return res; +} + ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs, - oid_value attributeSet, + const int *attributeSet, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, int num_bases, char **basenames, @@ -2347,7 +2415,7 @@ ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs, } ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, - oid_value attributeSet, + const int *attributeSet, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, int num_bases, char **basenames,