X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Frpnsearch.c;h=1a82f9987f62b1fd0403348229e9083c2e3da319;hb=95a5868bb03e3445b1e234a969358b6eaa74a49c;hp=ca44a8139d0b9d0d53ddabac07f3ca49ad35b9b2;hpb=6f7dfe3c3f09f7104c1ae7616c9d207edeab308d;p=idzebra-moved-to-github.git diff --git a/index/rpnsearch.c b/index/rpnsearch.c index ca44a81..1a82f99 100644 --- a/index/rpnsearch.c +++ b/index/rpnsearch.c @@ -1,4 +1,4 @@ -/* $Id: rpnsearch.c,v 1.18 2007-10-30 19:17:15 adam Exp $ +/* $Id: rpnsearch.c,v 1.25 2007-12-03 13:04:04 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -65,7 +65,10 @@ void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm, struct rpn_char_map_info *map_info) { map_info->zm = zm; - dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler); + if (zebra_maps_is_icu(zm)) + dict_grep_cmap(reg->dict, 0, 0); + else + dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler); } #define TERM_COUNT @@ -79,7 +82,7 @@ struct grep_info { int isam_p_indx; int trunc_max; ZebraHandle zh; - int reg_type; + const char *index_type; ZebraSet termset; }; @@ -135,7 +138,7 @@ static int add_isam_p(const char *name, const char *info, const char *index_name; int len = key_SU_decode(&ord, (const unsigned char *) name); - zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len); + zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len); yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp); zebraExplain_lookup_ord(p->zh->reg->zei, ord, 0 /* index_type */, &db, &index_name); @@ -232,6 +235,38 @@ static void add_non_space(const char *start, const char *end, } } + +static int term_100_icu(zebra_map_t zm, + const char **src, WRBUF term_dict, int space_split, + char *dst_term) +{ + int no = 0; + const char *res_buf = 0; + size_t res_len = 0; + int r = zebra_map_tokenize(zm, *src, strlen(*src), + &res_buf, &res_len); + + yaz_log(YLOG_LOG, "term_100_icu r=%d", r); + if (r) + strcat(dst_term, *src); + *src += strlen(*src); + while (r) + { + int i; + no++; + for (i = 0; i < res_len; i++) + { + if (strchr(REGEX_CHARS, res_buf[i])) + wrbuf_putc(term_dict, '\\'); + if (res_buf[i] < 32) + wrbuf_putc(term_dict, 1); + wrbuf_putc(term_dict, res_buf[i]); + } + r = zebra_map_tokenize(zm, 0, 0, &res_buf, &res_len); + } + return no; +} + /* term_100: handle term, where trunc = none(no operators at all) */ static int term_100(zebra_map_t zm, const char **src, WRBUF term_dict, int space_split, @@ -245,6 +280,9 @@ static int term_100(zebra_map_t zm, const char *space_start = 0; const char *space_end = 0; + if (zebra_maps_is_icu(zm)) + return term_100_icu(zm, src, term_dict, space_split, dst_term); + if (!term_pre(zm, src, NULL, NULL, !space_split)) return 0; s0 = *src; @@ -835,11 +873,11 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *xpath_use, struct ord_list **ol); -static ZEBRA_RES term_limits_APT(ZebraHandle zh, - Z_AttributesPlusTerm *zapt, - zint *hits_limit_value, - const char **term_ref_id_str, - NMEM nmem) +ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + zint *hits_limit_value, + const char **term_ref_id_str, + NMEM nmem) { AttrType term_ref_id_attr; AttrType hits_limit_attr; @@ -902,7 +940,8 @@ static ZEBRA_RES term_trunc(ZebraHandle zh, WRBUF term_dict = wrbuf_alloc(); *rset = 0; - term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream); + zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, + stream); grep_info->isam_p_indx = 0; res = string_term(zh, zapt, term_sub, term_dict, attributeSet, stream, grep_info, @@ -948,8 +987,8 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, int relation_error; char ord_buf[32]; int ord_len, i; - zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, *index_type); - + zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type); + *ol = ord_list_create(stream); rpn_char_map_prepare(zh->reg, zm, &rcmi); @@ -1095,8 +1134,13 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *input = wrbuf_cstr(term_dict) + prefix_len; esc_str(buf, sizeof(buf), input, strlen(input)); } - yaz_log(log_level_rpn, "dict_lookup_grep: %s", - wrbuf_cstr(term_dict) + prefix_len); + { + WRBUF pr_wr = wrbuf_alloc(); + + wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict)); + yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr)); + wrbuf_destroy(pr_wr); + } r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range, grep_info, &max_pos, ord_len /* number of "exact" chars */, @@ -1123,7 +1167,7 @@ static void grep_info_delete(struct grep_info *grep_info) static ZEBRA_RES grep_info_prepare(ZebraHandle zh, Z_AttributesPlusTerm *zapt, struct grep_info *grep_info, - int reg_type) + const char *index_type) { #ifdef TERM_COUNT grep_info->term_no = 0; @@ -1132,7 +1176,7 @@ static ZEBRA_RES grep_info_prepare(ZebraHandle zh, grep_info->isam_p_size = 0; grep_info->isam_p_buf = NULL; grep_info->zh = zh; - grep_info->reg_type = reg_type; + grep_info->index_type = index_type; grep_info->termset = 0; if (zapt) { @@ -1217,7 +1261,7 @@ static ZEBRA_RES term_list_trunc(ZebraHandle zh, *num_result_sets = 0; *term_dst = 0; - if (grep_info_prepare(zh, zapt, &grep_info, *index_type) == ZEBRA_FAIL) + if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL) return ZEBRA_FAIL; while(1) { @@ -1275,7 +1319,7 @@ static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh, int ord_len; char *val; ISAM_P isam_p; - zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, *index_type); + zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type); attr_init_APT(&position, zapt, 3); position_value = attr_find(&position, NULL); @@ -1301,13 +1345,6 @@ static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh, return ZEBRA_FAIL; } - if (!zh->reg->isamb && !zh->reg->isamc) - { - zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE, - position_value); - return ZEBRA_FAIL; - } - if (zebra_apt_get_ord(zh, zapt, index_type, 0, attributeSet, &ord) != ZEBRA_OK) { @@ -1321,13 +1358,9 @@ static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh, { assert(*val == sizeof(ISAM_P)); memcpy(&isam_p, val+1, sizeof(isam_p)); - - if (zh->reg->isamb) - *rset = rsisamb_create(rset_nmem, kc, kc->scope, - zh->reg->isamb, isam_p, 0); - else if (zh->reg->isamc) - *rset = rsisamc_create(rset_nmem, kc, kc->scope, - zh->reg->isamc, isam_p, 0); + + *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope, + isam_p, 0); } return ZEBRA_OK; } @@ -1634,7 +1667,7 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, int relation_error = 0; int ord, ord_len, i; char ord_buf[32]; - zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, *index_type); + zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type); *ol = ord_list_create(stream); @@ -1703,10 +1736,11 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, zint hits_limit_value; const char *term_ref_id_str = 0; - term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream); + zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, + stream); yaz_log(log_level_rpn, "APT_numeric t='%s'", termz); - if (grep_info_prepare(zh, zapt, &grep_info, *index_type) == ZEBRA_FAIL) + if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL) return ZEBRA_FAIL; while (1) { @@ -1904,7 +1938,7 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream, int ord = zebraExplain_lookup_attr_str(zh->reg->zei, zinfo_index_category_index, index_type, xpath_use); - if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL) + if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL) return rset_create_null(rset_nmem, kc, 0); if (ord < 0) @@ -2098,7 +2132,7 @@ static ZEBRA_RES rpn_search_database(ZebraHandle zh, static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const Odr_oid *attributeSet, NMEM stream, Z_SortKeySpecList *sort_sequence, - int num_bases, char **basenames, + int num_bases, const char **basenames, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc) @@ -2268,7 +2302,7 @@ static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, const Odr_oid *attributeSet, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, - int num_bases, char **basenames, + int num_bases, const char **basenames, RSET **result_sets, int *num_result_sets, Z_Operator *parent_op, struct rset_key_control *kc); @@ -2308,7 +2342,7 @@ ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs, const Odr_oid *attributeSet, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, - int num_bases, char **basenames, + int num_bases, const char **basenames, RSET *result_set) { RSET *result_sets = 0; @@ -2345,7 +2379,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, const Odr_oid *attributeSet, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, - int num_bases, char **basenames, + int num_bases, const char **basenames, RSET **result_sets, int *num_result_sets, Z_Operator *parent_op, struct rset_key_control *kc)