From 6684933a7dbf61609c4c4a1db1ebb8d80169ad05 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Thu, 22 Jun 2006 15:07:20 +0000 Subject: [PATCH] Indexing system change. Introduced new index category type zinfo_index_category_t which is used to properly distinguish between index (normal index,), sort, alwaysmatches index, and anchor (to be implemented later). Also added support for proper alwaysmatches relation for X-Path searches. Bug #617. --- index/attribute.c | 13 +++- index/extract.c | 35 +++++---- index/index.h | 4 +- index/zinfo.c | 59 +++++++++++++- index/zinfo.h | 17 +++- index/zrpn.c | 214 +++++++++++++++++++++++++++++++++++++++++---------- index/zsets.c | 6 +- test/api/t2.c | 9 ++- test/api/zebra.cfg | 3 +- test/xpath/xpath1.c | 22 +++++- 10 files changed, 307 insertions(+), 75 deletions(-) diff --git a/index/attribute.c b/index/attribute.c index 10b1e6d..cb9d62c 100644 --- a/index/attribute.c +++ b/index/attribute.c @@ -1,4 +1,4 @@ -/* $Id: attribute.c,v 1.23 2006-05-19 13:49:34 adam Exp $ +/* $Id: attribute.c,v 1.24 2006-06-22 15:07:20 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -67,6 +67,7 @@ static int att_getentbyatt(ZebraHandle zi, oid_value set, int att, ZEBRA_RES zebra_attr_list_get_ord(ZebraHandle zh, Z_AttributeList *attr_list, + zinfo_index_category_t cat, int index_type, oid_value curAttributeSet, int *ord) @@ -105,7 +106,8 @@ ZEBRA_RES zebra_attr_list_get_ord(ZebraHandle zh, zebra_setError(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, 0); return ZEBRA_FAIL; } - *ord = zebraExplain_lookup_attr_str(zh->reg->zei, index_type, use_string); + *ord = zebraExplain_lookup_attr_str(zh->reg->zei, cat, + index_type, use_string); if (*ord == -1) { if (use_value < 0) @@ -119,6 +121,7 @@ ZEBRA_RES zebra_attr_list_get_ord(ZebraHandle zh, ZEBRA_RES zebra_apt_get_ord(ZebraHandle zh, Z_AttributesPlusTerm *zapt, + zinfo_index_category_t cat, int index_type, const char *xpath_use, oid_value curAttributeSet, @@ -126,10 +129,10 @@ ZEBRA_RES zebra_apt_get_ord(ZebraHandle zh, { if (!xpath_use) return zebra_attr_list_get_ord(zh, zapt->attributes, - index_type, curAttributeSet, ord); + cat, index_type, curAttributeSet, ord); else { - *ord = zebraExplain_lookup_attr_str(zh->reg->zei, index_type, + *ord = zebraExplain_lookup_attr_str(zh->reg->zei, cat, index_type, xpath_use); if (*ord == -1) { @@ -162,9 +165,11 @@ ZEBRA_RES zebra_sort_get_ord(ZebraHandle zh, *numerical = 1; if (zebra_attr_list_get_ord(zh, sortAttributes->list, + zinfo_index_category_sort, 's', VAL_BIB1, ord)== ZEBRA_OK) return ZEBRA_OK; if (zebra_attr_list_get_ord(zh, sortAttributes->list, + zinfo_index_category_sort, 'S', VAL_BIB1, ord)== ZEBRA_OK) return ZEBRA_OK; return ZEBRA_FAIL; diff --git a/index/extract.c b/index/extract.c index 1a5eada..d4d9b69 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,4 +1,4 @@ -/* $Id: extract.c,v 1.222 2006-06-22 09:48:08 adam Exp $ +/* $Id: extract.c,v 1.223 2006-06-22 15:07:20 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -106,7 +106,9 @@ static void logRecord (ZebraHandle zh) } } -static void extract_add_index_string (RecWord *p, const char *str, int length); +static void extract_add_index_string (RecWord *p, + zinfo_index_category_t cat, + const char *str, int length); static void extract_set_store_data_prepare(struct recExtractCtrl *p); @@ -128,16 +130,17 @@ static void searchRecordKey(ZebraHandle zh, { int i; int ch = -1; + zinfo_index_category_t cat = zinfo_index_category_index; for (i = 0; ireg->zei, '0', index_name); + ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, '0', index_name); if (ch < 0) - ch = zebraExplain_lookup_attr_str(zh->reg->zei, 'p', index_name); + ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, 'p', index_name); if (ch < 0) - ch = zebraExplain_lookup_attr_str(zh->reg->zei, 'w', index_name); + ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, 'w', index_name); if (ch < 0) return ; @@ -403,7 +406,8 @@ static void all_matches_add(struct recExtractCtrl *ctrl) word.index_name = "allrecords"; word.index_type = 'w'; word.seqno = 1; - extract_add_index_string (&word, "", 0); + extract_add_index_string (&word, zinfo_index_category_alwaysmatches, + "", 0); } static ZEBRA_RES file_extract_record(ZebraHandle zh, @@ -1622,7 +1626,8 @@ void print_rec_keys(ZebraHandle zh, zebra_rec_keys_t reckeys) } } -static void extract_add_index_string(RecWord *p, const char *str, int length) +static void extract_add_index_string(RecWord *p, zinfo_index_category_t cat, + const char *str, int length) { struct it_key key; @@ -1633,9 +1638,9 @@ static void extract_add_index_string(RecWord *p, const char *str, int length) if (!p->index_name) return; - ch = zebraExplain_lookup_attr_str(zei, p->index_type, p->index_name); + ch = zebraExplain_lookup_attr_str(zei, cat, p->index_type, p->index_name); if (ch < 0) - ch = zebraExplain_add_attr_str(zei, p->index_type, p->index_name); + ch = zebraExplain_add_attr_str(zei, cat, p->index_type, p->index_name); key.len = 4; key.mem[0] = ch; @@ -1670,13 +1675,15 @@ static void extract_add_sort_string(RecWord *p, const char *str, int length) ZebraHandle zh = p->extractCtrl->handle; ZebraExplainInfo zei = zh->reg->zei; int ch; + zinfo_index_category_t cat = zinfo_index_category_sort; + if (!p->index_name) return; - ch = zebraExplain_lookup_attr_str(zei, p->index_type, p->index_name); + ch = zebraExplain_lookup_attr_str(zei, cat, p->index_type, p->index_name); if (ch < 0) - ch = zebraExplain_add_attr_str(zei, p->index_type, p->index_name); + ch = zebraExplain_add_attr_str(zei, cat, p->index_type, p->index_name); key.len = 4; key.mem[0] = ch; key.mem[1] = p->record_id; @@ -1709,14 +1716,16 @@ static void extract_add_string (RecWord *p, const char *string, int length) extract_add_sort_string (p, string, length); else { - extract_add_index_string(p, string, length); + extract_add_index_string(p, zinfo_index_category_index, + string, length); if (zebra_maps_is_alwaysmatches(p->zebra_maps, p->index_type)) { RecWord word; memcpy(&word, p, sizeof(word)); word.seqno = 1; - extract_add_index_string (&word, "", 0); + extract_add_index_string( + &word, zinfo_index_category_alwaysmatches, "", 0); } } } diff --git a/index/index.h b/index/index.h index 1bb24f1..4e0eae2 100644 --- a/index/index.h +++ b/index/index.h @@ -1,4 +1,4 @@ -/* $Id: index.h,v 1.167 2006-06-13 12:02:08 adam Exp $ +/* $Id: index.h,v 1.168 2006-06-22 15:07:20 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -429,6 +429,7 @@ void zebra_term_untrans(ZebraHandle zh, int reg_type, ZEBRA_RES zebra_apt_get_ord(ZebraHandle zh, Z_AttributesPlusTerm *zapt, + zinfo_index_category_t cat, int index_type, const char *xpath_use, oid_value curAttributeSet, @@ -436,6 +437,7 @@ ZEBRA_RES zebra_apt_get_ord(ZebraHandle zh, ZEBRA_RES zebra_attr_list_get_ord(ZebraHandle zh, Z_AttributeList *attr_list, + zinfo_index_category_t cat, int index_type, oid_value curAttributeSet, int *ord); diff --git a/index/zinfo.c b/index/zinfo.c index 908fdd3..b7d4386 100644 --- a/index/zinfo.c +++ b/index/zinfo.c @@ -1,4 +1,4 @@ -/* $Id: zinfo.c,v 1.66 2006-06-13 12:02:12 adam Exp $ +/* $Id: zinfo.c,v 1.67 2006-06-22 15:07:20 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -33,6 +33,7 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA struct zebSUInfo { int index_type; + zinfo_index_category_t cat; #define ZEB_SU_SET_USE 1 #define ZEB_SU_STR 2 int which; @@ -593,6 +594,7 @@ static void zebraExplain_readAttributeDetails(ZebraExplainInfo zei, data1_node *node_str = NULL; data1_node *node_ordinal = NULL; data1_node *node_type = NULL; + data1_node *node_cat = NULL; data1_node *node_doc_occurrences = NULL; data1_node *node_term_occurrences = NULL; data1_node *np2; @@ -610,6 +612,8 @@ static void zebraExplain_readAttributeDetails(ZebraExplainInfo zei, node_ordinal = np2->child; else if (!strcmp(np2->u.tag.tag, "type")) node_type = np2->child; + else if (!strcmp(np2->u.tag.tag, "cat")) + node_cat = np2->child; else if (!strcmp(np2->u.tag.tag, "dococcurrences")) node_doc_occurrences = np2->child; else if (!strcmp(np2->u.tag.tag, "termoccurrences")) @@ -632,6 +636,31 @@ static void zebraExplain_readAttributeDetails(ZebraExplainInfo zei, yaz_log(YLOG_WARN, "Missing attribute 'type' in attribute info"); (*zsuip)->info.index_type = 'w'; } + if (node_cat && node_cat->u.data.len > 0) + { + zinfo_index_category_t cat; + + data1_node *np = node_cat; + if (!strncmp(np->u.data.data, "index", np->u.data.len)) + cat = zinfo_index_category_index; + else if (!strncmp(np->u.data.data, "sort", np->u.data.len)) + cat = zinfo_index_category_sort; + else if (!strncmp(np->u.data.data, "alwaysmatches", + np->u.data.len)) + cat = zinfo_index_category_alwaysmatches; + else if (!strncmp(np->u.data.data, "anchor", + np->u.data.len)) + cat = zinfo_index_category_anchor; + else + { + yaz_log(YLOG_WARN, "Bad index cateogry '%.*s'", + np->u.data.len, np->u.data.data); + cat = zinfo_index_category_index; + } + (*zsuip)->info.cat = cat; + } + else + (*zsuip)->info.cat = zinfo_index_category_index; if (node_doc_occurrences) { @@ -1067,6 +1096,21 @@ static void zebraExplain_writeAttributeDetails (ZebraExplainInfo zei, zsui->info.doc_occurrences, zei->nmem); data1_mk_tag_data_zint (zei->dh, node_attr, "termoccurrences", zsui->info.term_occurrences, zei->nmem); + switch(zsui->info.cat) + { + case zinfo_index_category_index: + data1_mk_tag_data_text (zei->dh, node_attr, "cat", + "index", zei->nmem); break; + case zinfo_index_category_sort: + data1_mk_tag_data_text (zei->dh, node_attr, "cat", + "sort", zei->nmem); break; + case zinfo_index_category_alwaysmatches: + data1_mk_tag_data_text (zei->dh, node_attr, "cat", + "alwaysmatches", zei->nmem); break; + case zinfo_index_category_anchor: + data1_mk_tag_data_text (zei->dh, node_attr, "cat", + "anchor", zei->nmem); break; + } } /* convert to "SGML" and write it */ #if ZINFO_DEBUG @@ -1306,7 +1350,9 @@ static void zebraExplain_writeTarget (ZebraExplainInfo zei, int key_flush) rec_put (zei->records, &trec); } -int zebraExplain_lookup_attr_str(ZebraExplainInfo zei, int index_type, +int zebraExplain_lookup_attr_str(ZebraExplainInfo zei, + zinfo_index_category_t cat, + int index_type, const char *str) { struct zebSUInfoB **zsui; @@ -1315,6 +1361,7 @@ int zebraExplain_lookup_attr_str(ZebraExplainInfo zei, int index_type, for (zsui = &zei->curDatabaseInfo->attributeDetails->SUInfo; *zsui; zsui = &(*zsui)->next) if ((*zsui)->info.index_type == index_type + && (*zsui)->info.cat == cat && (*zsui)->info.which == ZEB_SU_STR && !yaz_matchstr((*zsui)->info.u.str, str)) { @@ -1489,6 +1536,7 @@ void zebraExplain_addAttributeSet (ZebraExplainInfo zei, int set) } struct zebSUInfoB *zebraExplain_add_sui_info(ZebraExplainInfo zei, + zinfo_index_category_t cat, int index_type) { struct zebSUInfoB *zsui; @@ -1500,16 +1548,19 @@ struct zebSUInfoB *zebraExplain_add_sui_info(ZebraExplainInfo zei, zei->curDatabaseInfo->attributeDetails->dirty = 1; zei->dirty = 1; zsui->info.index_type = index_type; + zsui->info.cat = cat; zsui->info.doc_occurrences = 0; zsui->info.term_occurrences = 0; zsui->info.ordinal = (zei->ordinalSU)++; return zsui; } -int zebraExplain_add_attr_str(ZebraExplainInfo zei, int index_type, +int zebraExplain_add_attr_str(ZebraExplainInfo zei, + zinfo_index_category_t cat, + int index_type, const char *index_name) { - struct zebSUInfoB *zsui = zebraExplain_add_sui_info(zei, index_type); + struct zebSUInfoB *zsui = zebraExplain_add_sui_info(zei, cat, index_type); zsui->info.which = ZEB_SU_STR; zsui->info.u.str = nmem_strdup(zei->nmem, index_name); diff --git a/index/zinfo.h b/index/zinfo.h index 3121162..eab4022 100644 --- a/index/zinfo.h +++ b/index/zinfo.h @@ -1,4 +1,4 @@ -/* $Id: zinfo.h,v 1.34 2006-06-13 12:02:13 adam Exp $ +/* $Id: zinfo.h,v 1.35 2006-06-22 15:07:20 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -37,6 +37,13 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA YAZ_BEGIN_CDECL +typedef enum { + zinfo_index_category_index, + zinfo_index_category_sort, + zinfo_index_category_alwaysmatches, + zinfo_index_category_anchor +} zinfo_index_category_t; + typedef ZEBRA_RES ZebraExplainUpdateFunc(void *handle, Record drec, data1_node *n); @@ -55,9 +62,13 @@ int zebraExplain_newDatabase (ZebraExplainInfo zei, const char *database, int explain_database); int zebraExplain_add_attr_su(ZebraExplainInfo zei, int index_type, int set, int use); -int zebraExplain_lookup_attr_str(ZebraExplainInfo zei, int index_type, +int zebraExplain_lookup_attr_str(ZebraExplainInfo zei, + zinfo_index_category_t cat, + int index_type, const char *str); -int zebraExplain_add_attr_str(ZebraExplainInfo zei, int index_type, +int zebraExplain_add_attr_str(ZebraExplainInfo zei, + zinfo_index_category_t cat, + int index_type, const char *str); void zebraExplain_addSchema (ZebraExplainInfo zei, Odr_oid *oid); void zebraExplain_recordCountIncrement (ZebraExplainInfo zei, int adjust_num); diff --git a/index/zrpn.c b/index/zrpn.c index 8cc6515..b1c7dcc 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -1,4 +1,4 @@ -/* $Id: zrpn.c,v 1.217 2006-06-07 10:50:08 adam Exp $ +/* $Id: zrpn.c,v 1.218 2006-06-22 15:07:20 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -66,7 +66,7 @@ static const char **rpn_char_map_handler(void *vp, const char **from, int len) } static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type, - struct rpn_char_map_info *map_info) + struct rpn_char_map_info *map_info) { map_info->zm = reg->zebra_maps; map_info->reg_type = reg_type; @@ -1035,11 +1035,6 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, int attr_ok = 0; int regex_range = 0; int init_pos = 0; -#if 0 - attent attp; - data1_local_attribute id_xpath_attr; - data1_local_attribute *local_attr; -#endif int max_pos, prefix_len = 0; int relation_error; char ord_buf[32]; @@ -1054,7 +1049,8 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, return ZEBRA_FAIL; } - if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use, + if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_index, + reg_type, xpath_use, curAttributeSet, &ord) != ZEBRA_OK) { @@ -1453,6 +1449,123 @@ static ZEBRA_RES term_list_trunc(ZebraHandle zh, return ZEBRA_OK; } + +static ZEBRA_RES always_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, + oid_value attributeSet, NMEM stream, + struct grep_info *grep_info, + int reg_type, int complete_flag, + int num_bases, char **basenames, + char *term_dst, + const char *xpath_use, + struct ord_list **ol) +{ + char term_dict[2*IT_MAX_WORD+4000]; + int r, base_no; + struct rpn_char_map_info rcmi; + + int bases_ok = 0; /* no of databases with OK attribute */ + + *ol = ord_list_create(stream); + + rpn_char_map_prepare (zh->reg, reg_type, &rcmi); + + for (base_no = 0; base_no < num_bases; base_no++) + { + int ord = -1; + int regex_range = 0; + int init_pos = 0; + int max_pos, prefix_len = 0; + char ord_buf[32]; + int ord_len, i; + + if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no])) + { + zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE, + basenames[base_no]); + return ZEBRA_FAIL; + } + + if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_alwaysmatches, + reg_type, xpath_use, + attributeSet, &ord) != ZEBRA_OK) + return ZEBRA_FAIL; + yaz_log(YLOG_LOG, "Got ordinal value: %d", ord); + *ol = ord_list_append(stream, *ol, ord); + + if (prefix_len) + term_dict[prefix_len++] = '|'; + else + term_dict[prefix_len++] = '('; + + ord_len = key_SU_encode (ord, ord_buf); + for (i = 0; i init_pos) + init_pos = ord_len; + + bases_ok++; + + term_dict[prefix_len++] = ')'; + term_dict[prefix_len] = '\0'; + + r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range, + grep_info, &max_pos, init_pos, + grep_handle); + } + if (!bases_ok) + return ZEBRA_FAIL; + yaz_log(YLOG_LOG, "always_term: %d positions", grep_info->isam_p_indx); + return ZEBRA_OK; +} + +static ZEBRA_RES rpn_search_APT_alwaysmatches(ZebraHandle zh, + Z_AttributesPlusTerm *zapt, + const char *termz_org, + oid_value attributeSet, + NMEM stream, + int reg_type, int complete_flag, + const char *rank_type, + const char *xpath_use, + int num_bases, char **basenames, + NMEM rset_nmem, + RSET *rset, + struct rset_key_control *kc) +{ + char term_dst[IT_MAX_WORD+1]; + struct grep_info grep_info; + zint hits_limit_value; + const char *term_ref_id_str = 0; + ZEBRA_RES res; + struct ord_list *ol; + + term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, + stream); + if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL) + return ZEBRA_FAIL; + + grep_info.isam_p_indx = 0; + + res = always_term(zh, zapt, attributeSet, stream, &grep_info, + reg_type, complete_flag, num_bases, basenames, + term_dst, xpath_use, &ol); + if (res == ZEBRA_OK) + { + *rset = rset_trunc(zh, grep_info.isam_p_buf, + grep_info.isam_p_indx, term_dst, strlen(term_dst), + rank_type, 1 /* preserve pos */, + zapt->term->which, rset_nmem, + kc, kc->scope, ol, reg_type, hits_limit_value, + term_ref_id_str); + if (!*rset) + res = ZEBRA_FAIL; + } + grep_info_delete (&grep_info); + return res; +} + static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, @@ -1662,8 +1775,8 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, return ZEBRA_FAIL; } - if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use, - curAttributeSet, &ord) + if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_index, + reg_type, xpath_use, curAttributeSet, &ord) != ZEBRA_OK) { break; @@ -1891,9 +2004,10 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, } -static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - oid_value attributeSet, - struct xpath_location_step *xpath, int max, NMEM mem) +static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt, + oid_value attributeSet, + struct xpath_location_step *xpath, int max, + NMEM mem) { oid_value curAttributeSet = attributeSet; AttrType use; @@ -1913,7 +2027,7 @@ static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt, static RSET xpath_trunc(ZebraHandle zh, NMEM stream, int reg_type, const char *term, const char *xpath_use, - oid_value curAttributeSet, NMEM rset_nmem, + NMEM rset_nmem, struct rset_key_control *kc) { RSET rset; @@ -1921,7 +2035,9 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream, char term_dict[2048]; char ord_buf[32]; int prefix_len = 0; - int ord = zebraExplain_lookup_attr_str(zh->reg->zei, reg_type, + int ord = zebraExplain_lookup_attr_str(zh->reg->zei, + zinfo_index_category_index, + reg_type, xpath_use); int ord_len, i, r, max_pos; int term_type = Z_Term_characterString; @@ -1962,7 +2078,6 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream, static ZEBRA_RES rpn_search_xpath(ZebraHandle zh, - oid_value attributeSet, int num_bases, char **basenames, NMEM stream, const char *rank_type, RSET rset, int xpath_len, struct xpath_location_step *xpath, @@ -1970,9 +2085,9 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, RSET *rset_out, struct rset_key_control *kc) { - oid_value curAttributeSet = attributeSet; int base_no; int i; + int always_matches = rset ? 0 : 1; if (xpath_len < 0) { @@ -1987,8 +2102,6 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, } - curAttributeSet = VAL_IDXPATH; - /* //a -> a/.* //a/b -> b/a/.* @@ -2082,7 +2195,7 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, wrbuf_puts(wbuf, ""); rset_attr = xpath_trunc( zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME, - curAttributeSet, rset_nmem, kc); + rset_nmem, kc); wrbuf_free(wbuf, 1); } else @@ -2096,18 +2209,20 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, rset_start_tag = xpath_trunc(zh, stream, '0', xpath_rev, ZEBRA_XPATH_ELM_BEGIN, - curAttributeSet, rset_nmem, kc); - - rset_end_tag = xpath_trunc(zh, stream, '0', - xpath_rev, - ZEBRA_XPATH_ELM_END, - curAttributeSet, - rset_nmem, kc); - - rset = rset_create_between(rset_nmem, kc, kc->scope, - rset_start_tag, rset, - rset_end_tag, rset_attr); + if (always_matches) + rset = rset_start_tag; + else + { + rset_end_tag = xpath_trunc(zh, stream, '0', + xpath_rev, + ZEBRA_XPATH_ELM_END, + rset_nmem, kc); + + rset = rset_create_between(rset_nmem, kc, kc->scope, + rset_start_tag, rset, + rset_end_tag, rset_attr); + } } first_path = 0; } @@ -2116,6 +2231,8 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh, return ZEBRA_OK; } +#define MAX_XPATH_STEPS 10 + static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, oid_value attributeSet, NMEM stream, Z_SortKeySpecList *sort_sequence, @@ -2133,7 +2250,7 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, char termz[IT_MAX_WORD+1]; int xpath_len; const char *xpath_use = 0; - struct xpath_location_step xpath[10]; + struct xpath_location_step xpath[MAX_XPATH_STEPS]; if (!log_level_set) { @@ -2155,7 +2272,8 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence, rank_type, rset_nmem, rset, kc); /* consider if an X-Path query is used */ - xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream); + xpath_len = rpn_check_xpath(zh, zapt, attributeSet, + xpath, MAX_XPATH_STEPS, stream); if (xpath_len >= 0) { if (xpath[xpath_len-1].part[0] == '@') @@ -2211,23 +2329,34 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, } else if (!strcmp(search_type, "always")) { - *termz = '\0'; - res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream, - reg_id, complete_flag, rank_type, - xpath_use, - num_bases, basenames, rset_nmem, - rset, kc); + if (xpath_len >= 0) /* alwaysmatches and X-Path ? */ + { + *rset = 0; /* signal no "term" set */ + return rpn_search_xpath(zh, num_bases, basenames, + stream, rank_type, *rset, + xpath_len, xpath, rset_nmem, rset, kc); + } + else + { + res = rpn_search_APT_alwaysmatches(zh, zapt, termz, + attributeSet, stream, + reg_id, complete_flag, + rank_type, + xpath_use, + num_bases, basenames, rset_nmem, + rset, kc); + } } else { - zebra_setError(zh, YAZ_BIB1_UNSUPP_STRUCTURE_ATTRIBUTE, 0); + zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0); res = ZEBRA_FAIL; } if (res != ZEBRA_OK) return res; if (!*rset) return ZEBRA_FAIL; - return rpn_search_xpath(zh, attributeSet, num_bases, basenames, + return rpn_search_xpath(zh, num_bases, basenames, stream, rank_type, *rset, xpath_len, xpath, rset_nmem, rset, kc); } @@ -2619,7 +2748,8 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, return ZEBRA_FAIL; } - if (zebra_apt_get_ord(zh, zapt, index_type, 0, attributeset, &ord) + if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_index, + index_type, 0, attributeset, &ord) != ZEBRA_OK) { break; diff --git a/index/zsets.c b/index/zsets.c index a8d8ce9..3d7b61d 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -1,4 +1,4 @@ -/* $Id: zsets.c,v 1.107 2006-06-07 10:14:42 adam Exp $ +/* $Id: zsets.c,v 1.108 2006-06-22 15:07:20 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -856,7 +856,9 @@ ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem, i+1); sort_criteria[i].numerical = 0; sort_criteria[i].ord = - zebraExplain_lookup_attr_str(zh->reg->zei, 's', + zebraExplain_lookup_attr_str(zh->reg->zei, + zinfo_index_category_sort, + 's', sk->u.sortField); if (sks->which != Z_SortKeySpec_null && sort_criteria[i].ord == -1) diff --git a/test/api/t2.c b/test/api/t2.c index 449331a..ac30474 100644 --- a/test/api/t2.c +++ b/test/api/t2.c @@ -1,5 +1,5 @@ -/* $Id: t2.c,v 1.19 2006-05-10 08:13:35 adam Exp $ - Copyright (C) 1995-2005 +/* $Id: t2.c,v 1.20 2006-06-22 15:07:20 adam Exp $ + Copyright (C) 1995-2006 Index Data ApS This file is part of the Zebra server. @@ -34,7 +34,12 @@ void tst(int argc, char **argv) ZebraHandle zh = zebra_open(zs, 0); YAZ_CHECK(tl_init_data(zh, myrec)); + YAZ_CHECK(tl_query(zh, "@attr 1=title my", 1)); YAZ_CHECK(tl_query(zh, "@attr 1=4 my", 1)); + YAZ_CHECK(tl_query(zh, "@attr 1=title nope", 0)); + YAZ_CHECK(tl_query(zh, "@attr 1=4 nope", 0)); + YAZ_CHECK(tl_query(zh, "@attr 1=title @attr 2=103 dummy", 1)); + YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 2=103 dummy", 1)); YAZ_CHECK(tl_close_down(zh, zs)); } diff --git a/test/api/zebra.cfg b/test/api/zebra.cfg index 354a1c2..b4b2bac 100644 --- a/test/api/zebra.cfg +++ b/test/api/zebra.cfg @@ -1,7 +1,8 @@ -# $Id: zebra.cfg,v 1.3 2004-06-15 08:06:33 adam Exp $ +# $Id: zebra.cfg,v 1.4 2006-06-22 15:07:20 adam Exp $ profilepath: ${srcdir:-.}/../../tab attset: bib1.att +attset: explain.att recordType: grs.sgml diff --git a/test/xpath/xpath1.c b/test/xpath/xpath1.c index b9356a3..a45e8bd 100644 --- a/test/xpath/xpath1.c +++ b/test/xpath/xpath1.c @@ -1,4 +1,4 @@ -/* $Id: xpath1.c,v 1.6 2006-05-10 08:13:41 adam Exp $ +/* $Id: xpath1.c,v 1.7 2006-06-22 15:07:21 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -31,8 +31,8 @@ static void tst(int argc, char **argv) const char *myrec[] = { " \n" " before \n" - " \n" - " inside \n" + " \n" + " inside it\n" " \n" " after \n" " \n", @@ -44,6 +44,7 @@ static void tst(int argc, char **argv) YAZ_CHECK(tl_query(zh, "@attr 1=/sgml/tag before", 0)); YAZ_CHECK(tl_query(zh, "@attr 1=/sgml/tag inside", 1)); + YAZ_CHECK(tl_query(zh, "@attr 1=/sgml/tag {inside it}", 1)); YAZ_CHECK(tl_query(zh, "@attr 1=/sgml/tag after", 0)); YAZ_CHECK(tl_query(zh, "@attr 1=/sgml/none after", 0)); @@ -53,6 +54,21 @@ static void tst(int argc, char **argv) YAZ_CHECK(tl_query(zh, "@attr 1=/sgml inside", 1)); YAZ_CHECK(tl_query(zh, "@attr 1=/sgml after", 1)); + YAZ_CHECK(tl_query(zh, "@attr 1=/sgml/tag/@x v", 1)); + YAZ_CHECK(tl_query(zh, "@attr 1=/sgml/tag/@x no", 0)); + YAZ_CHECK(tl_query(zh, "@attr 1=/sgml/tag/@y v", 0)); + + YAZ_CHECK(tl_query(zh, "@attr 1=_XPATH_BEGIN @attr 4=3 tag/sgml/", 1)); + YAZ_CHECK(tl_query(zh, "@attr 1=_XPATH_BEGIN @attr 4=3 sgml/", 1)); + YAZ_CHECK(tl_query(zh, "@attr 1=_XPATH_BEGIN @attr 4=3 tag/", 0)); + + /* bug #617 */ + YAZ_CHECK(tl_query(zh, "@attr 1=/sgml/tag @attr 2=103 dummy", 1)); + YAZ_CHECK(tl_query(zh, "@attr 1=/sgml @attr 2=103 dummy", 1)); + YAZ_CHECK(tl_query(zh, "@attr 1=/tag @attr 2=103 dummy", 0)); + YAZ_CHECK(tl_query(zh, "@attr 1=/sgml/tag/@x @attr 2=103 dummy", 1)); + YAZ_CHECK(tl_query(zh, "@attr 1=/sgml/tag/@y @attr 2=103 dummy", 0)); + YAZ_CHECK(tl_close_down(zh, zs)); } -- 1.7.10.4