-/* $Id: rpnsearch.c,v 1.21 2007-11-08 21:21:58 adam Exp $
- Copyright (C) 1995-2007
- Index Data ApS
-
-This file is part of the Zebra server.
+/* This file is part of the Zebra server.
+ Copyright (C) 1994-2009 Index Data
Zebra is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
if (p->isam_p_buf)
{
memcpy(new_isam_p_buf, p->isam_p_buf,
- p->isam_p_indx * sizeof(*p->isam_p_buf));
+ p->isam_p_indx * sizeof(*p->isam_p_buf));
xfree(p->isam_p_buf);
}
p->isam_p_buf = new_isam_p_buf;
if (p->term_no)
{
memcpy(new_term_no, p->isam_p_buf,
- p->isam_p_indx * sizeof(*p->term_no));
+ p->isam_p_indx * sizeof(*p->term_no));
xfree(p->term_no);
}
p->term_no = new_term_no;
}
}
-#define REGEX_CHARS " []()|.*+?!"
+#define REGEX_CHARS " ^[]()|.*+?!\"$"
static void add_non_space(const char *start, const char *end,
WRBUF term_dict,
- char *dst_term, int *dst_ptr,
+ WRBUF display_term,
const char **map, int q_map_match)
{
size_t sz = end - start;
- memcpy(dst_term + *dst_ptr, start, sz);
- (*dst_ptr) += sz;
+
+ wrbuf_write(display_term, start, sz);
if (!q_map_match)
{
while (start < end)
+/** \brief emit one ICU-tokenized term as an escaped dictionary expression
+
+    Fetches the next token from the tokenizer state held in zm using
+    zebra_map_tokenize_next; the text in *src is not parsed here.
+    The token's display form is appended to display_term and its key
+    bytes are appended to term_dict: bytes found in REGEX_CHARS (or a
+    backslash) are preceded by a backslash, and bytes comparing less
+    than 32 are preceded by byte value 1.
+
+    \param zm map whose tokenizer supplies the next token
+    \param src term text; advanced to its terminating NUL when no token is left
+    \param term_dict receives the escaped key, plus ".*" when right_trunc is set
+    \param space_split not used by this function
+    \param display_term receives the display form of the token
+    \param right_trunc non-zero to keep only the leading part of the key
+    \retval 0 no more tokens
+    \retval 1 a term was appended to term_dict
+    \retval -1 right truncation requested but no leading part could be
+               isolated (display_term has already been appended to)
+
+    NOTE(review): the callers visible in this patch only test the result
+    with '!', so -1 is handled like 1 there -- confirm that is intended.
+ */
static int term_100_icu(zebra_map_t zm,
                        const char **src, WRBUF term_dict, int space_split,
-                       char *dst_term)
+                       WRBUF display_term,
+                       int right_trunc)
{
-    int no = 0;
+    int i;
+    size_t k;
    const char *res_buf = 0;
    size_t res_len = 0;
-    int r = zebra_map_tokenize(zm, *src, strlen(*src),
-                               &res_buf, &res_len);
-
-    yaz_log(YLOG_LOG, "term_100_icu r=%d", r);
-    if (r)
-        strcat(dst_term, *src);
-    *src += strlen(*src);
-    while (r)
-    {
-        int i;
-        no++;
-        for (i = 0; i < res_len; i++)
+    const char *display_buf;
+    size_t display_len;
+    if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
+                                 &display_buf, &display_len))
+    {
+        *src += strlen(*src);
+        return 0;
+    }
+    wrbuf_write(display_term, display_buf, display_len);
+    if (right_trunc)
+    {
+        /* ICU sort keys seem to be of the form
+           basechars \x01 accents \x01 length
+           For now we'll just right truncate from basechars. This
+           may give false hits due to accents not being used.
+        */
+        /* scan backwards to the last \x01 ... */
+        i = (int) res_len;
+        while (--i >= 0 && res_buf[i] != '\x01')
+            ;
+        if (i > 0)
        {
-            if (strchr(REGEX_CHARS, res_buf[i]))
-                wrbuf_putc(term_dict, '\\');
-            if (res_buf[i] < 32)
-                wrbuf_putc(term_dict, 1);
-            wrbuf_putc(term_dict, res_buf[i]);
+            /* ... and on to the \x01 before it */
+            while (--i >= 0 && res_buf[i] != '\x01')
+                ;
+        }
+        /* i is -1 when fewer than two separators were found and 0 when
+           the key starts with one. Both must be rejected: assigning -1
+           to the unsigned res_len would make the loop below run far
+           past the end of res_buf. */
+        if (i <= 0)
+        {  /* did not find base chars at all. Throw error */
+            return -1;
        }
-        r = zebra_map_tokenize(zm, 0, 0, &res_buf, &res_len);
+        res_len = (size_t) i; /* reduce res_len */
    }
-    return no;
+    for (k = 0; k < res_len; k++)
+    {
+        if (strchr(REGEX_CHARS "\\", res_buf[k]))
+            wrbuf_putc(term_dict, '\\');
+        /* NOTE(review): where plain char is signed, bytes >= 0x80 also
+           compare less than 32 here -- confirm that is wanted */
+        if (res_buf[k] < 32)
+            wrbuf_putc(term_dict, 1);
+
+        wrbuf_putc(term_dict, res_buf[k]);
+    }
+    if (right_trunc)
+        wrbuf_puts(term_dict, ".*");
+    return 1;
}
/* term_100: handle term, where trunc = none(no operators at all) */
static int term_100(zebra_map_t zm,
const char **src, WRBUF term_dict, int space_split,
- char *dst_term)
+ WRBUF display_term)
{
const char *s0;
const char **map;
int i = 0;
- int j = 0;
const char *space_start = 0;
const char *space_end = 0;
- if (zebra_maps_is_icu(zm))
- return term_100_icu(zm, src, term_dict, space_split, dst_term);
-
if (!term_pre(zm, src, NULL, NULL, !space_split))
return 0;
s0 = *src;
{
if (strchr(REGEX_CHARS, *space_start))
wrbuf_putc(term_dict, '\\');
- dst_term[j++] = *space_start;
+ wrbuf_putc(display_term, *space_start);
wrbuf_putc(term_dict, *space_start);
space_start++;
}
i++;
- add_non_space(s1, s0, term_dict, dst_term, &j,
- map, q_map_match);
+ add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
}
- dst_term[j] = '\0';
*src = s0;
return i;
}
/* term_101: handle term, where trunc = Process # */
static int term_101(zebra_map_t zm,
const char **src, WRBUF term_dict, int space_split,
- char *dst_term)
+ WRBUF display_term)
{
const char *s0;
const char **map;
int i = 0;
- int j = 0;
if (!term_pre(zm, src, "#", "#", !space_split))
return 0;
{
i++;
wrbuf_puts(term_dict, ".*");
- dst_term[j++] = *s0++;
+ wrbuf_putc(display_term, *s0);
+ s0++;
}
else
{
break;
i++;
- add_non_space(s1, s0, term_dict, dst_term, &j,
- map, q_map_match);
+ add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
}
}
- dst_term[j++] = '\0';
*src = s0;
return i;
}
/* term_103: handle term, where trunc = re-2 (regular expressions) */
static int term_103(zebra_map_t zm, const char **src,
WRBUF term_dict, int *errors, int space_split,
- char *dst_term)
+ WRBUF display_term)
{
int i = 0;
- int j = 0;
const char *s0;
const char **map;
{
if (strchr("^\\()[].*+?|-", *s0))
{
- dst_term[j++] = *s0;
+ wrbuf_putc(display_term, *s0);
wrbuf_putc(term_dict, *s0);
s0++;
i++;
break;
i++;
- add_non_space(s1, s0, term_dict, dst_term, &j,
- map, q_map_match);
+ add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
}
}
- dst_term[j] = '\0';
*src = s0;
return i;
/* term_102: handle term, where trunc = re-1 (regular expressions).
   Forwards to term_103 with a NULL errors pointer. */
static int term_102(zebra_map_t zm, const char **src,
- WRBUF term_dict, int space_split, char *dst_term)
+ WRBUF term_dict, int space_split, WRBUF display_term)
{
- return term_103(zm, src, term_dict, NULL, space_split, dst_term);
+ return term_103(zm, src, term_dict, NULL, space_split, display_term);
}
/* term_104: handle term, process # and ! */
static int term_104(zebra_map_t zm, const char **src,
- WRBUF term_dict, int space_split, char *dst_term)
+ WRBUF term_dict, int space_split, WRBUF display_term)
{
const char *s0;
const char **map;
int i = 0;
- int j = 0;
if (!term_pre(zm, src, "?*#", "?*#", !space_split))
return 0;
if (*s0 == '?')
{
i++;
- dst_term[j++] = *s0++;
+ wrbuf_putc(display_term, *s0);
+ s0++;
if (*s0 >= '0' && *s0 <= '9')
{
int limit = 0;
while (*s0 >= '0' && *s0 <= '9')
{
limit = limit * 10 + (*s0 - '0');
- dst_term[j++] = *s0++;
+ wrbuf_putc(display_term, *s0);
+ s0++;
}
if (limit > 20)
limit = 20;
{
i++;
wrbuf_puts(term_dict, ".*");
- dst_term[j++] = *s0++;
+ wrbuf_putc(display_term, *s0);
+ s0++;
}
else if (*s0 == '#')
{
i++;
wrbuf_puts(term_dict, ".");
- dst_term[j++] = *s0++;
+ wrbuf_putc(display_term, *s0);
+ s0++;
}
else
{
break;
i++;
- add_non_space(s1, s0, term_dict, dst_term, &j,
- map, q_map_match);
+ add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
}
}
- dst_term[j++] = '\0';
*src = s0;
return i;
}
/* term_105/106: handle term, where trunc = Process * and ! and right trunc */
static int term_105(zebra_map_t zm, const char **src,
WRBUF term_dict, int space_split,
- char *dst_term, int right_truncate)
+ WRBUF display_term, int right_truncate)
{
const char *s0;
const char **map;
int i = 0;
- int j = 0;
if (!term_pre(zm, src, "*!", "*!", !space_split))
return 0;
{
i++;
wrbuf_puts(term_dict, ".*");
- dst_term[j++] = *s0++;
+ wrbuf_putc(display_term, *s0);
+ s0++;
}
else if (*s0 == '!')
{
i++;
wrbuf_putc(term_dict, '.');
- dst_term[j++] = *s0++;
+ wrbuf_putc(display_term, *s0);
+ s0++;
}
else
{
break;
i++;
- add_non_space(s1, s0, term_dict, dst_term, &j,
- map, q_map_match);
+ add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
}
}
if (right_truncate)
wrbuf_puts(term_dict, ".*");
- dst_term[j++] = '\0';
*src = s0;
return i;
}
static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
const char **term_sub, WRBUF term_dict,
const Odr_oid *attributeSet,
- zebra_map_t zm, int space_split, char *term_dst,
+ zebra_map_t zm, int space_split,
+ WRBUF display_term,
int *error_code)
{
AttrType relation;
switch (relation_value)
{
case 1:
- if (!term_100(zm, term_sub, term_component, space_split, term_dst))
+ if (!term_100(zm, term_sub, term_component, space_split, display_term))
{
wrbuf_destroy(term_component);
return 0;
wrbuf_putc(term_dict, ')');
break;
case 2:
- if (!term_100(zm, term_sub, term_component, space_split, term_dst))
+ if (!term_100(zm, term_sub, term_component, space_split, display_term))
{
wrbuf_destroy(term_component);
return 0;
wrbuf_putc(term_dict, ')');
break;
case 5:
- if (!term_100(zm, term_sub, term_component, space_split, term_dst))
+ if (!term_100(zm, term_sub, term_component, space_split, display_term))
{
wrbuf_destroy(term_component);
return 0;
wrbuf_putc(term_dict, ')');
break;
case 4:
- if (!term_100(zm, term_sub, term_component, space_split, term_dst))
+ if (!term_100(zm, term_sub, term_component, space_split, display_term))
{
wrbuf_destroy(term_component);
return 0;
if (!**term_sub)
return 1;
yaz_log(log_level_rpn, "Relation =");
- if (!term_100(zm, term_sub, term_component, space_split, term_dst))
+ if (!term_100(zm, term_sub, term_component, space_split, display_term))
{
wrbuf_destroy(term_component);
return 0;
const Odr_oid *attributeSet, NMEM stream,
struct grep_info *grep_info,
const char *index_type, int complete_flag,
- char *term_dst,
+ WRBUF display_term,
const char *xpath_use,
- struct ord_list **ol);
+ struct ord_list **ol,
+ zebra_map_t zm);
ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
Z_AttributesPlusTerm *zapt,
return ZEBRA_OK;
}
-static ZEBRA_RES term_trunc(ZebraHandle zh,
- Z_AttributesPlusTerm *zapt,
- const char **term_sub,
- const Odr_oid *attributeSet, NMEM stream,
- struct grep_info *grep_info,
- const char *index_type, int complete_flag,
- char *term_dst,
- const char *rank_type,
- const char *xpath_use,
- NMEM rset_nmem,
- RSET *rset,
- struct rset_key_control *kc)
+/** \brief search for term (which may be truncated)
+
+ Calls string_term for the next term, handing it a freshly allocated
+ term_dict and display_term. If string_term returns ZEBRA_OK and
+ *term_sub is still non-NULL afterwards, a result set is created with
+ rset_trunc from grep_info->isam_p_buf, using the contents of
+ display_term as the term text.
+
+ \param term_sub passed on to string_term; a NULL *term_sub on return
+ means no result set was created
+ \param rset output; set to 0 on entry, then to the rset_trunc result
+ \param zm map handed on to string_term
+ \return the string_term result, or ZEBRA_FAIL when rset_trunc
+ returns NULL
+
+ Both term_dict and display_term are destroyed before returning.
+ */
+static ZEBRA_RES search_term(ZebraHandle zh,
+ Z_AttributesPlusTerm *zapt,
+ const char **term_sub,
+ const Odr_oid *attributeSet, NMEM stream,
+ struct grep_info *grep_info,
+ const char *index_type, int complete_flag,
+ const char *rank_type,
+ const char *xpath_use,
+ NMEM rset_nmem,
+ RSET *rset,
+ struct rset_key_control *kc,
+ zebra_map_t zm)
{
ZEBRA_RES res;
struct ord_list *ol;
zint hits_limit_value;
const char *term_ref_id_str = 0;
WRBUF term_dict = wrbuf_alloc();
-
+ WRBUF display_term = wrbuf_alloc();
*rset = 0;
zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
stream);
res = string_term(zh, zapt, term_sub, term_dict,
attributeSet, stream, grep_info,
index_type, complete_flag,
- term_dst, xpath_use, &ol);
+ display_term, xpath_use, &ol, zm);
wrbuf_destroy(term_dict); /* term_dict is not referenced below */
- if (res != ZEBRA_OK)
- return res;
- if (!*term_sub) /* no more terms ? */
- return res;
- yaz_log(log_level_rpn, "term: %s", term_dst);
- *rset = rset_trunc(zh, grep_info->isam_p_buf,
- grep_info->isam_p_indx, term_dst,
- strlen(term_dst), rank_type, 1 /* preserve pos */,
- zapt->term->which, rset_nmem,
- kc, kc->scope, ol, index_type, hits_limit_value,
- term_ref_id_str);
- if (!*rset)
- return ZEBRA_FAIL;
- return ZEBRA_OK;
+ if (res == ZEBRA_OK && *term_sub)
+ {
+ yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
+ *rset = rset_trunc(zh, grep_info->isam_p_buf,
+ grep_info->isam_p_indx, wrbuf_buf(display_term),
+ wrbuf_len(display_term), rank_type,
+ 1 /* preserve pos */,
+ zapt->term->which, rset_nmem,
+ kc, kc->scope, ol, index_type, hits_limit_value,
+ term_ref_id_str);
+ if (!*rset)
+ res = ZEBRA_FAIL;
+ }
+ wrbuf_destroy(display_term);
+ return res;
}
static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
const Odr_oid *attributeSet, NMEM stream,
struct grep_info *grep_info,
const char *index_type, int complete_flag,
- char *term_dst,
+ WRBUF display_term,
const char *xpath_use,
- struct ord_list **ol)
+ struct ord_list **ol,
+ zebra_map_t zm)
{
int r;
AttrType truncation;
int relation_error;
char ord_buf[32];
int ord_len, i;
- zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, index_type);
*ol = ord_list_create(stream);
wrbuf_putc(term_dict, ')');
prefix_len = wrbuf_len(term_dict);
-
- switch (truncation_value)
- {
- case -1: /* not specified */
- case 100: /* do not truncate */
- if (!string_relation(zh, zapt, &termp, term_dict,
- attributeSet,
- zm, space_split, term_dst,
- &relation_error))
+
+ if (zebra_maps_is_icu(zm))
+ {
+ int relation_value;
+ AttrType relation;
+
+ attr_init_APT(&relation, zapt, 2);
+ relation_value = attr_find(&relation, NULL);
+ if (relation_value == 103) /* always matches */
+ termp += strlen(termp); /* move to end of term */
+ else if (relation_value == 3 || relation_value == 102 || relation_value == -1)
{
- if (relation_error)
+ /* ICU case */
+ switch (truncation_value)
{
- zebra_setError(zh, relation_error, 0);
+ case -1: /* not specified */
+ case 100: /* do not truncate */
+ if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0))
+ {
+ *term_sub = 0;
+ return ZEBRA_OK;
+ }
+ break;
+ case 1: /* right truncation */
+ if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1))
+ {
+ *term_sub = 0;
+ return ZEBRA_OK;
+ }
+ break;
+ default:
+ zebra_setError_zint(zh,
+ YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
+ truncation_value);
return ZEBRA_FAIL;
}
- *term_sub = 0;
- return ZEBRA_OK;
}
- break;
- case 1: /* right truncation */
- wrbuf_putc(term_dict, '(');
- if (!term_100(zm, &termp, term_dict, space_split, term_dst))
- {
- *term_sub = 0;
- return ZEBRA_OK;
- }
- wrbuf_puts(term_dict, ".*)");
- break;
- case 2: /* keft truncation */
- wrbuf_puts(term_dict, "(.*");
- if (!term_100(zm, &termp, term_dict, space_split, term_dst))
- {
- *term_sub = 0;
- return ZEBRA_OK;
- }
- wrbuf_putc(term_dict, ')');
- break;
- case 3: /* left&right truncation */
- wrbuf_puts(term_dict, "(.*");
- if (!term_100(zm, &termp, term_dict, space_split, term_dst))
- {
- *term_sub = 0;
- return ZEBRA_OK;
- }
- wrbuf_puts(term_dict, ".*)");
- break;
- case 101: /* process # in term */
- wrbuf_putc(term_dict, '(');
- if (!term_101(zm, &termp, term_dict, space_split, term_dst))
- {
- *term_sub = 0;
- return ZEBRA_OK;
- }
- wrbuf_puts(term_dict, ")");
- break;
- case 102: /* Regexp-1 */
- wrbuf_putc(term_dict, '(');
- if (!term_102(zm, &termp, term_dict, space_split, term_dst))
- {
- *term_sub = 0;
- return ZEBRA_OK;
- }
- wrbuf_putc(term_dict, ')');
- break;
- case 103: /* Regexp-2 */
- regex_range = 1;
- wrbuf_putc(term_dict, '(');
- if (!term_103(zm, &termp, term_dict, &regex_range,
- space_split, term_dst))
- {
- *term_sub = 0;
- return ZEBRA_OK;
- }
- wrbuf_putc(term_dict, ')');
- break;
- case 104: /* process # and ! in term */
- wrbuf_putc(term_dict, '(');
- if (!term_104(zm, &termp, term_dict, space_split, term_dst))
- {
- *term_sub = 0;
- return ZEBRA_OK;
- }
- wrbuf_putc(term_dict, ')');
- break;
- case 105: /* process * and ! in term */
- wrbuf_putc(term_dict, '(');
- if (!term_105(zm, &termp, term_dict, space_split, term_dst, 1))
+ else
{
- *term_sub = 0;
- return ZEBRA_OK;
+ zebra_setError_zint(zh,
+ YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE,
+ relation_value);
+ return ZEBRA_FAIL;
}
- wrbuf_putc(term_dict, ')');
- break;
- case 106: /* process * and ! in term */
- wrbuf_putc(term_dict, '(');
- if (!term_105(zm, &termp, term_dict, space_split, term_dst, 0))
+ }
+ else
+ {
+ /* non-ICU case. using string.chr and friends */
+ switch (truncation_value)
{
- *term_sub = 0;
- return ZEBRA_OK;
+ case -1: /* not specified */
+ case 100: /* do not truncate */
+ if (!string_relation(zh, zapt, &termp, term_dict,
+ attributeSet,
+ zm, space_split, display_term,
+ &relation_error))
+ {
+ if (relation_error)
+ {
+ zebra_setError(zh, relation_error, 0);
+ return ZEBRA_FAIL;
+ }
+ *term_sub = 0;
+ return ZEBRA_OK;
+ }
+ break;
+ case 1: /* right truncation */
+ wrbuf_putc(term_dict, '(');
+ if (!term_100(zm, &termp, term_dict, space_split, display_term))
+ {
+ *term_sub = 0;
+ return ZEBRA_OK;
+ }
+ wrbuf_puts(term_dict, ".*)");
+ break;
+ case 2: /* left truncation */
+ wrbuf_puts(term_dict, "(.*");
+ if (!term_100(zm, &termp, term_dict, space_split, display_term))
+ {
+ *term_sub = 0;
+ return ZEBRA_OK;
+ }
+ wrbuf_putc(term_dict, ')');
+ break;
+ case 3: /* left&right truncation */
+ wrbuf_puts(term_dict, "(.*");
+ if (!term_100(zm, &termp, term_dict, space_split, display_term))
+ {
+ *term_sub = 0;
+ return ZEBRA_OK;
+ }
+ wrbuf_puts(term_dict, ".*)");
+ break;
+ case 101: /* process # in term */
+ wrbuf_putc(term_dict, '(');
+ if (!term_101(zm, &termp, term_dict, space_split, display_term))
+ {
+ *term_sub = 0;
+ return ZEBRA_OK;
+ }
+ wrbuf_puts(term_dict, ")");
+ break;
+ case 102: /* Regexp-1 */
+ wrbuf_putc(term_dict, '(');
+ if (!term_102(zm, &termp, term_dict, space_split, display_term))
+ {
+ *term_sub = 0;
+ return ZEBRA_OK;
+ }
+ wrbuf_putc(term_dict, ')');
+ break;
+ case 103: /* Regexp-2 */
+ regex_range = 1;
+ wrbuf_putc(term_dict, '(');
+ if (!term_103(zm, &termp, term_dict, &regex_range,
+ space_split, display_term))
+ {
+ *term_sub = 0;
+ return ZEBRA_OK;
+ }
+ wrbuf_putc(term_dict, ')');
+ break;
+ case 104: /* process # and ! in term */
+ wrbuf_putc(term_dict, '(');
+ if (!term_104(zm, &termp, term_dict, space_split, display_term))
+ {
+ *term_sub = 0;
+ return ZEBRA_OK;
+ }
+ wrbuf_putc(term_dict, ')');
+ break;
+ case 105: /* process * and ! in term */
+ wrbuf_putc(term_dict, '(');
+ if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
+ {
+ *term_sub = 0;
+ return ZEBRA_OK;
+ }
+ wrbuf_putc(term_dict, ')');
+ break;
+ case 106: /* process * and ! in term */
+ wrbuf_putc(term_dict, '(');
+ if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
+ {
+ *term_sub = 0;
+ return ZEBRA_OK;
+ }
+ wrbuf_putc(term_dict, ')');
+ break;
+ default:
+ zebra_setError_zint(zh,
+ YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
+ truncation_value);
+ return ZEBRA_FAIL;
}
- wrbuf_putc(term_dict, ')');
- break;
- default:
- zebra_setError_zint(zh,
- YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
- truncation_value);
- return ZEBRA_FAIL;
}
if (1)
{
{
WRBUF pr_wr = wrbuf_alloc();
- wrbuf_verbose_str(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
+ wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
wrbuf_destroy(pr_wr);
}
termset_name = resname;
}
else
- termset_name = termset_value_string;
+ termset_name = termset_value_string;
yaz_log(log_level_rpn, "creating termset set %s", termset_name);
grep_info->termset = resultSetAdd(zh, termset_name, 1);
if (!grep_info->termset)
}
return ZEBRA_OK;
}
-
-/**
- \brief Create result set(s) for list of terms
- \param zh Zebra Handle
- \param zapt Attributes Plust Term (RPN leaf)
- \param termz term as used in query but converted to UTF-8
- \param attributeSet default attribute set
- \param stream memory for result
- \param index_type register type ("w", "p",..)
- \param complete_flag whether it's phrases or not
- \param rank_type term flags for ranking
- \param xpath_use use attribute for X-Path (-1 for no X-path)
- \param rset_nmem memory for result sets
- \param result_sets output result set for each term in list (output)
- \param num_result_sets number of output result sets
- \param kc rset key control to be used for created result sets
-*/
-static ZEBRA_RES term_list_trunc(ZebraHandle zh,
- Z_AttributesPlusTerm *zapt,
- const char *termz,
- const Odr_oid *attributeSet,
- NMEM stream,
- const char *index_type, int complete_flag,
- const char *rank_type,
- const char *xpath_use,
- NMEM rset_nmem,
- RSET **result_sets, int *num_result_sets,
- struct rset_key_control *kc)
+
+static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
+ Z_AttributesPlusTerm *zapt,
+ const char *termz,
+ const Odr_oid *attributeSet,
+ NMEM stream,
+ const char *index_type, int complete_flag,
+ const char *rank_type,
+ const char *xpath_use,
+ NMEM rset_nmem,
+ RSET **result_sets, int *num_result_sets,
+ struct rset_key_control *kc,
+ zebra_map_t zm)
{
- char term_dst[IT_MAX_WORD+1];
struct grep_info grep_info;
const char *termp = termz;
int alloc_sets = 0;
-
+
*num_result_sets = 0;
- *term_dst = 0;
if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
return ZEBRA_FAIL;
- while(1)
+ while (1)
{
ZEBRA_RES res;
alloc_sets = alloc_sets + add;
*result_sets = rnew;
}
- res = term_trunc(zh, zapt, &termp, attributeSet,
- stream, &grep_info,
- index_type, complete_flag,
- term_dst, rank_type,
- xpath_use, rset_nmem,
- &(*result_sets)[*num_result_sets],
- kc);
+ res = search_term(zh, zapt, &termp, attributeSet,
+ stream, &grep_info,
+ index_type, complete_flag,
+ rank_type,
+ xpath_use, rset_nmem,
+ &(*result_sets)[*num_result_sets],
+ kc, zm);
if (res != ZEBRA_OK)
{
int i;
grep_info_delete(&grep_info);
return ZEBRA_OK;
}
+
+/**
+ \brief Create result set(s) for list of terms
+ \param zh Zebra Handle
+ \param zapt Attributes Plus Term (RPN leaf)
+ \param termz term as used in query but converted to UTF-8
+ \param attributeSet default attribute set
+ \param stream memory for result
+ \param index_type register type ("w", "p",..)
+ \param complete_flag whether it's phrases or not
+ \param rank_type term flags for ranking
+ \param xpath_use use attribute for X-Path (-1 for no X-path)
+ \param rset_nmem memory for result sets
+ \param result_sets output result set for each term in list (output)
+ \param num_result_sets number of output result sets
+ \param kc rset key control to be used for created result sets
+ \return the result of search_terms_chrmap
+
+ The map for index_type is obtained with zebra_map_get_or_add. If it
+ is an ICU map, tokenization of the whole of termz is started here,
+ before search_terms_chrmap is called with that map.
+*/
+static ZEBRA_RES search_terms_list(ZebraHandle zh,
+ Z_AttributesPlusTerm *zapt,
+ const char *termz,
+ const Odr_oid *attributeSet,
+ NMEM stream,
+ const char *index_type, int complete_flag,
+ const char *rank_type,
+ const char *xpath_use,
+ NMEM rset_nmem,
+ RSET **result_sets, int *num_result_sets,
+ struct rset_key_control *kc)
+{
+ zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
+ if (zebra_maps_is_icu(zm))
+ zebra_map_tokenize_start(zm, termz, strlen(termz));
+ return search_terms_chrmap(zh, zapt, termz, attributeSet,
+ stream, index_type, complete_flag,
+ rank_type, xpath_use,
+ rset_nmem, result_sets, num_result_sets,
+ kc, zm);
+}
-static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
- Z_AttributesPlusTerm *zapt,
- const Odr_oid *attributeSet,
- const char *index_type,
- NMEM rset_nmem,
- RSET *rset,
- struct rset_key_control *kc)
+
+/** \brief limit a search by position - returns result set
+ */
+static ZEBRA_RES search_position(ZebraHandle zh,
+ Z_AttributesPlusTerm *zapt,
+ const Odr_oid *attributeSet,
+ const char *index_type,
+ NMEM rset_nmem,
+ RSET *rset,
+ struct rset_key_control *kc)
{
int position_value;
AttrType position;
int ord_len;
char *val;
ISAM_P isam_p;
- zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, index_type);
+ zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
attr_init_APT(&position, zapt, 3);
position_value = attr_find(&position, NULL);
return ZEBRA_FAIL;
}
- if (!zh->reg->isamb && !zh->reg->isamc)
- {
- zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
- position_value);
- return ZEBRA_FAIL;
- }
-
if (zebra_apt_get_ord(zh, zapt, index_type, 0,
attributeSet, &ord) != ZEBRA_OK)
{
{
assert(*val == sizeof(ISAM_P));
memcpy(&isam_p, val+1, sizeof(isam_p));
-
- if (zh->reg->isamb)
- *rset = rsisamb_create(rset_nmem, kc, kc->scope,
- zh->reg->isamb, isam_p, 0);
- else if (zh->reg->isamc)
- *rset = rsisamc_create(rset_nmem, kc, kc->scope,
- zh->reg->isamc, isam_p, 0);
+
+ *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope,
+ isam_p, 0);
}
return ZEBRA_OK;
}
-
+
+/** \brief returns result set for phrase search
+ */
static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
Z_AttributesPlusTerm *zapt,
const char *termz_org,
const Odr_oid *attributeSet,
NMEM stream,
- const char *index_type, int complete_flag,
+ const char *index_type,
+ int complete_flag,
const char *rank_type,
const char *xpath_use,
NMEM rset_nmem,
RSET *result_sets = 0;
int num_result_sets = 0;
ZEBRA_RES res =
- term_list_trunc(zh, zapt, termz_org, attributeSet,
- stream, index_type, complete_flag,
- rank_type, xpath_use,
- rset_nmem,
- &result_sets, &num_result_sets, kc);
-
+ search_terms_list(zh, zapt, termz_org, attributeSet,
+ stream, index_type, complete_flag,
+ rank_type, xpath_use,
+ rset_nmem,
+ &result_sets, &num_result_sets, kc);
+
if (res != ZEBRA_OK)
return res;
if (num_result_sets > 0)
{
RSET first_set = 0;
- res = rpn_search_APT_position(zh, zapt, attributeSet,
- index_type,
- rset_nmem, &first_set,
- kc);
+ res = search_position(zh, zapt, attributeSet,
+ index_type,
+ rset_nmem, &first_set,
+ kc);
if (res != ZEBRA_OK)
+ {
+ int i;
+ for (i = 0; i<num_result_sets; i++)
+ rset_delete(result_sets[i]);
return res;
+ }
if (first_set)
{
RSET *nsets = nmem_malloc(stream,
return ZEBRA_OK;
}
+/** \brief returns result set for or-list search
+ */
static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
Z_AttributesPlusTerm *zapt,
const char *termz_org,
int num_result_sets = 0;
int i;
ZEBRA_RES res =
- term_list_trunc(zh, zapt, termz_org, attributeSet,
- stream, index_type, complete_flag,
- rank_type, xpath_use,
- rset_nmem,
- &result_sets, &num_result_sets, kc);
+ search_terms_list(zh, zapt, termz_org, attributeSet,
+ stream, index_type, complete_flag,
+ rank_type, xpath_use,
+ rset_nmem,
+ &result_sets, &num_result_sets, kc);
if (res != ZEBRA_OK)
return res;
for (i = 0; i<num_result_sets; i++)
{
RSET first_set = 0;
- res = rpn_search_APT_position(zh, zapt, attributeSet,
- index_type,
- rset_nmem, &first_set,
- kc);
+ res = search_position(zh, zapt, attributeSet,
+ index_type,
+ rset_nmem, &first_set,
+ kc);
if (res != ZEBRA_OK)
{
for (i = 0; i<num_result_sets; i++)
return ZEBRA_OK;
}
+/** \brief returns result set for and-list search
+ */
static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
Z_AttributesPlusTerm *zapt,
const char *termz_org,
int num_result_sets = 0;
int i;
ZEBRA_RES res =
- term_list_trunc(zh, zapt, termz_org, attributeSet,
- stream, index_type, complete_flag,
- rank_type, xpath_use,
- rset_nmem,
- &result_sets, &num_result_sets,
- kc);
+ search_terms_list(zh, zapt, termz_org, attributeSet,
+ stream, index_type, complete_flag,
+ rank_type, xpath_use,
+ rset_nmem,
+ &result_sets, &num_result_sets,
+ kc);
if (res != ZEBRA_OK)
return res;
for (i = 0; i<num_result_sets; i++)
{
RSET first_set = 0;
- res = rpn_search_APT_position(zh, zapt, attributeSet,
- index_type,
- rset_nmem, &first_set,
- kc);
+ res = search_position(zh, zapt, attributeSet,
+ index_type,
+ rset_nmem, &first_set,
+ kc);
if (res != ZEBRA_OK)
{
for (i = 0; i<num_result_sets; i++)
*rset = result_sets[0];
else
*rset = rset_create_and(rset_nmem, kc, kc->scope,
- num_result_sets, result_sets);
+ num_result_sets, result_sets);
if (!*rset)
return ZEBRA_FAIL;
return ZEBRA_OK;
struct grep_info *grep_info,
int *max_pos,
zebra_map_t zm,
- char *term_dst,
+ WRBUF display_term,
int *error_code)
{
AttrType relation;
{
case 1:
yaz_log(log_level_rpn, "Relation <");
- if (!term_100(zm, term_sub, term_num, 1, term_dst))
+ if (!term_100(zm, term_sub, term_num, 1, display_term))
{
wrbuf_destroy(term_num);
return 0;
break;
case 2:
yaz_log(log_level_rpn, "Relation <=");
- if (!term_100(zm, term_sub, term_num, 1, term_dst))
+ if (!term_100(zm, term_sub, term_num, 1, display_term))
{
wrbuf_destroy(term_num);
return 0;
break;
case 4:
yaz_log(log_level_rpn, "Relation >=");
- if (!term_100(zm, term_sub, term_num, 1, term_dst))
+ if (!term_100(zm, term_sub, term_num, 1, display_term))
{
wrbuf_destroy(term_num);
return 0;
break;
case 5:
yaz_log(log_level_rpn, "Relation >");
- if (!term_100(zm, term_sub, term_num, 1, term_dst))
+ if (!term_100(zm, term_sub, term_num, 1, display_term))
{
wrbuf_destroy(term_num);
return 0;
case -1:
case 3:
yaz_log(log_level_rpn, "Relation =");
- if (!term_100(zm, term_sub, term_num, 1, term_dst))
+ if (!term_100(zm, term_sub, term_num, 1, display_term))
{
wrbuf_destroy(term_num);
return 0;
const Odr_oid *attributeSet, NMEM stream,
struct grep_info *grep_info,
const char *index_type, int complete_flag,
- char *term_dst,
+ WRBUF display_term,
const char *xpath_use,
struct ord_list **ol)
{
int relation_error = 0;
int ord, ord_len, i;
char ord_buf[32];
- zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, index_type);
+ zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
*ol = ord_list_create(stream);
if (!numeric_relation(zh, zapt, &termp, term_dict,
attributeSet, grep_info, &max_pos, zm,
- term_dst, &relation_error))
+ display_term, &relation_error))
{
if (relation_error)
{
RSET *rset,
struct rset_key_control *kc)
{
- char term_dst[IT_MAX_WORD+1];
const char *termp = termz;
RSET *result_sets = 0;
int num_result_sets = 0;
{
struct ord_list *ol;
WRBUF term_dict = wrbuf_alloc();
+ WRBUF display_term = wrbuf_alloc();
if (alloc_sets == num_result_sets)
{
int add = 10;
res = numeric_term(zh, zapt, &termp, term_dict,
attributeSet, stream, &grep_info,
index_type, complete_flag,
- term_dst, xpath_use, &ol);
+ display_term, xpath_use, &ol);
wrbuf_destroy(term_dict);
if (res == ZEBRA_FAIL || termp == 0)
+ {
+ wrbuf_destroy(display_term);
break;
- yaz_log(YLOG_DEBUG, "term: %s", term_dst);
+ }
+ yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
result_sets[num_result_sets] =
rset_trunc(zh, grep_info.isam_p_buf,
- grep_info.isam_p_indx, term_dst,
- strlen(term_dst), rank_type,
+ grep_info.isam_p_indx, wrbuf_buf(display_term),
+ wrbuf_len(display_term), rank_type,
0 /* preserve position */,
zapt->term->which, rset_nmem,
kc, kc->scope, ol, index_type,
hits_limit_value,
term_ref_id_str);
+ wrbuf_destroy(display_term);
if (!result_sets[num_result_sets])
break;
num_result_sets++;
sort_sequence->num_specs = 10;
sort_sequence->specs = (Z_SortKeySpec **)
nmem_malloc(stream, sort_sequence->num_specs *
- sizeof(*sort_sequence->specs));
+ sizeof(*sort_sequence->specs));
for (i = 0; i<sort_sequence->num_specs; i++)
sort_sequence->specs[i] = 0;
}
i = 0;
else
i = atoi_n((char *) zapt->term->u.general->buf,
- zapt->term->u.general->len);
+ zapt->term->u.general->len);
if (i >= sort_sequence->num_specs)
i = 0;
sprintf(termz, "%d", i);
}
/*
- //a -> a/.*
- //a/b -> b/a/.*
- /a -> a/
- /a/b -> b/a/
+ //a -> a/.*
+ //a/b -> b/a/.*
+ /a -> a/
+ /a/b -> b/a/
- / -> none
+ / -> none
- a[@attr = value]/b[@other = othervalue]
+ a[@attr = value]/b[@other = othervalue]
- /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
- /a/b val range(b/a/,freetext(w,1016,val),b/a/)
- /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
- /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
- /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
- /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
+ /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
+ /a/b val range(b/a/,freetext(w,1016,val),b/a/)
+ /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
+ /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
+ /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
+ /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
*/
static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
const Odr_oid *attributeSet, NMEM stream,
Z_SortKeySpecList *sort_sequence,
- int num_bases, char **basenames,
+ int num_bases, const char **basenames,
NMEM rset_nmem,
RSET *rset,
struct rset_key_control *kc)
const Odr_oid *attributeSet,
NMEM stream, NMEM rset_nmem,
Z_SortKeySpecList *sort_sequence,
- int num_bases, char **basenames,
+ int num_bases, const char **basenames,
RSET **result_sets, int *num_result_sets,
Z_Operator *parent_op,
struct rset_key_control *kc);
const Odr_oid *attributeSet,
NMEM stream, NMEM rset_nmem,
Z_SortKeySpecList *sort_sequence,
- int num_bases, char **basenames,
+ int num_bases, const char **basenames,
RSET *result_set)
{
RSET *result_sets = 0;
const Odr_oid *attributeSet,
NMEM stream, NMEM rset_nmem,
Z_SortKeySpecList *sort_sequence,
- int num_bases, char **basenames,
+ int num_bases, const char **basenames,
RSET **result_sets, int *num_result_sets,
Z_Operator *parent_op,
struct rset_key_control *kc)
/*
* Local variables:
* c-basic-offset: 4
+ * c-file-style: "Stroustrup"
* indent-tabs-mode: nil
* End:
* vim: shiftwidth=4 tabstop=8 expandtab