-/* $Id: zrpn.c,v 1.123 2002-09-18 21:01:15 adam Exp $
- Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
+/* $Id: zrpn.c,v 1.133 2003-04-15 20:48:04 adam Exp $
+ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003
Index Data Aps
This file is part of the Zebra server.
#include <ctype.h>
#include "index.h"
+#include <zebra_xpath.h>
#include <charmap.h>
#include <rstemp.h>
int reg_type;
};
+typedef struct { /* search state used by attr_init/attr_find_ex to scan the attributes of one APT */
+ int type; /* attribute type being looked for; compared with *element->attributeType */
+ int major; /* index of the current element in zapt->attributes->attributes[] */
+ int minor; /* NOTE(review): presumably a position inside the current element; its use is not visible here, confirm */
+ Z_AttributesPlusTerm *zapt; /* the attributes-plus-term whose attribute list is scanned */
+} AttrType;
+
+
static const char **rpn_char_map_handler (void *vp, const char **from, int len)
{
struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
dict_grep_cmap (reg->dict, map_info, rpn_char_map_handler);
}
-typedef struct {
- int type;
- int major;
- int minor;
- Z_AttributesPlusTerm *zapt;
-} AttrType;
-
static int attr_find_ex (AttrType *src, oid_value *attributeSetP,
const char **string_value)
{
int num_attributes;
-#ifdef ASN_COMPILED
num_attributes = src->zapt->attributes->num_attributes;
-#else
- num_attributes = src->zapt->num_attributes;
-#endif
while (src->major < num_attributes)
{
Z_AttributeElement *element;
-#ifdef ASN_COMPILED
element = src->zapt->attributes->attributes[src->major];
-#else
- element = src->zapt->attributeList[src->major];
-#endif
if (src->type == *element->attributeType)
{
switch (element->which)
static void term_untrans (ZebraHandle zh, int reg_type,
char *dst, const char *src)
{
+ int len = 0;
while (*src)
{
const char *cp = zebra_maps_output (zh->reg->zebra_maps,
reg_type, &src);
- if (!cp)
- *dst++ = *src++;
+ if (!cp && len < IT_MAX_WORD-1)
+ dst[len++] = *src++;
else
- while (*cp)
- *dst++ = *cp++;
+ while (*cp && len < IT_MAX_WORD-1)
+ dst[len++] = *cp++;
}
- *dst = '\0';
+ dst[len] = '\0';
}
static void add_isam_p (const char *name, const char *info,
{
const char *db;
int set, use;
- char term_tmp[512];
+ char term_tmp[IT_MAX_WORD];
int su_code = 0;
int len = key_SU_decode (&su_code, name);
return *s0;
}
-#define REGEX_CHARS " []()|.*+!"
+#define REGEX_CHARS " []()|.*+?!"
/* term_100: handle term, where trunc=none (no operators at all) */
static int term_100 (ZebraMaps zebra_maps, int reg_type,
int i = 0;
int j = 0;
- if (!term_pre (zebra_maps, reg_type, src, "#!", "#!"))
+ if (!term_pre (zebra_maps, reg_type, src, "?*#", "?*#"))
return 0;
s0 = *src;
while (*s0)
{
- if (*s0 == '#')
+ if (*s0 == '?')
+ {
+ dst_term[j++] = *s0++;
+ if (*s0 >= '0' && *s0 <= '9')
+ {
+ int limit = 0;
+ while (*s0 >= '0' && *s0 <= '9')
+ {
+ limit = limit * 10 + (*s0 - '0');
+ dst_term[j++] = *s0++;
+ }
+ if (limit > 20)
+ limit = 20;
+ while (--limit >= 0)
+ {
+ dst[i++] = '.';
+ dst[i++] = '?';
+ }
+ }
+ else
+ {
+ dst[i++] = '.';
+ dst[i++] = '*';
+ }
+ }
+ else if (*s0 == '*')
{
dst[i++] = '.';
dst[i++] = '*';
dst_term[j++] = *s0++;
}
- else if (*s0 == '!')
+ else if (*s0 == '#')
{
dst[i++] = '.';
dst_term[j++] = *s0++;
int relation_value;
int i;
char *term_tmp = term_dict + strlen(term_dict);
- char term_component[256];
+ char term_component[2*IT_MAX_WORD+20];
attr_init (&relation, zapt, 2);
relation_value = attr_find (&relation, NULL);
*term_tmp++ = ']';
*term_tmp++ = '.';
*term_tmp++ = '*';
+
+ if ((term_tmp - term_dict) > IT_MAX_WORD)
+ break;
}
*term_tmp++ = ')';
*term_tmp = '\0';
*term_tmp++ = '*';
*term_tmp++ = '|';
+
+ if ((term_tmp - term_dict) > IT_MAX_WORD)
+ break;
}
for (i = 0; term_component[i]; )
string_rel_add_char (&term_tmp, term_component, &i);
*term_tmp++ = '*';
*term_tmp++ = '|';
+
+ if ((term_tmp - term_dict) > IT_MAX_WORD)
+ break;
}
for (i = 0; term_component[i];)
string_rel_add_char (&term_tmp, term_component, &i);
*term_tmp++ = ']';
*term_tmp++ = '.';
*term_tmp++ = '*';
+
+ if ((term_tmp - term_dict) > IT_MAX_WORD)
+ break;
}
*term_tmp++ = ')';
*term_tmp = '\0';
{
/* set was found, but value wasn't defined */
char val_str[32];
- sprintf (val_str, "%d (1)", use_value);
+ sprintf (val_str, "%d", use_value);
zh->errCode = 114;
zh->errString = nmem_strdup (stream, val_str);
}
if (!prefix_len)
{
char val_str[32];
- sprintf (val_str, "%d (2)", use_value);
+ sprintf (val_str, "%d", use_value);
zh->errCode = 114;
zh->errString = nmem_strdup (stream, val_str);
return -1;
}
-static int trans_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
- char *termz)
+/* convert APT search term to UTF8 */
+static int zapt_term_to_utf8 (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
+ char *termz)
{
size_t sizez;
Z_Term *term = zapt->term;
size_t outleft = IT_MAX_WORD-1;
size_t ret;
- yaz_log (LOG_DEBUG, "converting general from ISO-8859-1");
ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
&outbuf, &outleft);
if (ret == (size_t)(-1))
return -1;
}
*outbuf = 0;
- return 0;
}
- sizez = term->u.general->len;
- if (sizez > IT_MAX_WORD-1)
- sizez = IT_MAX_WORD-1;
- memcpy (termz, term->u.general->buf, sizez);
- termz[sizez] = '\0';
+ else
+ {
+ sizez = term->u.general->len;
+ if (sizez > IT_MAX_WORD-1)
+ sizez = IT_MAX_WORD-1;
+ memcpy (termz, term->u.general->buf, sizez);
+ termz[sizez] = '\0';
+ }
break;
case Z_Term_characterString:
sizez = strlen(term->u.characterString);
break;
default:
zh->errCode = 124;
+ return -1;
}
return 0;
}
-static void trans_scan_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
- char *termz, int reg_type)
+/* Convert an APT SCAN term to the internal character map (cmap) form.
+ *
+ * The term is first converted by zapt_term_to_utf8() into a local buffer.
+ * That buffer is then passed piece by piece to zebra_maps_input() for
+ * reg_type, and each mapped string is appended to termz, which is
+ * NUL-terminated at the end.
+ *
+ * A mapping whose first byte equals *CHR_SPACE is not written at once.
+ * It is remembered and written only in front of the next non-space
+ * mapping, and only if something has already been written.  Leading and
+ * trailing space is therefore dropped and a run of spaces produces one
+ * space mapping.
+ *
+ * Returns 0 on success, -1 if zapt_term_to_utf8() reported an error.
+ * NOTE(review): writes to termz are not bounds-checked here; presumably
+ * the caller supplies a large enough buffer (confirm).
+ */
+static int trans_scan_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
+ char *termz, int reg_type)
{
- Z_Term *term = zapt->term;
- const char **map;
- const char *cp = (const char *) term->u.general->buf;
- const char *cp_end = cp + term->u.general->len;
- const char *src;
- int i = 0;
- const char *space_map = NULL;
- int len;
-
- while ((len = (cp_end - cp)) > 0)
+ char termz0[IT_MAX_WORD]; /* receives the term from zapt_term_to_utf8() */
+
+ if (zapt_term_to_utf8(zh, zapt, termz0))
+ return -1; /* conversion failed; termz is left untouched */
+ else
{
- map = zebra_maps_input (zh->reg->zebra_maps, reg_type, &cp, len);
- if (**map == *CHR_SPACE)
- space_map = *map;
- else
+ const char **map;
+ const char *cp = (const char *) termz0;
+ const char *cp_end = cp + strlen(cp);
+ const char *src;
+ int i = 0; /* number of bytes written to termz so far */
+ const char *space_map = NULL; /* space mapping seen but not yet written */
+ int len;
+
+ while ((len = (cp_end - cp)) > 0) /* NOTE(review): relies on zebra_maps_input advancing cp (confirm) */
{
- if (i && space_map)
- for (src = space_map; *src; src++)
+ map = zebra_maps_input (zh->reg->zebra_maps, reg_type, &cp, len);
+ if (**map == *CHR_SPACE)
+ space_map = *map; /* defer: only the latest space mapping is kept */
+ else
+ {
+ if (i && space_map) /* pending space and not at the start of the output */
+ for (src = space_map; *src; src++)
+ termz[i++] = *src;
+ space_map = NULL;
+ for (src = *map; *src; src++)
termz[i++] = *src;
- space_map = NULL;
- for (src = *map; *src; src++)
- termz[i++] = *src;
+ }
}
+ termz[i] = '\0';
}
- termz[i] = '\0';
+ return 0;
}
static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no,
parms.temp_path = res_get (zh->res, "setTmpDir");
result = rset_create (rset_kind_temp, &parms);
rsfd_result = rset_open (result, RSETF_WRITE);
-
+
while (*more)
{
for (i = 1; i<rset_no; i++)
rset_temp_parms parms;
RSFD rsfd_result;
- logf (LOG_LOG, "generic prox, dist = %d, relation = %d, ordered =%d, exclusion=%d",
- distance, relation, ordered, exclusion);
+ yaz_log (LOG_LOG, "generic prox, dist=%d, relation=%d, ordered=%d"
+ ", exclusion=%d",
+ distance, relation, ordered, exclusion);
parms.rset_term = rset_term_create (prox_term, length_prox_term,
flags, term_type);
parms.rset_term->nn = min_nn;
oid_value attributeSet, struct grep_info *grep_info,
int reg_type, int complete_flag,
int num_bases, char **basenames,
- char *term_dst)
+ char *term_dst, int xpath_use, NMEM stream)
{
char term_dict[2*IT_MAX_WORD+2];
int r, base_no;
AttrType use;
int use_value;
+ const char *use_string = 0;
oid_value curAttributeSet = attributeSet;
const char *termp;
struct rpn_char_map_info rcmi;
rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
attr_init (&use, zapt, 1);
- use_value = attr_find (&use, &curAttributeSet);
- logf (LOG_DEBUG, "numeric_term, use value %d", use_value);
+ use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
if (use_value == -1)
use_value = 1016;
for (base_no = 0; base_no < num_bases; base_no++)
{
attent attp;
+ data1_local_attribute id_xpath_attr;
data1_local_attribute *local_attr;
int max_pos, prefix_len = 0;
termp = *term_sub;
- if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value)))
+ if (use_value == -2) /* string attribute (assume IDXPATH/any) */
{
- logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
- curAttributeSet, use_value, r);
- if (r == -1)
- zh->errCode = 114;
- else
- zh->errCode = 121;
- return -1;
+ use_value = xpath_use;
+ attp.local_attributes = &id_xpath_attr;
+ attp.attset_ordinal = VAL_IDXPATH;
+ id_xpath_attr.next = 0;
+ id_xpath_attr.local = use_value;
+ }
+ else if (curAttributeSet == VAL_IDXPATH)
+ {
+ attp.local_attributes = &id_xpath_attr;
+ attp.attset_ordinal = VAL_IDXPATH;
+ id_xpath_attr.next = 0;
+ id_xpath_attr.local = use_value;
+ }
+ else
+ {
+ if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value)))
+ {
+ logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
+ curAttributeSet, use_value, r);
+ if (r == -1)
+ {
+ char val_str[32];
+ sprintf (val_str, "%d", use_value);
+ zh->errString = nmem_strdup (stream, val_str);
+ zh->errCode = 114;
+ }
+ else
+ zh->errCode = 121;
+ return -1;
+ }
}
if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
{
}
if (!prefix_len)
{
+ char val_str[32];
+ sprintf (val_str, "%d", use_value);
zh->errCode = 114;
+ zh->errString = nmem_strdup (stream, val_str);
return -1;
}
term_dict[prefix_len++] = ')';
oid_value attributeSet,
NMEM stream,
int reg_type, int complete_flag,
- const char *rank_type,
+ const char *rank_type, int xpath_use,
int num_bases, char **basenames)
{
char term_dst[IT_MAX_WORD+1];
grep_info.isam_p_indx = 0;
r = numeric_term (zh, zapt, &termp, attributeSet, &grep_info,
reg_type, complete_flag, num_bases, basenames,
- term_dst);
+ term_dst, xpath_use,
+ stream);
if (r < 1)
break;
logf (LOG_DEBUG, "term: %s", term_dst);
nmem_malloc (stream, sizeof(*sks->caseSensitivity));
*sks->caseSensitivity = 0;
-#ifdef ASN_COMPILED
sks->which = Z_SortKeySpec_null;
sks->u.null = odr_nullval ();
-#else
- sks->missingValueAction = 0;
-#endif
-
sort_sequence->specs[i] = sks;
parms.rset_term = rset_term_create (termz, -1, rank_type,
return rset_create (rset_kind_null, &parms);
}
+/* pop - moved to xpath.c */
+#if 0
+
struct xpath_predicate {
int which;
union {
struct xpath_predicate *predicate;
};
+#endif
+
/* Extract an XPath expression from the use attribute (type 1) of an APT
 * and parse it.
 *
 * attr_find_ex() is asked for the string value of attribute type 1.  If
 * there is no string value, or it does not begin with '/', -1 is returned.
 * Otherwise the string is passed to zebra_parse_xpath_str() together with
 * xpath, max and mem, and that function's result is returned.
 *
 * NOTE(review): max is presumably the capacity of the xpath array (the
 * visible caller passes 10); confirm against zebra_parse_xpath_str().
 * zh is not used by the code shown here.
 */
static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
oid_value attributeSet,
- struct xpath_location_step *xpath, NMEM mem)
+ struct xpath_location_step *xpath, int max, NMEM mem)
{
oid_value curAttributeSet = attributeSet;
AttrType use;
const char *use_string = 0; /* stays 0 unless attr_find_ex stores a string value */
- const char *cp;
- int no = 0;
attr_init (&use, zapt, 1); /* 1 = use attribute */
attr_find_ex (&use, &curAttributeSet, &use_string);
if (!use_string || *use_string != '/') /* only strings starting with '/' are treated as XPath */
return -1;
- cp = use_string;
- while (*cp)
- {
- int i = 0;
- while (*cp && !strchr("/[",*cp))
- {
- i++;
- cp++;
- }
- xpath[no].predicate = 0;
- xpath[no].part = nmem_malloc (mem, i+1);
- memcpy (xpath[no].part, cp - i, i);
- xpath[no].part[i] = 0;
- if (*cp == '[')
- {
- struct xpath_predicate *p = xpath[no].predicate =
- nmem_malloc (mem, sizeof(struct xpath_predicate));
-
- p->which = XPATH_PREDICATE_RELATION;
- cp++;
- while (*cp == ' ')
- cp++;
-
- for (i = 0; *cp && !strchr("><=] ", *cp); i++)
- cp++;
- p->u.relation.name = nmem_malloc (mem, i+1);
- memcpy (p->u.relation.name, cp - i, i);
- p->u.relation.name[i] = 0;
- while (*cp == ' ')
- cp++;
- if (*cp != ']')
- {
- for (i = 0; *cp && strchr(">=<!", *cp); i++)
- cp++;
-
- p->u.relation.op = nmem_malloc (mem, i+1);
- if (i)
- memcpy (p->u.relation.op, cp - i, i);
- p->u.relation.op[i] = 0;
-
- while (*cp == ' ')
- cp++;
-
- if (strchr("\"'", *cp))
- {
- cp++;
- for (i = 0; *cp && !strchr("\"'", *cp); i++)
- cp++;
-
- p->u.relation.value = nmem_malloc (mem, i+1);
- if (i)
- memcpy (p->u.relation.value, cp - i, i);
- p->u.relation.value[i] = 0;
- yaz_log (LOG_LOG, "value=%s", p->u.relation.value);
-
- cp++;
- }
- else
- {
- for (i = 0; *cp && !strchr(" ]", *cp); i++)
- cp++;
- p->u.relation.value = nmem_malloc (mem, i+1);
- if (i)
- memcpy (p->u.relation.value, cp - i, i);
- p->u.relation.value[i] = 0;
- }
- while (*cp == ' ')
- cp++;
- }
- if (*cp == ']')
- cp++;
- } /* end of ] predicate */
- no++;
- if (*cp != '/')
- break;
- cp++;
- }
- return no;
+ return zebra_parse_xpath_str(use_string, xpath, max, mem);
}
-
+
+
static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
int reg_type, const char *term, int use,
logf (LOG_DEBUG, "search_type=%s", search_type);
logf (LOG_DEBUG, "rank_type=%s", rank_type);
- if (trans_term (zh, zapt, termz))
+ if (zapt_term_to_utf8(zh, zapt, termz))
return 0;
if (sort_flag)
return rpn_sort_spec (zh, zapt, attributeSet, stream, sort_sequence,
rank_type);
- xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, stream);
+ xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
if (xpath_len >= 0)
{
xpath_use = 1016;
{
rset = rpn_search_APT_numeric (zh, zapt, termz, attributeSet, stream,
reg_id, complete_flag, rank_type,
+ xpath_use,
num_bases, basenames);
}
else if (!strcmp (search_type, "always"))
r = rset_create (rset_kind_not, &bool_parms);
break;
case Z_Operator_prox:
-#ifdef ASN_COMPILED
if (zop->u.prox->which != Z_ProximityOperator_known)
{
zh->errCode = 132;
return NULL;
}
-#else
- if (zop->u.prox->which != Z_ProxCode_known)
- {
- zh->errCode = 132;
- return NULL;
- }
-#endif
-
-#ifdef ASN_COMPILED
if (*zop->u.prox->u.known != Z_ProxUnit_word)
{
char *val = (char *) nmem_malloc (stream, 16);
sprintf (val, "%d", *zop->u.prox->u.known);
return NULL;
}
-#else
- if (*zop->u.prox->proximityUnitCode != Z_ProxUnit_word)
- {
- char *val = (char *) nmem_malloc (stream, 16);
- zh->errCode = 132;
- zh->errString = val;
- sprintf (val, "%d", *zop->u.prox->proximityUnitCode);
- return NULL;
- }
-#endif
else
{
RSET rsets[2];
nmem_strdup (stream, zs->u.simple->u.resultSetId);
return 0;
}
+ else
+ rset_dup(r);
}
else
{
/* Produce the external form of a scan term as a string allocated from
 * stream and store it in *dst.
 *
 * term_untrans() first maps src into term_src.  If zh->iconv_from_utf8 is
 * set, term_src is run through yaz_iconv() into term_dst and the converted
 * bytes are copied into a fresh NUL-terminated nmem buffer; if the
 * conversion fails, *dst is an empty string.  Without a converter, *dst is
 * an nmem_strdup() copy of term_src.
 *
 * NOTE(review): judging by its name the converter presumably goes from
 * UTF-8 to the client's character set; confirm where it is created.
 */
static void scan_term_untrans (ZebraHandle zh, NMEM stream, int reg_type,
char **dst, const char *src)
{
- char term_dst[1024];
+ char term_src[IT_MAX_WORD]; /* output of term_untrans() */
+ char term_dst[IT_MAX_WORD]; /* output of yaz_iconv(), when a converter is set */
- term_untrans (zh, reg_type, term_dst, src);
-
- *dst = (char *) nmem_malloc (stream, strlen(term_dst)+1);
- strcpy (*dst, term_dst);
+ term_untrans (zh, reg_type, term_src, src);
+
+ if (zh->iconv_from_utf8 != 0)
+ {
+ int len;
+ char *inbuf = term_src;
+ size_t inleft = strlen(term_src);
+ char *outbuf = term_dst;
+ size_t outleft = sizeof(term_dst)-1; /* one byte less than the buffer size */
+ size_t ret;
+
+ ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
+ &outbuf, &outleft);
+ if (ret == (size_t)(-1))
+ len = 0; /* conversion error: result becomes an empty string */
+ else
+ len = outbuf - term_dst; /* bytes produced; outbuf was advanced by yaz_iconv */
+ *dst = nmem_malloc (stream, len + 1);
+ if (len > 0)
+ memcpy (*dst, term_dst, len);
+ (*dst)[len] = '\0';
+ }
+ else
+ *dst = nmem_strdup (stream, term_src);
}
static void count_set (RSET r, int *count)
oid_value attributeset,
int num_bases, char **basenames,
int *position, int *num_entries, ZebraScanEntry **list,
- int *is_partial)
+ int *is_partial, RSET limit_set, int return_zero)
{
int i;
int pos = *position;
char rank_type[128];
int complete_flag;
int sort_flag;
+
*list = 0;
if (attributeset == VAL_NONE)
attributeset = VAL_BIB1;
+ if (!limit_set)
+ {
+ AttrType termset;
+ int termset_value_numeric;
+ const char *termset_value_string;
+ attr_init (&termset, zapt, 8);
+ termset_value_numeric =
+ attr_find_ex (&termset, NULL, &termset_value_string);
+ if (termset_value_numeric != -1)
+ {
+ char resname[32];
+ const char *termset_name = 0;
+
+ if (termset_value_numeric != -2)
+ {
+
+ sprintf (resname, "%d", termset_value_numeric);
+ termset_name = resname;
+ }
+ else
+ termset_name = termset_value_string;
+
+ limit_set = resultSetRef (zh, termset_name);
+ }
+ }
+
yaz_log (LOG_DEBUG, "position = %d, num = %d set=%d",
pos, num, attributeset);
logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
attributeset, use_value);
if (r == -1)
- zh->errCode = 114;
+ {
+ char val_str[32];
+ sprintf (val_str, "%d", use_value);
+ zh->errCode = 114;
+ zh->errString = odr_strdup (stream, val_str);
+ }
else
zh->errCode = 121;
*num_entries = 0;
termz[prefix_len] = 0;
strcpy (scan_info->prefix, termz);
- trans_scan_term (zh, zapt, termz+prefix_len, reg_id);
+ if (trans_scan_term (zh, zapt, termz+prefix_len, reg_id))
+ return ;
dict_scan (zh->reg->dict, termz, &before_tmp, &after_tmp,
scan_info, scan_handle);
ptr[j]++;
}
}
+ if (limit_set)
+ {
+ rset_bool_parms bool_parms;
+
+ bool_parms.key_size = sizeof(struct it_key);
+ bool_parms.cmp = key_compare_it;
+ bool_parms.rset_l = rset;
+ bool_parms.rset_r = rset_dup(limit_set);
+
+ rset = rset_create (rset_kind_and, &bool_parms);
+ }
count_set (rset, &glist[i+before].occurrences);
rset_delete (rset);
}
ptr[j]++;
}
}
+ if (limit_set)
+ {
+ rset_bool_parms bool_parms;
+
+ bool_parms.key_size = sizeof(struct it_key);
+ bool_parms.cmp = key_compare_it;
+ bool_parms.rset_l = rset;
+ bool_parms.rset_r = rset_dup(limit_set);
+
+ rset = rset_create (rset_kind_and, &bool_parms);
+ }
count_set (rset, &glist[before-1-i].occurrences);
rset_delete (rset);
}