/*
- * Copyright (C) 1994-1996, Index Data I/S
+ * Copyright (C) 1994-1997, Index Data I/S
* All rights reserved.
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: zrpn.c,v $
- * Revision 1.58 1996-12-23 15:30:45 adam
+ * Revision 1.66 1997-09-25 14:58:03 adam
+ * Windows NT port.
+ *
+ * Revision 1.65 1997/09/22 12:39:06 adam
+ * Added get_pos method for the ranked result sets.
+ *
+ * Revision 1.64 1997/09/18 08:59:20 adam
+ * Extra generic handle for the character mapping routines.
+ *
+ * Revision 1.63 1997/09/17 12:19:18 adam
+ * Zebra version corresponds to YAZ version 1.4.
+ * Changed Zebra server so that it doesn't depend on global common_resource.
+ *
+ * Revision 1.62 1997/09/05 15:30:09 adam
+ * Changed prototype for chr_map_input - added const.
+ * Added support for C++, headers use extern "C" for public definitions.
+ *
+ * Revision 1.61 1997/02/10 10:21:14 adam
+ * Bug fix: in search terms character (^) wasn't observed.
+ *
+ * Revision 1.60 1997/01/31 11:10:34 adam
+ * Bug fix: Leading and trailing white space weren't removed in scan tokens.
+ *
+ * Revision 1.59 1997/01/17 11:31:46 adam
+ * Bug fix: complete phrase search didn't work.
+ *
+ * Revision 1.58 1996/12/23 15:30:45 adam
* Work on truncation.
* Bug fix: result sets weren't deleted after server shut down.
*
*/
#include <stdio.h>
#include <assert.h>
+#ifdef WINDOWS
+#include <io.h>
+#else
#include <unistd.h>
+#endif
#include <ctype.h>
#include "zserver.h"
-#include "attribute.h"
#include <charmap.h>
#include <rstemp.h>
return 0;
}
-static int term_pre (char **src, const char *ct1, const char *ct2)
+static int term_pre (const char **src, const char *ct1, const char *ct2)
{
- char *s1, *s0 = *src;
- char **map;
+ const char *s1, *s0 = *src;
+ const char **map;
/* skip white space */
while (*s0)
if (ct2 && strchr (ct2, *s0))
break;
s1 = s0;
- map = map_chrs_input (&s1, strlen(s1));
+ map = map_chrs_input (0, &s1, strlen(s1));
if (**map != *CHR_SPACE)
break;
s0 = s1;
return *s0;
}
-static int term_100 (char **src, char *dst)
+static int term_100 (const char **src, char *dst, int space_split)
{
- char *s0, *s1, **map;
+ const char *s0, *s1;
+ const char **map;
int i = 0;
if (!term_pre (src, NULL, NULL))
while (*s0)
{
s1 = s0;
- map = map_chrs_input (&s0, strlen(s0));
- if (**map == *CHR_SPACE)
+ map = map_chrs_input (0, &s0, strlen(s0));
+ if (space_split && **map == *CHR_SPACE)
break;
while (s1 < s0)
{
return i;
}
-static int term_101 (char **src, char *dst)
+static int term_101 (const char **src, char *dst, int space_split)
{
- char *s0, *s1, **map;
+ const char *s0, *s1;
+ const char **map;
int i = 0;
if (!term_pre (src, "#", "#"))
else
{
s1 = s0;
- map = map_chrs_input (&s0, strlen(s0));
- if (**map == *CHR_SPACE)
+ map = map_chrs_input (0, &s0, strlen(s0));
+ if (space_split && **map == *CHR_SPACE)
break;
while (s1 < s0)
{
}
-static int term_103 (char **src, char *dst, int *errors)
+static int term_103 (const char **src, char *dst, int *errors, int space_split)
{
int i = 0;
- char *s0, *s1, **map;
+ const char *s0, *s1;
+ const char **map;
- if (!term_pre (src, "\\()[].*+?|", "("))
+ if (!term_pre (src, "^\\()[].*+?|", "("))
return 0;
s0 = *src;
if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
}
while (*s0)
{
- if (strchr ("\\()[].*+?|-", *s0))
+ if (strchr ("^\\()[].*+?|-", *s0))
dst[i++] = *s0++;
else
{
s1 = s0;
- map = map_chrs_input (&s0, strlen(s0));
+ map = map_chrs_input (0, &s0, strlen(s0));
if (**map == *CHR_SPACE)
break;
while (s1 < s0)
return i;
}
-static int term_102 (char **src, char *dst)
+static int term_102 (const char **src, char *dst, int space_split)
{
- return term_103 (src, dst, NULL);
+ return term_103 (src, dst, NULL, space_split);
}
/* gen_regular_rel - generate regular expression from relation
}
static int relational_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt,
- char **term_sub,
+ const char **term_sub,
char *term_dict,
oid_value attributeSet,
struct grep_info *grep_info,
switch (relation_value)
{
case 1:
- if (!term_100 (term_sub, term_dict))
+ if (!term_100 (term_sub, term_dict, 1))
return 0;
term_value = atoi (term_dict);
if (term_value <= 0)
gen_regular_rel (term_dict + strlen(term_dict), term_value-1, 1);
break;
case 2:
- if (!term_100 (term_sub, term_dict))
+ if (!term_100 (term_sub, term_dict, 1))
return 0;
term_value = atoi (term_dict);
if (term_value < 0)
gen_regular_rel (term_dict + strlen(term_dict), term_value, 1);
break;
case 4:
- if (!term_100 (term_sub, term_dict))
+ if (!term_100 (term_sub, term_dict, 1))
return 0;
term_value = atoi (term_dict);
if (term_value < 0)
gen_regular_rel (term_dict + strlen(term_dict), term_value, 0);
break;
case 5:
- if (!term_100 (term_sub, term_dict))
+ if (!term_100 (term_sub, term_dict, 1))
return 0;
term_value = atoi (term_dict);
if (term_value < 0)
}
static int field_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt,
- char **term_sub, int regType,
+ const char **term_sub, int regType,
oid_value attributeSet, struct grep_info *grep_info,
- int num_bases, char **basenames)
+ int num_bases, char **basenames, int space_split)
{
char term_dict[2*IT_MAX_WORD+2];
int j, r, base_no;
AttrType use;
int use_value;
oid_value curAttributeSet = attributeSet;
- char *termp;
+ const char *termp;
attr_init (&use, zapt, 1);
use_value = attr_find (&use, &curAttributeSet);
- logf (LOG_DEBUG, "use value %d", use_value);
+ logf (LOG_DEBUG, "field_term, use value %d", use_value);
attr_init (&truncation, zapt, 5);
truncation_value = attr_find (&truncation, NULL);
logf (LOG_DEBUG, "truncation value %d", truncation_value);
for (base_no = 0; base_no < num_bases; base_no++)
{
- attent *attp;
+ attent attp;
data1_local_attribute *local_attr;
int max_pos, prefix_len = 0;
termp = *term_sub;
- attp = att_getentbyatt (curAttributeSet, use_value);
- if (!attp)
+ if (!att_getentbyatt (zi, &attp, curAttributeSet, use_value))
{
logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
curAttributeSet, use_value);
zi->errString = basenames[base_no];
return -1;
}
- for (local_attr = attp->local_attributes; local_attr;
+ for (local_attr = attp.local_attributes; local_attr;
local_attr = local_attr->next)
{
int ord;
- ord = zebTargetInfo_lookupSU (zi->zti, attp->attset_ordinal,
+ ord = zebTargetInfo_lookupSU (zi->zti, attp.attset_ordinal,
local_attr->local);
if (ord < 0)
continue;
{
case -1: /* not specified */
case 100: /* do not truncate */
- term_dict[j++] = '(';
- if (!term_100 (&termp, term_dict + j))
+ term_dict[j++] = '(';
+ if (!term_100 (&termp, term_dict + j, space_split))
return 0;
strcat (term_dict, ")");
r = dict_lookup_grep (zi->dict, term_dict, 0, grep_info,
break;
case 1: /* right truncation */
term_dict[j++] = '(';
- if (!term_100 (&termp, term_dict + j))
+ if (!term_100 (&termp, term_dict + j, space_split))
return 0;
strcat (term_dict, ".*)");
dict_lookup_grep (zi->dict, term_dict, 0, grep_info,
return -1;
case 101: /* process # in term */
term_dict[j++] = '(';
- if (!term_101 (&termp, term_dict + j))
+ if (!term_101 (&termp, term_dict + j, space_split))
return 0;
strcat (term_dict, ")");
r = dict_lookup_grep (zi->dict, term_dict, 0, grep_info,
break;
case 102: /* Regexp-1 */
term_dict[j++] = '(';
- if (!term_102 (&termp, term_dict + j))
+ if (!term_102 (&termp, term_dict + j, space_split))
return 0;
strcat (term_dict, ")");
logf (LOG_DEBUG, "Regexp-1 tolerance=%d", r);
case 103: /* Regexp-2 */
r = 1;
term_dict[j++] = '(';
- if (!term_103 (&termp, term_dict + j, &r))
+ if (!term_103 (&termp, term_dict + j, &r, space_split))
return 0;
strcat (term_dict, ")");
logf (LOG_DEBUG, "Regexp-2 tolerance=%d", r);
char *termz)
{
Z_Term *term = zapt->term;
- char **map;
- char *cp = (char*) term->u.general->buf;
+ const char **map;
+ const char *cp = (const char *) term->u.general->buf;
const char *cp_end = cp + term->u.general->len;
const char *src;
int i = 0;
- int prev_space = 0;
+ const char *space_map = NULL;
int len;
while ((len = (cp_end - cp)) > 0)
{
- map = map_chrs_input (&cp, len);
+ map = map_chrs_input (0, &cp, len);
if (**map == *CHR_SPACE)
- {
- if (prev_space)
- continue;
- prev_space = 1;
- }
+ space_map = *map;
else
- prev_space = 0;
- for (src = *map; *src; src++)
- termz[i++] = *src;
+ {
+ if (i && space_map)
+ for (src = space_map; *src; src++)
+ termz[i++] = *src;
+ space_map = NULL;
+ for (src = *map; *src; src++)
+ termz[i++] = *src;
+ }
}
termz[i] = '\0';
}
{
rset_relevance_parms parms;
char termz[IT_MAX_WORD+1];
- char *termp = termz;
+ const char *termp = termz;
struct grep_info grep_info;
RSET result;
int term_index = 0;
int r;
parms.key_size = sizeof(struct it_key);
- parms.max_rec = 100;
+ parms.max_rec = 1000;
parms.cmp = key_compare_it;
+ parms.get_pos = key_get_pos;
parms.is = zi->isam;
parms.isc = zi->isamc;
parms.no_terms = 0;
while (1)
{
r = field_term (zi, zapt, &termp, 'w', attributeSet, &grep_info,
- num_bases, basenames);
+ num_bases, basenames, 1);
if (r <= 0)
break;
#ifdef TERM_COUNT
char termz[IT_MAX_WORD+1];
struct grep_info grep_info;
RSET result;
- char *termp = termz;
+ const char *termp = termz;
int r;
if (zapt->term->which != Z_Term_general)
grep_info.isam_p_buf = NULL;
r = field_term (zi, zapt, &termp, 'p', attributeSet, &grep_info,
- num_bases, basenames);
+ num_bases, basenames, 0);
result = rset_trunc (zi, grep_info.isam_p_buf, grep_info.isam_p_indx);
#ifdef TERM_COUNT
xfree(grep_info.term_no);
return result;
}
-static RSET rpn_proximity (RSET rset1, RSET rset2, int ordered,
+static RSET rpn_proximity (ZServerInfo *zi, RSET rset1, RSET rset2,
+ int ordered,
int exclusion, int relation, int distance)
{
int i;
more2 = rset_read (rset2, rsfd2, &buf2);
parms.key_size = sizeof (struct it_key);
+ parms.temp_path = res_get (zi->res, "setTmpDir");
result = rset_create (rset_kind_temp, &parms);
rsfd_result = rset_open (result, RSETF_WRITE|RSETF_SORT_SYSNO);
int excl = exclusion;
if (!ordered && diff < 0)
diff = -diff;
- logf (LOG_DEBUG, "l = %d r = %d", seqno[i], buf2.seqno);
switch (relation)
{
case 1: /* < */
break;
}
if (excl)
- {
- logf (LOG_DEBUG, " match");
rset_write (result, rsfd_result, &buf2);
- }
}
} while ((more2 = rset_read (rset2, rsfd2, &buf2)) &&
sysno == buf2.sysno);
return result;
}
-static RSET rpn_prox (RSET *rset, int rset_no)
+static RSET rpn_prox (ZServerInfo *zi, RSET *rset, int rset_no)
{
int i;
RSFD *rsfd;
}
}
parms.key_size = sizeof (struct it_key);
+ parms.temp_path = res_get (zi->res, "setTmpDir");
result = rset_create (rset_kind_temp, &parms);
rsfd_result = rset_open (result, RSETF_WRITE|RSETF_SORT_SYSNO);
int num_bases, char **basenames)
{
char termz[IT_MAX_WORD+1];
- char *termp = termz;
+ const char *termp = termz;
RSET rset[60], result;
int i, r, rset_no = 0;
struct grep_info grep_info;
{
grep_info.isam_p_indx = 0;
r = field_term (zi, zapt, &termp, 'w', attributeSet, &grep_info,
- num_bases, basenames);
+ num_bases, basenames, 1);
if (r < 1)
break;
rset[rset_no] = rset_trunc (zi, grep_info.isam_p_buf,
return rset_create (rset_kind_null, NULL);
else if (rset_no == 1)
return (rset[0]);
- result = rpn_prox (rset, rset_no);
+ result = rpn_prox (zi, rset, rset_no);
for (i = 0; i<rset_no; i++)
rset_delete (rset[i]);
return result;
return NULL;
}
parms.key_size = sizeof (struct it_key);
+ parms.temp_path = res_get (zi->res, "setTmpDir");
result = rset_create (rset_kind_temp, &parms);
rsfd = rset_open (result, RSETF_WRITE|RSETF_SORT_SYSNO);
sprintf (val, "%d", *zop->u.prox->proximityUnitCode);
return NULL;
}
- r = rpn_proximity (bool_parms.rset_l, bool_parms.rset_r,
+ r = rpn_proximity (zi, bool_parms.rset_l, bool_parms.rset_r,
*zop->u.prox->ordered,
(!zop->u.prox->exclusion ? 0 :
*zop->u.prox->exclusion),
return r;
}
-void count_set_save (RSET *r, int *count)
+void count_set_save (ZServerInfo *zi, RSET *r, int *count)
{
int psysno = 0;
int kno = 0;
RSFD rfd, wfd;
RSET w;
rset_temp_parms parms;
-
+ int maxResultSetSize = atoi (res_get_def (zi->res,
+ "maxResultSetSize", "400"));
logf (LOG_DEBUG, "count_set_save");
*count = 0;
parms.key_size = sizeof(struct it_key);
+ parms.temp_path = res_get (zi->res, "setTmpDir");
w = rset_create (rset_kind_temp, &parms);
wfd = rset_open (w, RSETF_WRITE|RSETF_SORT_SYSNO);
rfd = rset_open (*r, RSETF_READ|RSETF_SORT_SYSNO);
{
if (key.sysno != psysno)
{
- if (*count < 400)
+ if (*count < maxResultSetSize)
rset_write (w, wfd, &key);
(*count)++;
psysno = key.sysno;
oident *attrset;
oid_value attributeSet;
- dict_grep_cmap (zi->dict, map_chrs_input);
+ dict_grep_cmap (zi->dict, 0, map_chrs_input);
zlog_rpn (rpn);
zi->errCode = 0;
if (!rset)
return zi->errCode;
if (rset_is_volatile(rset))
- count_set_save(&rset,hits);
+ count_set_save(zi, &rset,hits);
else
count_set (rset, hits);
resultSetAdd (zi, setname, 1, rset);
idx = scan_info->after - pos + scan_info->before;
else
idx = - pos - 1;
- logf (LOG_DEBUG, "%-3d %s", idx, name+len_prefix);
scan_info->list[idx].term = odr_malloc (scan_info->odr,
strlen(name + len_prefix)+1);
strcpy (scan_info->list[idx].term, name + len_prefix);
use_value = 1016;
for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
{
- attent *attp;
+ attent attp;
data1_local_attribute *local_attr;
- attp = att_getentbyatt (attributeset, use_value);
- if (!attp)
+ if (!att_getentbyatt (zi, &attp, attributeset, use_value))
{
logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
attributeset, use_value);
zi->errString = basenames[base_no];
return zi->errCode = 109; /* Database unavailable */
}
- for (local_attr = attp->local_attributes; local_attr && ord_no < 32;
+ for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
local_attr = local_attr->next)
{
int ord;
- ord = zebTargetInfo_lookupSU (zi->zti, attp->attset_ordinal,
+ ord = zebTargetInfo_lookupSU (zi->zti, attp.attset_ordinal,
local_attr->local);
if (ord > 0)
ords[ord_no++] = ord;