+#endif
+#include <ctype.h>
+
+#include <yaz/diagbib1.h>
+#include "index.h"
+#include <zebra_xpath.h>
+
+#include <charmap.h>
+#include <rset.h>
+
+static const struct key_control it_ctrl = /* key-control descriptor for struct it_key (positional initializer) */
+{
+ sizeof(struct it_key),
+ 2, /* we have sysnos and seqnos in this key, nothing more */
+ key_compare_it,
+ key_logdump_txt, /* FIXME - clean up these functions */
+ key_get_seq,
+};
+
+
+const struct key_control *key_it_ctrl = &it_ctrl; /* exported pointer to it_ctrl; term_trunc passes it (and its scope member) to rset_trunc */
+
+struct rpn_char_map_info /* user data handed to rpn_char_map_handler through dict_grep_cmap */
+{
+ ZebraMaps zm; /* maps object forwarded to zebra_maps_input */
+ int reg_type; /* register type forwarded to zebra_maps_input */
+};
+
+typedef struct /* cursor used by attr_init/attr_find_ex to walk the attributes of one type */
+{
+ int type; /* attribute type compared against *element->attributeType */
+ int major; /* index of the next element in zapt->attributes->attributes */
+ int minor; /* index of the next item inside a complex attribute's list */
+ Z_AttributesPlusTerm *zapt; /* attributes-plus-term whose attribute list is scanned */
+} AttrType;
+
+
+static int log_level_set = 0; /* set to 1 by add_isam_p once log_level_rpn has been looked up */
+static int log_level_rpn = 0; /* level returned by yaz_log_module_level("rpn"); 0 until add_isam_p runs */
+
+/* Character-map callback installed by rpn_char_map_prepare().
+ * vp is the struct rpn_char_map_info registered with dict_grep_cmap();
+ * the input is forwarded to zebra_maps_input() (last argument 0) and the
+ * mapping it returns is handed back unchanged. */
+static const char **rpn_char_map_handler(void *vp, const char **from, int len)
+{
+    struct rpn_char_map_info *info = (struct rpn_char_map_info *) vp;
+
+    return zebra_maps_input(info->zm, info->reg_type, from, len, 0);
+}
+
+/* Fill in map_info from the register and register type, then install
+ * rpn_char_map_handler (with map_info as its user data) on reg->dict.
+ * map_info is stored by pointer, so it must stay valid while the
+ * dictionary may still call the handler. */
+static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
+                                 struct rpn_char_map_info *map_info)
+{
+    map_info->reg_type = reg_type;
+    map_info->zm = reg->zebra_maps;
+
+    dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
+}
+
+/* Find the next attribute value of type src->type, resuming at the
+ * position recorded in src (major = element index, minor = index within
+ * a complex attribute's list).
+ *
+ *   attributeSetP  if non-NULL and the matching element carries an
+ *                  attribute set, receives that set's oid value.
+ *   string_value   if non-NULL, receives the string of a complex
+ *                  attribute item that is a string.
+ *
+ * Returns the numeric value of the attribute, -2 when a string value was
+ * stored in *string_value, or -1 when no further attribute of the
+ * requested type exists.
+ */
+static int attr_find_ex(AttrType *src, oid_value *attributeSetP,
+                        const char **string_value)
+{
+    const int total = src->zapt->attributes->num_attributes;
+
+    for (; src->major < total; ++(src->major))
+    {
+        Z_AttributeElement *el =
+            src->zapt->attributes->attributes[src->major];
+
+        if (src->type != *el->attributeType)
+            continue;
+
+        if (el->which == Z_AttributeValue_numeric)
+        {
+            /* a numeric element is consumed entirely: step past it */
+            ++(src->major);
+            if (el->attributeSet && attributeSetP)
+                *attributeSetP = oid_getentbyoid(el->attributeSet)->value;
+            return *el->value.numeric;
+        }
+        else if (el->which == Z_AttributeValue_complex)
+        {
+            const int idx = src->minor;
+
+            if (idx >= el->value.complex->num_list)
+                continue;   /* list exhausted: try the next element */
+            if (el->attributeSet && attributeSetP)
+                *attributeSetP = oid_getentbyoid(el->attributeSet)->value;
+
+            if (el->value.complex->list[idx]->which ==
+                Z_StringOrNumeric_numeric)
+            {
+                src->minor = idx + 1;
+                return *el->value.complex->list[idx]->u.numeric;
+            }
+            if (el->value.complex->list[idx]->which ==
+                Z_StringOrNumeric_string && string_value)
+            {
+                src->minor = idx + 1;
+                *string_value = el->value.complex->list[idx]->u.string;
+                return -2;
+            }
+            /* string not wanted, or unknown item kind: next element */
+        }
+        else
+            assert(0);
+    }
+    return -1;
+}
+
+/* Numeric-only variant of attr_find_ex(): string-valued attribute items
+ * are skipped because no string destination is supplied. */
+static int attr_find(AttrType *src, oid_value *attributeSetP)
+{
+    const char **no_string = 0;
+
+    return attr_find_ex(src, attributeSetP, no_string);
+}
+
+/* Position the cursor src at the first attribute of zapt and record the
+ * attribute type that later attr_find()/attr_find_ex() calls look for. */
+static void attr_init(AttrType *src, Z_AttributesPlusTerm *zapt,
+                      int type)
+{
+    src->major = src->minor = 0;
+    src->type = type;
+    src->zapt = zapt;
+}
+
+#define TERM_COUNT /* when defined, struct grep_info carries the term_no array */
+
+struct grep_info { /* accumulator filled by add_isam_p for every dictionary hit */
+#ifdef TERM_COUNT
+ int *term_no; /* array grown in step with isam_p_buf */
+#endif
+ ISAM_P *isam_p_buf; /* collected ISAM positions */
+ int isam_p_size; /* allocated number of entries in isam_p_buf */
+ int isam_p_indx; /* number of entries currently in use */
+ ZebraHandle zh; /* handle used for explain lookups and resultSetAddTerm */
+ int reg_type; /* register type passed to term_untrans */
+ ZebraSet termset; /* if non-NULL, each hit is also recorded here as a term */
+};
+
+/* Convert an internal (mapped) term back to displayable form.
+ *
+ *   zh, reg_type  select the maps used by zebra_maps_output().
+ *   dst           output buffer; at most IT_MAX_WORD-1 bytes are stored
+ *                 followed by a terminating NUL.
+ *   src           NUL-terminated internal term.
+ *
+ * When zebra_maps_output() has no mapping (returns NULL) the source byte
+ * is copied as-is.  The byte is skipped even when dst is already full;
+ * previously that case fell through to the mapped branch and
+ * dereferenced the NULL pointer.
+ */
+static void term_untrans(ZebraHandle zh, int reg_type,
+                         char *dst, const char *src)
+{
+    int len = 0;
+    while (*src)
+    {
+        const char *cp = zebra_maps_output(zh->reg->zebra_maps,
+                                           reg_type, &src);
+        if (!cp)
+        {
+            /* unmapped byte: copy if room, but always consume it */
+            if (len < IT_MAX_WORD-1)
+                dst[len++] = *src;
+            src++;
+        }
+        else
+        {
+            while (*cp && len < IT_MAX_WORD-1)
+                dst[len++] = *cp++;
+        }
+    }
+    dst[len] = '\0';
+}
+
+/* Record one dictionary hit in the grep_info accumulator.
+ *
+ *   name  dictionary key: an encoded ordinal (decoded with
+ *         key_SU_decode), one byte that is passed on as the register
+ *         type, then the mapped term.
+ *   info  dictionary value: a length byte, which must equal
+ *         sizeof(ISAM_P), followed by the ISAM position itself.
+ *   p     accumulator; its buffers are grown (2*size + 100) on demand.
+ *
+ * If p->termset is set, the term is also converted back to display form
+ * and added to that term set.
+ *
+ * Fix: when growing term_no the old contents are now copied from
+ * p->term_no; the previous code copied from p->isam_p_buf.
+ */
+static void add_isam_p(const char *name, const char *info,
+                       struct grep_info *p)
+{
+    if (!log_level_set)
+    {
+        log_level_rpn = yaz_log_module_level("rpn");
+        log_level_set = 1;
+    }
+    if (p->isam_p_indx == p->isam_p_size)
+    {
+        ISAM_P *new_isam_p_buf;
+#ifdef TERM_COUNT
+        int *new_term_no;
+#endif
+        p->isam_p_size = 2*p->isam_p_size + 100;
+        new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
+                                            p->isam_p_size);
+        if (p->isam_p_buf)
+        {
+            memcpy(new_isam_p_buf, p->isam_p_buf,
+                   p->isam_p_indx * sizeof(*p->isam_p_buf));
+            xfree(p->isam_p_buf);
+        }
+        p->isam_p_buf = new_isam_p_buf;
+
+#ifdef TERM_COUNT
+        new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
+        if (p->term_no)
+        {
+            /* copy from the old term_no array, not from isam_p_buf */
+            memcpy(new_term_no, p->term_no,
+                   p->isam_p_indx * sizeof(*p->term_no));
+            xfree(p->term_no);
+        }
+        p->term_no = new_term_no;
+#endif
+    }
+    assert(*info == sizeof(*p->isam_p_buf));
+    memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
+
+    if (p->termset)
+    {
+        const char *db;
+        int set, use;
+        char term_tmp[IT_MAX_WORD];
+        int su_code = 0;
+        int len = key_SU_decode (&su_code, name);
+
+        /* name[len] is the register type byte; the term follows it */
+        term_untrans (p->zh, p->reg_type, term_tmp, name+len+1);
+        yaz_log(log_level_rpn, "grep: %d %c %s", su_code, name[len], term_tmp);
+        zebraExplain_lookup_ord (p->zh->reg->zei,
+                                 su_code, &db, &set, &use);
+        yaz_log(log_level_rpn, "grep: set=%d use=%d db=%s", set, use, db);
+
+        resultSetAddTerm(p->zh, p->termset, name[len], db,
+                         set, use, term_tmp);
+    }
+    (p->isam_p_indx)++;
+}
+
+/* dict_lookup_grep() callback: p is the struct grep_info accumulator.
+ * Every hit is stored with add_isam_p(); always returns 0. */
+static int grep_handle(char *name, const char *info, void *p)
+{
+    struct grep_info *accum = (struct grep_info *) p;
+
+    add_isam_p(name, info, accum);
+    return 0;
+}
+
+/* Skip leading white space in *src.
+ *
+ * Scanning stops at the end of the string, at any character contained in
+ * ct1 or ct2 (either may be NULL), or at the first unit that
+ * zebra_maps_input() does not map to CHR_SPACE.  *src is advanced to that
+ * position and the character found there is returned (0 at end of
+ * string).
+ */
+static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
+                    const char *ct1, const char *ct2, int first)
+{
+    const char *pos = *src;
+
+    while (*pos
+           && !(ct1 && strchr(ct1, *pos))
+           && !(ct2 && strchr(ct2, *pos)))
+    {
+        const char *next = pos;
+        const char **map = zebra_maps_input(zebra_maps, reg_type, &next,
+                                            strlen(next), first);
+        if (**map != *CHR_SPACE)
+            break;
+        pos = next;     /* white space: step over it */
+    }
+    *src = pos;
+    return *pos;
+}
+
+
+/* Render in_buf[0..in_size-1] as a readable dump in out_buf.
+ *
+ * Every input byte becomes "HH:c " (two hex digits, a colon, the
+ * character itself or '?' when it is outside 32..126, and a space).
+ * As soon as the output is longer than out_size-20 bytes, ".." is
+ * appended and the dump stops.  out_size must exceed 20.
+ */
+static void esc_str(char *out_buf, int out_size,
+                    const char *in_buf, int in_size)
+{
+    size_t used = 0;    /* bytes written so far, excluding the NUL */
+    int k = 0;
+
+    assert(out_buf);
+    assert(in_buf);
+    assert(out_size > 20);
+
+    *out_buf = '\0';
+    while (k < in_size)
+    {
+        const int c = in_buf[k++] & 0xff;
+        const int shown = (c >= 32 && c <= 126) ? c : '?';
+
+        used += sprintf(out_buf + used, "%02X:%c ", c, shown);
+        if (used > out_size-20)
+        {
+            strcpy(out_buf + used, "..");
+            break;
+        }
+    }
+}
+
+#define REGEX_CHARS " []()|.*+?!" /* characters the term_NNN parsers prefix with a backslash when copying literal text into a regular expression */
+
+/* term_100: handle term, where trunc = none(no operators at all)
+ *
+ * Leading white space is skipped with term_pre(); the term is then read
+ * from *src one mapped unit at a time with zebra_maps_search().
+ *   dst          receives the term as a regular expression: characters
+ *                found in REGEX_CHARS are backslash-escaped, and units
+ *                for which q_map_match is set are replaced by map[0].
+ *   dst_term     receives the source bytes of the term as read.
+ *   space_split  non-zero: stop at the first unit mapping to CHR_SPACE;
+ *                zero: keep embedded space (only the most recent space
+ *                unit is remembered, and it is emitted only when a
+ *                non-space unit follows, so trailing space is dropped).
+ * Neither output buffer is bounds-checked here.
+ * Returns 0 when term_pre() finds nothing to parse, otherwise the number
+ * of bytes stored in dst; *src is advanced to where parsing stopped.
+ */
+static int term_100(ZebraMaps zebra_maps, int reg_type,
+ const char **src, char *dst, int space_split,
+ char *dst_term)
+{
+ const char *s0;
+ const char **map;
+ int i = 0; /* write index into dst */
+ int j = 0; /* write index into dst_term */
+
+ const char *space_start = 0;
+ const char *space_end = 0;
+
+ if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
+ return 0;
+ s0 = *src;
+ while (*s0)
+ {
+ const char *s1 = s0; /* start of the unit about to be consumed */
+ int q_map_match = 0;
+ map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
+ &q_map_match);
+ if (space_split)
+ {
+ if (**map == *CHR_SPACE)
+ break;
+ }
+ else /* complete subfield only. */
+ {
+ if (**map == *CHR_SPACE)
+ { /* save space mapping for later .. */
+ space_start = s1;
+ space_end = s0;
+ continue;
+ }
+ else if (space_start)
+ { /* reload last space */
+ while (space_start < space_end)
+ {
+ if (strchr(REGEX_CHARS, *space_start))
+ dst[i++] = '\\';
+ dst_term[j++] = *space_start;
+ dst[i++] = *space_start++;
+ }
+ /* and reset */
+ space_start = space_end = 0;
+ }
+ }
+ /* add non-space char */
+ memcpy(dst_term+j, s1, s0 - s1);
+ j += (s0 - s1);
+ if (!q_map_match)
+ {
+ while (s1 < s0)
+ {
+ if (strchr(REGEX_CHARS, *s1))
+ dst[i++] = '\\';
+ dst[i++] = *s1++;
+ }
+ }
+ else
+ {
+ char tmpbuf[80];
+ esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); /* tmpbuf is not read again in this function */
+
+ strcpy(dst + i, map[0]);
+ i += strlen(map[0]);
+ }
+ }
+ dst[i] = '\0';
+ dst_term[j] = '\0';
+ *src = s0;
+ return i;
+}
+
+/* term_101: handle term, where trunc = Process #
+ *
+ * Each '#' in the term becomes ".*" in the regular expression written to
+ * dst.  All other text is read with zebra_maps_search(): literal text is
+ * copied with REGEX_CHARS backslash-escaped, and units for which
+ * q_map_match is set are replaced by map[0].  dst_term receives the
+ * source bytes of the term.  With space_split set, parsing stops at the
+ * first unit that maps to CHR_SPACE.
+ * Returns 0 when term_pre() finds nothing to parse, otherwise the number
+ * of bytes stored in dst; *src is advanced to where parsing stopped.
+ */
+static int term_101(ZebraMaps zebra_maps, int reg_type,
+                    const char **src, char *dst, int space_split,
+                    char *dst_term)
+{
+    const char *cur;
+    char *out = dst;        /* next free byte of the regular expression */
+    char *raw = dst_term;   /* next free byte of the plain term */
+
+    if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
+        return 0;
+
+    for (cur = *src; *cur; )
+    {
+        const char *start = cur;
+        const char **map;
+        int q_map_match = 0;
+
+        if (*cur == '#')
+        {
+            *out++ = '.';
+            *out++ = '*';
+            *raw++ = *cur++;
+            continue;
+        }
+        map = zebra_maps_search(zebra_maps, reg_type, &cur, strlen(cur),
+                                &q_map_match);
+        if (space_split && **map == *CHR_SPACE)
+            break;
+
+        /* add non-space char */
+        memcpy(raw, start, cur - start);
+        raw += cur - start;
+        if (q_map_match)
+        {
+            char tmpbuf[80];
+            esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
+
+            strcpy(out, map[0]);
+            out += strlen(map[0]);
+        }
+        else
+        {
+            for (; start < cur; start++)
+            {
+                if (strchr(REGEX_CHARS, *start))
+                    *out++ = '\\';
+                *out++ = *start;
+            }
+        }
+    }
+    *out = '\0';
+    *raw = '\0';
+    *src = cur;
+    return (int) (out - dst);
+}
+
+/* term_103: handle term, where trunc = re-2 (regular expressions)
+ *
+ * Characters in "^\()[].*+?|-" are treated as regular expression
+ * operators and copied unchanged to both dst and dst_term.  All other
+ * text is read with zebra_maps_search(): literal text is copied to dst
+ * with REGEX_CHARS backslash-escaped, and units for which q_map_match is
+ * set are replaced by map[0].
+ *   errors       if non-NULL and the term starts with "+<digit>+"
+ *                followed by at least one more character, receives the
+ *                digit's value (capped at 3) and that prefix is skipped.
+ *   space_split  non-zero: stop at the first unit mapping to CHR_SPACE.
+ * Neither output buffer is bounds-checked here.
+ * Returns 0 when term_pre() finds nothing to parse, otherwise the number
+ * of bytes stored in dst; *src is advanced to where parsing stopped.
+ */
+static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
+ char *dst, int *errors, int space_split,
+ char *dst_term)
+{
+ int i = 0; /* write index into dst */
+ int j = 0; /* write index into dst_term */
+ const char *s0;
+ const char **map;
+
+ if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
+ return 0;
+ s0 = *src;
+ if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
+ isdigit(((const unsigned char *)s0)[1]))
+ {
+ *errors = s0[1] - '0';
+ s0 += 3; /* skip the "+N+" prefix */
+ if (*errors > 3)
+ *errors = 3;
+ }
+ while (*s0)
+ {
+ if (strchr("^\\()[].*+?|-", *s0))
+ {
+ dst_term[j++] = *s0;
+ dst[i++] = *s0++;
+ }
+ else
+ {
+ const char *s1 = s0; /* start of the unit about to be consumed */
+ int q_map_match = 0;
+ map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
+ &q_map_match);
+ if (space_split && **map == *CHR_SPACE)
+ break;
+
+ /* add non-space char */
+ memcpy(dst_term+j, s1, s0 - s1);
+ j += (s0 - s1);
+ if (!q_map_match)
+ {
+ while (s1 < s0)
+ {
+ if (strchr(REGEX_CHARS, *s1))
+ dst[i++] = '\\';
+ dst[i++] = *s1++;
+ }
+ }
+ else
+ {
+ char tmpbuf[80];
+ esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0])); /* tmpbuf is not read again in this function */
+
+ strcpy(dst + i, map[0]);
+ i += strlen(map[0]);
+ }
+ }
+ }
+ dst[i] = '\0';
+ dst_term[j] = '\0';
+ *src = s0;
+
+ return i;
+}
+
+/* term_102: handle term, where trunc = re-1 (regular expressions)
+ *
+ * Same parsing as term_103(), but without an error-count destination, so
+ * a leading "+N+" prefix is not interpreted. */
+static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
+                    char *dst, int space_split, char *dst_term)
+{
+    int *no_errors = NULL;
+
+    return term_103(zebra_maps, reg_type, src, dst, no_errors, space_split,
+                    dst_term);
+}
+
+
+/* term_104: handle term, where trunc = Process ?, * and #
+ *
+ * Operators recognised in the term:
+ *   ?     ".*"
+ *   ?N    N times ".?" (N is a decimal number, capped at 20)
+ *   *     ".*"
+ *   #     "."
+ * All other text is read with zebra_maps_input() and copied to dst with
+ * REGEX_CHARS backslash-escaped.  dst_term receives the source bytes of
+ * the term.  With space_split set, parsing stops at the first unit that
+ * maps to CHR_SPACE.  Neither output buffer is bounds-checked here.
+ * Returns 0 when term_pre() finds nothing to parse, otherwise the number
+ * of bytes stored in dst; *src is advanced to where parsing stopped.
+ *
+ * Fix: the count after '?' is no longer accumulated once it exceeds the
+ * cap, so a long digit string cannot overflow the int.
+ */
+static int term_104(ZebraMaps zebra_maps, int reg_type,
+                    const char **src, char *dst, int space_split,
+                    char *dst_term)
+{
+    const char *s0, *s1;
+    const char **map;
+    int i = 0;
+    int j = 0;
+
+    if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
+        return 0;
+    s0 = *src;
+    while (*s0)
+    {
+        if (*s0 == '?')
+        {
+            dst_term[j++] = *s0++;
+            if (*s0 >= '0' && *s0 <= '9')
+            {
+                int limit = 0;
+                while (*s0 >= '0' && *s0 <= '9')
+                {
+                    /* once above the cap the exact value is irrelevant */
+                    if (limit <= 20)
+                        limit = limit * 10 + (*s0 - '0');
+                    dst_term[j++] = *s0++;
+                }
+                if (limit > 20)
+                    limit = 20;
+                while (--limit >= 0)
+                {
+                    dst[i++] = '.';
+                    dst[i++] = '?';
+                }
+            }
+            else
+            {
+                dst[i++] = '.';
+                dst[i++] = '*';
+            }
+        }
+        else if (*s0 == '*')
+        {
+            dst[i++] = '.';
+            dst[i++] = '*';
+            dst_term[j++] = *s0++;
+        }
+        else if (*s0 == '#')
+        {
+            dst[i++] = '.';
+            dst_term[j++] = *s0++;
+        }
+        else
+        {
+            s1 = s0;
+            map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
+            if (space_split && **map == *CHR_SPACE)
+                break;
+            while (s1 < s0)
+            {
+                if (strchr(REGEX_CHARS, *s1))
+                    dst[i++] = '\\';
+                dst_term[j++] = *s1;
+                dst[i++] = *s1++;
+            }
+        }
+    }
+    dst[i] = '\0';
+    dst_term[j++] = '\0';
+    *src = s0;
+    return i;
+}
+
+/* term_105/106: handle term, where trunc = Process * and ! and right trunc
+ *
+ * '*' becomes ".*" and '!' becomes "." in the regular expression written
+ * to dst.  All other text is read with zebra_maps_input() and copied
+ * with REGEX_CHARS backslash-escaped.  dst_term receives the source
+ * bytes of the term.  With space_split set, parsing stops at the first
+ * unit that maps to CHR_SPACE.  When right_truncate is non-zero a final
+ * ".*" is appended to dst.
+ * Returns 0 when term_pre() finds nothing to parse, otherwise the number
+ * of bytes stored in dst; *src is advanced to where parsing stopped.
+ */
+static int term_105(ZebraMaps zebra_maps, int reg_type,
+                    const char **src, char *dst, int space_split,
+                    char *dst_term, int right_truncate)
+{
+    const char *cur;
+    char *out = dst;        /* next free byte of the regular expression */
+    char *raw = dst_term;   /* next free byte of the plain term */
+    int stop = 0;
+
+    if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
+        return 0;
+
+    cur = *src;
+    while (!stop && *cur)
+    {
+        switch (*cur)
+        {
+        case '*':
+            *out++ = '.';
+            *out++ = '*';
+            *raw++ = *cur++;
+            break;
+        case '!':
+            *out++ = '.';
+            *raw++ = *cur++;
+            break;
+        default:
+            {
+                const char *lit = cur;
+                const char **map = zebra_maps_input(zebra_maps, reg_type,
+                                                    &cur, strlen(cur), 0);
+                if (space_split && **map == *CHR_SPACE)
+                {
+                    stop = 1;
+                    break;
+                }
+                for (; lit < cur; lit++)
+                {
+                    if (strchr(REGEX_CHARS, *lit))
+                        *out++ = '\\';
+                    *raw++ = *lit;
+                    *out++ = *lit;
+                }
+            }
+            break;
+        }
+    }
+    if (right_truncate)
+    {
+        *out++ = '.';
+        *out++ = '*';
+    }
+    *out = '\0';
+    *raw = '\0';
+
+    *src = cur;
+    return (int) (out - dst);
+}
+
+
+/* gen_regular_rel - generate regular expression from relation
+ * val: border value (inclusive)
+ * islt: 1 if <=; 0 if >=.
+ *
+ * dst: output buffer.  The expression is built with strcpy/strcat and
+ *      direct stores; no size check is made here.
+ * A negative val is negated and islt is inverted; the opening part of
+ * the expression then carries the '-' sign (and, for >=, an alternative
+ * matching any unsigned digit string).
+ * The main loop walks the decimal digits of val from least to most
+ * significant and emits at most one '|'-terminated alternative per digit
+ * position; a final alternative covers numbers with fewer (islt) or more
+ * (!islt) digits.
+ */
+static void gen_regular_rel(char *dst, int val, int islt)
+{
+ int dst_p; /* write index into dst */
+ int w, d, i;
+ int pos = 0; /* digit position, 0 = least significant */
+ char numstr[20];
+
+ yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
+ if (val >= 0)
+ {
+ if (islt)
+ strcpy(dst, "(-[0-9]+|(");
+ else
+ strcpy(dst, "((");
+ }
+ else
+ {
+ if (!islt)
+ {
+ strcpy(dst, "([0-9]+|-(");
+ dst_p = strlen(dst); /* redundant: dst_p is assigned again below */
+ islt = 1;
+ }
+ else
+ {
+ strcpy(dst, "(-(");
+ islt = 0;
+ }
+ val = -val;
+ }
+ dst_p = strlen(dst);
+ sprintf(numstr, "%d", val);
+ for (w = strlen(numstr); --w >= 0; pos++)
+ {
+ d = numstr[w];
+ if (pos > 0)
+ {
+ if (islt)
+ {
+ if (d == '0')
+ continue;
+ d--;
+ }
+ else
+ {
+ if (d == '9')
+ continue;
+ d++;
+ }
+ }
+
+ strcpy(dst + dst_p, numstr);
+ dst_p = strlen(dst) - pos - 1; /* keep the leading digits, overwrite from digit w on */
+
+ if (islt)
+ {
+ if (d != '0')
+ {
+ dst[dst_p++] = '[';
+ dst[dst_p++] = '0';
+ dst[dst_p++] = '-';
+ dst[dst_p++] = d;
+ dst[dst_p++] = ']';
+ }
+ else
+ dst[dst_p++] = d;
+ }
+ else
+ {
+ if (d != '9')
+ {
+ dst[dst_p++] = '[';
+ dst[dst_p++] = d;
+ dst[dst_p++] = '-';
+ dst[dst_p++] = '9';
+ dst[dst_p++] = ']';
+ }
+ else
+ dst[dst_p++] = d;
+ }
+ for (i = 0; i<pos; i++)
+ {
+ dst[dst_p++] = '[';
+ dst[dst_p++] = '0';
+ dst[dst_p++] = '-';
+ dst[dst_p++] = '9';
+ dst[dst_p++] = ']';
+ }
+ dst[dst_p++] = '|';
+ }
+ dst[dst_p] = '\0';
+ if (islt)
+ {
+ /* match everything less than 10^(pos-1) */
+ strcat(dst, "0*");
+ for (i = 1; i<pos; i++)
+ strcat(dst, "[0-9]?");
+ }
+ else
+ {
+ /* match everything greater than 10^pos */
+ for (i = 0; i <= pos; i++)
+ strcat(dst, "[0-9]");
+ strcat(dst, "[0-9]*");
+ }
+ strcat(dst, "))");
+}
+
+/* Copy one logical character from src[*indx] to **term_p.
+ * A backslash is copied together with the byte that follows it.  Both
+ * the output pointer and the index are advanced past what was copied. */
+void string_rel_add_char(char **term_p, const char *src, int *indx)
+{
+    char *out = *term_p;
+    int pos = *indx;
+
+    if (src[pos] == '\\')
+        *out++ = src[pos++];
+    *out++ = src[pos++];
+
+    *term_p = out;
+    *indx = pos;
+}
+
+/*
+ * string_relation: parse one term with term_100() and append, at the end
+ * of term_dict, a regular expression that implements the relation
+ * attribute (type 2) of zapt.
+ *
+ * Relation values handled: 1 (<), 2 (<=), 4 (>=), 5 (>), and 3, 102 or
+ * no relation attribute (=).  Any other value stores 117 in *error_code
+ * and returns 0.
+ * Returns 0 with *error_code == 0 when term_100() finds no term, and 1
+ * on success.  term_dst receives the plain term; *term_sub is advanced
+ * by term_100().  The attributeSet parameter is not used here.
+ * The generating loops stop early once the expression has grown more
+ * than IT_MAX_WORD bytes past the start of term_dict.
+ *
+ * Shape of the generated expressions (two notations per relation):
+ * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
+ * ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
+ * >= abc ([b-].*|a[c-].*|ab[c-].*)
+ * ([^-a].*|a[^-b].*|ab[c-].*)
+ * < abc ([-0].*|a[-a].*|ab[-b].*)
+ * ([^a-].*|a[^b-].*|ab[^c-].*)
+ * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
+ * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
+ */
+static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
+ const char **term_sub, char *term_dict,
+ oid_value attributeSet,
+ int reg_type, int space_split, char *term_dst,
+ int *error_code)
+{
+ AttrType relation;
+ int relation_value;
+ int i;
+ char *term_tmp = term_dict + strlen(term_dict); /* append position */
+ char term_component[2*IT_MAX_WORD+20]; /* escaped term from term_100 */
+
+ attr_init(&relation, zapt, 2);
+ relation_value = attr_find(&relation, NULL);
+
+ *error_code = 0;
+ yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
+ switch (relation_value)
+ {
+ case 1:
+ if (!term_100(zh->reg->zebra_maps, reg_type,
+ term_sub, term_component,
+ space_split, term_dst))
+ return 0;
+ yaz_log(log_level_rpn, "Relation <");
+
+ *term_tmp++ = '(';
+ for (i = 0; term_component[i]; )
+ {
+ int j = 0;
+
+ if (i)
+ *term_tmp++ = '|';
+ while (j < i) /* copy the prefix preceding position i */
+ string_rel_add_char(&term_tmp, term_component, &j);
+
+ *term_tmp++ = '[';
+
+ *term_tmp++ = '^';
+ string_rel_add_char(&term_tmp, term_component, &i);
+ *term_tmp++ = '-';
+
+ *term_tmp++ = ']';
+ *term_tmp++ = '.';
+ *term_tmp++ = '*';
+
+ if ((term_tmp - term_dict) > IT_MAX_WORD)
+ break;
+ }
+ *term_tmp++ = ')';
+ *term_tmp = '\0';
+ break;
+ case 2:
+ if (!term_100(zh->reg->zebra_maps, reg_type,
+ term_sub, term_component,
+ space_split, term_dst))
+ return 0;
+ yaz_log(log_level_rpn, "Relation <=");
+
+ *term_tmp++ = '(';
+ for (i = 0; term_component[i]; )
+ {
+ int j = 0;
+
+ while (j < i) /* copy the prefix preceding position i */
+ string_rel_add_char(&term_tmp, term_component, &j);
+ *term_tmp++ = '[';
+
+ *term_tmp++ = '^';
+ string_rel_add_char(&term_tmp, term_component, &i);
+ *term_tmp++ = '-';
+
+ *term_tmp++ = ']';
+ *term_tmp++ = '.';
+ *term_tmp++ = '*';
+
+ *term_tmp++ = '|';
+
+ if ((term_tmp - term_dict) > IT_MAX_WORD)
+ break;
+ }
+ for (i = 0; term_component[i]; ) /* final alternative: the term itself */
+ string_rel_add_char(&term_tmp, term_component, &i);
+ *term_tmp++ = ')';
+ *term_tmp = '\0';
+ break;
+ case 5:
+ if (!term_100 (zh->reg->zebra_maps, reg_type,
+ term_sub, term_component, space_split, term_dst))
+ return 0;
+ yaz_log(log_level_rpn, "Relation >");
+
+ *term_tmp++ = '(';
+ for (i = 0; term_component[i];)
+ {
+ int j = 0;
+
+ while (j < i) /* copy the prefix preceding position i */
+ string_rel_add_char(&term_tmp, term_component, &j);
+ *term_tmp++ = '[';
+
+ *term_tmp++ = '^';
+ *term_tmp++ = '-';
+ string_rel_add_char(&term_tmp, term_component, &i);
+
+ *term_tmp++ = ']';
+ *term_tmp++ = '.';
+ *term_tmp++ = '*';
+
+ *term_tmp++ = '|';
+
+ if ((term_tmp - term_dict) > IT_MAX_WORD)
+ break;
+ }
+ for (i = 0; term_component[i];) /* final alternative: the term followed by ".+" */
+ string_rel_add_char(&term_tmp, term_component, &i);
+ *term_tmp++ = '.';
+ *term_tmp++ = '+';
+ *term_tmp++ = ')';
+ *term_tmp = '\0';
+ break;
+ case 4:
+ if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
+ term_component, space_split, term_dst))
+ return 0;
+ yaz_log(log_level_rpn, "Relation >=");
+
+ *term_tmp++ = '(';
+ for (i = 0; term_component[i];)
+ {
+ int j = 0;
+
+ if (i)
+ *term_tmp++ = '|';
+ while (j < i) /* copy the prefix preceding position i */
+ string_rel_add_char(&term_tmp, term_component, &j);
+ *term_tmp++ = '[';
+
+ if (term_component[i+1]) /* more bytes follow: "[^-c]" form */
+ {
+ *term_tmp++ = '^';
+ *term_tmp++ = '-';
+ string_rel_add_char(&term_tmp, term_component, &i);
+ }
+ else /* last byte: "[c-]" form */
+ {
+ string_rel_add_char(&term_tmp, term_component, &i);
+ *term_tmp++ = '-';
+ }
+ *term_tmp++ = ']';
+ *term_tmp++ = '.';
+ *term_tmp++ = '*';
+
+ if ((term_tmp - term_dict) > IT_MAX_WORD)
+ break;
+ }
+ *term_tmp++ = ')';
+ *term_tmp = '\0';
+ break;
+ case 3:
+ case 102:
+ case -1:
+ yaz_log(log_level_rpn, "Relation =");
+ if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
+ term_component, space_split, term_dst))
+ return 0;
+ strcat(term_tmp, "(");
+ strcat(term_tmp, term_component);
+ strcat(term_tmp, ")");
+ break;
+ default:
+ *error_code = 117;
+ return 0;
+ }
+ return 1;
+}
+
+static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
+ const char **term_sub,
+ oid_value attributeSet, NMEM stream,
+ struct grep_info *grep_info,
+ int reg_type, int complete_flag,
+ int num_bases, char **basenames,
+ char *term_dst, int xpath_use); /* forward declaration: defined further down, called by term_trunc */
+
+/* Look up one term with string_term() and turn the collected ISAM
+ * positions into a result set with rset_trunc().
+ *
+ * *rset is cleared first and stays 0 when string_term() fails or reports
+ * that no term is left (*term_sub == 0); string_term()'s result is
+ * returned in both cases.  Otherwise returns ZEBRA_OK, or ZEBRA_FAIL if
+ * rset_trunc() produced no result set.
+ */
+static ZEBRA_RES term_trunc(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
+                            const char **term_sub,
+                            oid_value attributeSet, NMEM stream,
+                            struct grep_info *grep_info,
+                            int reg_type, int complete_flag,
+                            int num_bases, char **basenames,
+                            char *term_dst,
+                            const char *rank_type, int xpath_use,
+                            NMEM rset_nmem,
+                            RSET *rset)
+{
+    ZEBRA_RES res;
+
+    grep_info->isam_p_indx = 0;
+    *rset = 0;
+
+    res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
+                      reg_type, complete_flag, num_bases, basenames,
+                      term_dst, xpath_use);
+    if (res != ZEBRA_OK || !*term_sub)  /* failure, or no more terms */
+        return res;
+
+    yaz_log(log_level_rpn, "term: %s", term_dst);
+    *rset = rset_trunc(zh, grep_info->isam_p_buf,
+                       grep_info->isam_p_indx, term_dst,
+                       strlen(term_dst), rank_type, 1 /* preserve pos */,
+                       zapt->term->which, rset_nmem,
+                       key_it_ctrl, key_it_ctrl->scope);
+    return *rset ? ZEBRA_OK : ZEBRA_FAIL;
+}
+
+/* Return the decimal text of v as a string duplicated with
+ * nmem_strdup() on nmem. */
+static char *nmem_strdup_i(NMEM nmem, int v)
+{
+    char digits[64];
+
+    sprintf(digits, "%d", v);
+    return nmem_strdup(nmem, digits);
+}
+
+/* Look up one search term in the dictionary for every database.
+ *
+ *   term_sub       in: start of the remaining query text; out: position
+ *                  after the parsed term, or 0 when no term was found.
+ *   attributeSet   default attribute set for the use attribute.
+ *   stream         NMEM used for diagnostic strings.
+ *   grep_info      accumulator filled through grep_handle().
+ *   complete_flag  non-zero: treat the whole remaining text as one term;
+ *                  zero: split terms at white space.
+ *   term_dst       receives the plain term text.
+ *   xpath_use      use value substituted for a string use attribute when
+ *                  greater than zero.
+ *
+ * For each database a dictionary regular expression is built in
+ * term_dict: a parenthesised list of encoded attribute ordinals, the
+ * register type, then the term converted according to the truncation
+ * attribute (type 5).  It is evaluated with dict_lookup_grep().
+ *
+ * Returns ZEBRA_OK on success (including "no term left"), ZEBRA_FAIL
+ * with zh->errCode/errString set otherwise.
+ *
+ * Fixes: the Regexp-2 case (103) passes &regex_range to term_103() and
+ * no longer falls through into the case for 104.
+ */
+static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
+                             const char **term_sub,
+                             oid_value attributeSet, NMEM stream,
+                             struct grep_info *grep_info,
+                             int reg_type, int complete_flag,
+                             int num_bases, char **basenames,
+                             char *term_dst, int xpath_use)
+{
+    char term_dict[2*IT_MAX_WORD+4000];
+    int j, r, base_no;
+    AttrType truncation;
+    int truncation_value;
+    AttrType use;
+    int use_value;
+    const char *use_string = 0;
+    oid_value curAttributeSet = attributeSet;
+    const char *termp;
+    struct rpn_char_map_info rcmi;
+    int space_split = complete_flag ? 0 : 1;
+
+    int bases_ok = 0;     /* no of databases with OK attribute */
+    int errCode = 0;      /* err code (if any is not OK) */
+    char *errString = 0;  /* addinfo */
+
+    rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
+    attr_init(&use, zapt, 1);
+    use_value = attr_find_ex(&use, &curAttributeSet, &use_string);
+    yaz_log(log_level_rpn, "string_term, use value %d", use_value);
+    attr_init(&truncation, zapt, 5);
+    truncation_value = attr_find(&truncation, NULL);
+    yaz_log(log_level_rpn, "truncation value %d", truncation_value);
+
+    if (use_value == -1)    /* no attribute - assume "any" */
+        use_value = 1016;
+    for (base_no = 0; base_no < num_bases; base_no++)
+    {
+        int ord = -1;
+        int attr_ok = 0;
+        int regex_range = 0;
+        int init_pos = 0;
+        attent attp;
+        data1_local_attribute id_xpath_attr;
+        data1_local_attribute *local_attr;
+        int max_pos, prefix_len = 0;
+        int relation_error;
+
+        /* every database parses the same term from the same start */
+        termp = *term_sub;
+
+        if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
+        {
+            zh->errCode = YAZ_BIB1_DATABASE_UNAVAILABLE;
+            zh->errString = basenames[base_no];
+            return ZEBRA_FAIL;
+        }
+        if (xpath_use > 0 && use_value == -2)
+        {
+            /* xpath mode and we have a string attribute */
+            attp.local_attributes = &id_xpath_attr;
+            attp.attset_ordinal = VAL_IDXPATH;
+            id_xpath_attr.next = 0;
+
+            use_value = xpath_use;  /* xpath_use as use-attribute now */
+            id_xpath_attr.local = use_value;
+        }
+        else if (curAttributeSet == VAL_IDXPATH && use_value >= 0)
+        {
+            /* X-Path attribute, use numeric value directly */
+            attp.local_attributes = &id_xpath_attr;
+            attp.attset_ordinal = VAL_IDXPATH;
+            id_xpath_attr.next = 0;
+            id_xpath_attr.local = use_value;
+        }
+        else if (use_string &&
+                 (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
+                                                     use_string)) >= 0)
+        {
+            /* we have a match for a raw string attribute */
+            char ord_buf[32];
+            int i, ord_len;
+
+            if (prefix_len)
+                term_dict[prefix_len++] = '|';
+            else
+                term_dict[prefix_len++] = '(';
+
+            ord_len = key_SU_encode (ord, ord_buf);
+            for (i = 0; i<ord_len; i++)
+            {
+                /* each ordinal byte is preceded by a byte with value 1 */
+                term_dict[prefix_len++] = 1;
+                term_dict[prefix_len++] = ord_buf[i];
+            }
+            attp.local_attributes = 0;  /* no more attributes */
+        }
+        else
+        {
+            /* lookup in the .att files . Allow string as well */
+            if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
+                                      use_string)))
+            {
+                yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
+                        curAttributeSet, use_value, r);
+                if (r == -1)
+                {
+                    /* set was found, but value wasn't defined */
+                    errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
+                    if (use_string)
+                        errString = nmem_strdup(stream, use_string);
+                    else
+                        errString = nmem_strdup_i (stream, use_value);
+                }
+                else
+                {
+                    int oid[OID_SIZE];
+                    struct oident oident;
+
+                    oident.proto = PROTO_Z3950;
+                    oident.oclass = CLASS_ATTSET;
+                    oident.value = curAttributeSet;
+                    oid_ent_to_oid (&oident, oid);
+
+                    /* NOTE(review): oident.desc is only valid if
+                       oid_ent_to_oid fills it in - confirm */
+                    errCode = YAZ_BIB1_UNSUPP_ATTRIBUTE_SET;
+                    errString = nmem_strdup(stream, oident.desc);
+                }
+                continue;
+            }
+        }
+        for (local_attr = attp.local_attributes; local_attr;
+             local_attr = local_attr->next)
+        {
+            char ord_buf[32];
+            int i, ord_len;
+
+            ord = zebraExplain_lookup_attr_su(zh->reg->zei,
+                                              attp.attset_ordinal,
+                                              local_attr->local);
+            if (ord < 0)
+                continue;
+            if (prefix_len)
+                term_dict[prefix_len++] = '|';
+            else
+                term_dict[prefix_len++] = '(';
+
+            ord_len = key_SU_encode (ord, ord_buf);
+            for (i = 0; i<ord_len; i++)
+            {
+                term_dict[prefix_len++] = 1;
+                term_dict[prefix_len++] = ord_buf[i];
+            }
+        }
+        bases_ok++;
+        if (prefix_len)
+            attr_ok = 1;    /* at least one ordinal: lookup is meaningful */
+
+        term_dict[prefix_len++] = ')';
+        term_dict[prefix_len++] = 1;
+        term_dict[prefix_len++] = reg_type;
+        yaz_log(log_level_rpn, "reg_type = %d", term_dict[prefix_len-1]);
+        term_dict[prefix_len] = '\0';
+        j = prefix_len;
+        switch (truncation_value)
+        {
+        case -1:         /* not specified */
+        case 100:        /* do not truncate */
+            if (!string_relation (zh, zapt, &termp, term_dict,
+                                  attributeSet,
+                                  reg_type, space_split, term_dst,
+                                  &relation_error))
+            {
+                if (relation_error)
+                {
+                    zh->errCode = relation_error;
+                    return ZEBRA_FAIL;
+                }
+                *term_sub = 0;
+                return ZEBRA_OK;
+            }
+            break;
+        case 1:          /* right truncation */
+            term_dict[j++] = '(';
+            if (!term_100(zh->reg->zebra_maps, reg_type,
+                          &termp, term_dict + j, space_split, term_dst))
+            {
+                *term_sub = 0;
+                return ZEBRA_OK;
+            }
+            strcat(term_dict, ".*)");
+            break;
+        case 2:          /* left truncation */
+            term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
+            if (!term_100(zh->reg->zebra_maps, reg_type,
+                          &termp, term_dict + j, space_split, term_dst))
+            {
+                *term_sub = 0;
+                return ZEBRA_OK;
+            }
+            strcat(term_dict, ")");
+            break;
+        case 3:          /* left&right truncation */
+            term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
+            if (!term_100(zh->reg->zebra_maps, reg_type,
+                          &termp, term_dict + j, space_split, term_dst))
+            {
+                *term_sub = 0;
+                return ZEBRA_OK;
+            }
+            strcat(term_dict, ".*)");
+            break;
+        case 101:        /* process # in term */
+            term_dict[j++] = '(';
+            if (!term_101(zh->reg->zebra_maps, reg_type,
+                          &termp, term_dict + j, space_split, term_dst))
+            {
+                *term_sub = 0;
+                return ZEBRA_OK;
+            }
+            strcat(term_dict, ")");
+            break;
+        case 102:        /* Regexp-1 */
+            term_dict[j++] = '(';
+            if (!term_102(zh->reg->zebra_maps, reg_type,
+                          &termp, term_dict + j, space_split, term_dst))
+            {
+                *term_sub = 0;
+                return ZEBRA_OK;
+            }
+            strcat(term_dict, ")");
+            break;
+        case 103:        /* Regexp-2 */
+            r = 1;
+            term_dict[j++] = '(';
+            init_pos = 2;
+            if (!term_103(zh->reg->zebra_maps, reg_type,
+                          &termp, term_dict + j, &regex_range,
+                          space_split, term_dst))
+            {
+                *term_sub = 0;
+                return ZEBRA_OK;
+            }
+            strcat(term_dict, ")");
+            break;
+        case 104:        /* process ?, * and # in term */
+            term_dict[j++] = '(';
+            if (!term_104(zh->reg->zebra_maps, reg_type,
+                          &termp, term_dict + j, space_split, term_dst))
+            {
+                *term_sub = 0;
+                return ZEBRA_OK;
+            }
+            strcat(term_dict, ")");
+            break;
+        case 105:        /* process * and ! in term, right truncated */
+            term_dict[j++] = '(';
+            if (!term_105(zh->reg->zebra_maps, reg_type,
+                          &termp, term_dict + j, space_split, term_dst, 1))
+            {
+                *term_sub = 0;
+                return ZEBRA_OK;
+            }
+            strcat(term_dict, ")");
+            break;
+        case 106:        /* process * and ! in term */
+            term_dict[j++] = '(';
+            if (!term_105(zh->reg->zebra_maps, reg_type,
+                          &termp, term_dict + j, space_split, term_dst, 0))
+            {
+                *term_sub = 0;
+                return ZEBRA_OK;
+            }
+            strcat(term_dict, ")");
+            break;
+        default:
+            zh->errCode = YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE;
+            zh->errString = nmem_strdup_i(stream, truncation_value);
+            return ZEBRA_FAIL;
+        }
+        if (attr_ok)
+        {
+            char buf[80];
+            const char *input = term_dict + prefix_len;
+            esc_str(buf, sizeof(buf), input, strlen(input));
+        }
+        if (attr_ok)
+        {
+            yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
+            r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
+                                 grep_info, &max_pos, init_pos,
+                                 grep_handle);
+            if (r)
+                yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
+        }
+    }
+    if (!bases_ok)
+    {
+        zh->errCode = errCode;
+        zh->errString = errString;
+        return ZEBRA_FAIL;
+    }
+    *term_sub = termp;
+    yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
+    return ZEBRA_OK;
+}
+
+
+/* convert APT search term to UTF8
+ *
+ * termz receives the NUL-terminated term, at most IT_MAX_WORD-1 bytes.
+ * A general term is converted with zh->iconv_to_utf8 when that is set
+ * and copied (truncated if necessary) otherwise; a characterString term
+ * is copied.  Any other term kind is rejected.
+ *
+ * Returns ZEBRA_OK, or ZEBRA_FAIL with zh->errCode set.  The conversion
+ * failure path now returns ZEBRA_FAIL rather than a bare -1, matching
+ * the declared return type and the caller's comparison.
+ */
+static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
+                                   char *termz)
+{
+    size_t sizez;
+    Z_Term *term = zapt->term;
+
+    switch (term->which)
+    {
+    case Z_Term_general:
+        if (zh->iconv_to_utf8 != 0)
+        {
+            char *inbuf = term->u.general->buf;
+            size_t inleft = term->u.general->len;
+            char *outbuf = termz;
+            size_t outleft = IT_MAX_WORD-1;
+            size_t ret;
+
+            ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
+                            &outbuf, &outleft);
+            if (ret == (size_t)(-1))
+            {
+                /* call with null buffers, as the original code did,
+                   before reporting the failure */
+                ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
+                zh->errCode =
+                    YAZ_BIB1_QUERY_TERM_INCLUDES_CHARS_THAT_DO_NOT_TRANSLATE_INTO_;
+                return ZEBRA_FAIL;
+            }
+            *outbuf = 0;
+        }
+        else
+        {
+            sizez = term->u.general->len;
+            if (sizez > IT_MAX_WORD-1)
+                sizez = IT_MAX_WORD-1;
+            memcpy (termz, term->u.general->buf, sizez);
+            termz[sizez] = '\0';
+        }
+        break;
+    case Z_Term_characterString:
+        sizez = strlen(term->u.characterString);
+        if (sizez > IT_MAX_WORD-1)
+            sizez = IT_MAX_WORD-1;
+        memcpy (termz, term->u.characterString, sizez);
+        termz[sizez] = '\0';
+        break;
+    default:
+        zh->errCode = YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM;
+        return ZEBRA_FAIL;
+    }
+    return ZEBRA_OK;
+}
+
+/* convert APT SCAN term to internal cmap
+ *
+ * The term is first converted with zapt_term_to_utf8() and then mapped
+ * unit by unit with zebra_maps_input() into termz.  Units that map to
+ * CHR_SPACE are not written directly: the most recent one is written
+ * only when a non-space unit follows and output has already started, so
+ * leading and trailing space is dropped.  termz is not bounds-checked
+ * here.  Returns ZEBRA_FAIL if the conversion fails, ZEBRA_OK otherwise.
+ */
+static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
+                                 char *termz, int reg_type)
+{
+    char utf8_term[IT_MAX_WORD];
+    const char *in, *in_end;
+    const char *pending_space = NULL;
+    char *out = termz;
+
+    if (zapt_term_to_utf8(zh, zapt, utf8_term) == ZEBRA_FAIL)
+        return ZEBRA_FAIL;    /* error */
+
+    in = utf8_term;
+    in_end = in + strlen(in);
+    while (in_end - in > 0)
+    {
+        const char **map = zebra_maps_input(zh->reg->zebra_maps, reg_type,
+                                            &in, (int) (in_end - in), 0);
+        const char *p;
+
+        if (**map == *CHR_SPACE)
+        {
+            pending_space = *map;
+            continue;
+        }
+        if (out != termz && pending_space)
+            for (p = pending_space; *p; p++)
+                *out++ = *p;
+        pending_space = NULL;
+        for (p = *map; *p; p++)
+            *out++ = *p;
+    }
+    *out = '\0';
+    return ZEBRA_OK;
+}
+
+char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
+ const char *termz, NMEM stream, unsigned reg_id)
+{
+ WRBUF wrbuf = 0;
+ AttrType truncation;
+ int truncation_value;
+ char *ex_list = 0;
+
+ attr_init(&truncation, zapt, 5);
+ truncation_value = attr_find(&truncation, NULL);