1 /* $Id: zrpn.c,v 1.228 2006-09-08 14:40:53 adam Exp $
2 Copyright (C) 1995-2006
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
40 struct rpn_char_map_info
/* Cached log level for the "rpn" log module; add_isam_p assigns
 * log_level_rpn from yaz_log_module_level("rpn").
 * NOTE(review): log_level_set is presumably the "already initialised"
 * flag for that lookup -- confirm against the code that tests it. */
46 static int log_level_set = 0;
47 static int log_level_rpn = 0;
/* NOTE(review): presumably a compile-time switch for termset support
 * (see grep_info_prepare) -- confirm where it is tested. */
49 #define TERMSET_DISABLE 1
/* Character-map callback handed to dict_grep_cmap by rpn_char_map_prepare.
 * vp:   the struct rpn_char_map_info registered with the dictionary.
 * from: in/out pointer to the input bytes; len: number of bytes available.
 * Maps the input with zebra_maps_input using the zm / reg_type stored in
 * the info structure.  The yaz_log calls dump the mapped bytes in hex.
 * NOTE(review): the tracing is presumably compiled out or debug-only --
 * confirm how it is guarded. */
51 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
53 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
54 const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
58 const char *outp = *out;
59 yaz_log(YLOG_LOG, "---");
62 yaz_log(YLOG_LOG, "%02X", *outp);
/* Fill map_info with the register's zebra_maps and the given reg_type and
 * install rpn_char_map_handler (with map_info as its context) on the
 * register's dictionary via dict_grep_cmap.  map_info is passed by pointer
 * to the dictionary, so it must stay valid while grep lookups use it. */
70 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
71 struct rpn_char_map_info *map_info)
73 map_info->zm = reg->zebra_maps;
74 map_info->reg_type = reg_type;
75 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
/* Convert the internal (mapped) term src into dst using zebra_maps_output
 * for register type reg_type.  Both visible copy steps are limited by
 * len < IT_MAX_WORD-1.
 * NOTE(review): dst is presumably expected to hold IT_MAX_WORD bytes --
 * confirm against callers. */
92 void zebra_term_untrans(ZebraHandle zh, int reg_type,
93 char *dst, const char *src)
98 const char *cp = zebra_maps_output(zh->reg->zebra_maps,
102 if (len < IT_MAX_WORD-1)
107 while (*cp && len < IT_MAX_WORD-1)
/* Record one dictionary hit in the grep_info accumulator.
 * name: dictionary key of the hit (ordinal prefix decoded by key_SU_decode,
 *       followed by the term).
 * info: dictionary value; its first byte must equal sizeof(ISAM_P) (asserted)
 *       and the ISAM_P itself follows at info+1.
 * The isam_p_buf array is grown when isam_p_indx reaches isam_p_size.  The
 * term is also un-translated, logged and passed to resultSetAddTerm.
 * NOTE(review): that last part presumably runs only when a termset is
 * active -- confirm. */
113 static void add_isam_p(const char *name, const char *info,
118 log_level_rpn = yaz_log_module_level("rpn");
121 if (p->isam_p_indx == p->isam_p_size)
123 ISAM_P *new_isam_p_buf;
/* grow geometrically: double the capacity and add 100 entries */
127 p->isam_p_size = 2*p->isam_p_size + 100;
128 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
132 memcpy(new_isam_p_buf, p->isam_p_buf,
133 p->isam_p_indx * sizeof(*p->isam_p_buf));
134 xfree(p->isam_p_buf);
136 p->isam_p_buf = new_isam_p_buf;
139 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* NOTE(review): the copy below reads from p->isam_p_buf although the
 * destination is the new term_no array and the size is computed from
 * *p->term_no; this looks like it should read from p->term_no -- confirm. */
142 memcpy(new_term_no, p->isam_p_buf,
143 p->isam_p_indx * sizeof(*p->term_no));
146 p->term_no = new_term_no;
149 assert(*info == sizeof(*p->isam_p_buf));
150 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
155 char term_tmp[IT_MAX_WORD];
157 const char *index_name;
/* len = number of bytes the encoded ordinal occupies at the start of name */
158 int len = key_SU_decode (&ord, (const unsigned char *) name);
160 zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len);
161 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
162 zebraExplain_lookup_ord(p->zh->reg->zei,
163 ord, 0 /* index_type */, &db, &index_name);
164 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
166 resultSetAddTerm(p->zh, p->termset, name[len], db,
167 index_name, term_tmp);
/* Per-hit callback with a void* context: forwards the hit to add_isam_p,
 * treating p as the struct grep_info accumulator. */
172 static int grep_handle(char *name, const char *info, void *p)
174 add_isam_p(name, info, (struct grep_info *) p);
/* Pre-scan of *src before a term is parsed: skips leading white space.
 * ct1, ct2: optional sets of special characters (either may be NULL; both
 *           are checked before strchr is applied to the current character).
 * Other characters are mapped with zebra_maps_input and compared with
 * CHR_SPACE to decide whether they count as white space.
 * NOTE(review): the return value is presumably "a term follows" (callers
 * treat 0 as "no term") -- confirm. */
178 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
179 const char *ct1, const char *ct2, int first)
181 const char *s1, *s0 = *src;
184 /* skip white space */
187 if (ct1 && strchr(ct1, *s0))
189 if (ct2 && strchr(ct2, *s0))
192 map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
193 if (**map != *CHR_SPACE)
/* Produce a printable dump of in_buf (in_size bytes) in out_buf.
 * Each byte is appended as "%02X:%c " (hex value, then a character chosen
 * depending on whether the byte lies outside 32..126).  Once the output
 * grows past out_size-20 characters, ".." is appended.
 * out_size must be greater than 20 (asserted). */
202 static void esc_str(char *out_buf, size_t out_size,
203 const char *in_buf, int in_size)
209 assert(out_size > 20);
211 for (k = 0; k<in_size; k++)
/* mask to 0..255 so the range test below does not depend on char signedness */
213 int c = in_buf[k] & 0xff;
215 if (c < 32 || c > 126)
219 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
220 if (strlen(out_buf) > out_size-20)
222 strcat(out_buf, "..");
/* Characters the term_1xx functions look for (via strchr) when copying
 * mapped characters into a dictionary regular expression.
 * NOTE(review): presumably these are the ones that must be escaped --
 * confirm against the elided branch bodies. */
228 #define REGEX_CHARS " []()|.*+?!"
230 /* term_100: handle term, where trunc = none(no operators at all) */
/* src:         in/out pointer to the query text; term_pre is applied first.
 * dst:         receives the mapped form used in the dictionary regex
 *              (map[0] is copied in with strcpy).
 * dst_term:    receives the raw source bytes of the term (memcpy of s1..s0).
 * space_split: passed inverted as the "first" flag to term_pre; it selects
 *              between the two CHR_SPACE branches below.
 * In the "complete subfield" branch, space runs are remembered in
 * space_start/space_end and replayed only when more non-space input follows. */
231 static int term_100(ZebraMaps zebra_maps, int reg_type,
232 const char **src, char *dst, int space_split,
240 const char *space_start = 0;
241 const char *space_end = 0;
243 if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
250 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
254 if (**map == *CHR_SPACE)
257 else /* complete subfield only. */
259 if (**map == *CHR_SPACE)
260 { /* save space mapping for later .. */
265 else if (space_start)
266 { /* reload last space */
267 while (space_start < space_end)
269 if (strchr(REGEX_CHARS, *space_start))
271 dst_term[j++] = *space_start;
272 dst[i++] = *space_start++;
275 space_start = space_end = 0;
278 /* add non-space char */
279 memcpy(dst_term+j, s1, s0 - s1);
285 if (strchr(REGEX_CHARS, *s1))
293 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
295 strcpy(dst + i, map[0]);
305 /* term_101: handle term, where trunc = Process # */
/* Same parameter roles as term_100: dst gets the regex form, dst_term the
 * raw term (NUL-terminated at the end).  '#' is the only special character
 * passed to term_pre; other characters go through zebra_maps_search and
 * are copied as in term_100. */
306 static int term_101(ZebraMaps zebra_maps, int reg_type,
307 const char **src, char *dst, int space_split,
315 if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
324 dst_term[j++] = *s0++;
330 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
332 if (space_split && **map == *CHR_SPACE)
335 /* add non-space char */
336 memcpy(dst_term+j, s1, s0 - s1);
342 if (strchr(REGEX_CHARS, *s1))
350 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
352 strcpy(dst + i, map[0]);
358 dst_term[j++] = '\0';
363 /* term_103: handle term, where trunc = re-2 (regular expressions) */
/* errors: optional output (may be NULL).  When the term starts with
 *         "+<digit>+" followed by more text, *errors is set to that digit.
 * Characters found in "^\()[].*+?|-" are handled separately from ordinary
 * characters, which are mapped with zebra_maps_search.  Remaining
 * parameters have the same roles as in term_100. */
364 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
365 char *dst, int *errors, int space_split,
373 if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
376 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
377 isdigit(((const unsigned char *)s0)[1]))
379 *errors = s0[1] - '0';
386 if (strchr("^\\()[].*+?|-", *s0))
395 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
397 if (space_split && **map == *CHR_SPACE)
400 /* add non-space char */
401 memcpy(dst_term+j, s1, s0 - s1);
407 if (strchr(REGEX_CHARS, *s1))
415 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
417 strcpy(dst + i, map[0]);
429 /* term_102: handle term, where trunc = re-1 (regular expressions) */
/* Thin wrapper: delegates to term_103 with errors == NULL, so no
 * "+<digit>+" error-count prefix is extracted. */
430 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
431 char *dst, int space_split, char *dst_term)
433 return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
438 /* term_104: handle term, where trunc = Process ?, * and # (the set passed to term_pre) */
/* A run of decimal digits is accumulated into "limit" while the digits are
 * also copied to dst_term.
 * NOTE(review): presumably the digits follow a '?' and bound how many
 * characters it may match -- confirm against the elided branches.
 * Other parameters have the same roles as in term_100; dst_term is
 * NUL-terminated at the end. */
439 static int term_104(ZebraMaps zebra_maps, int reg_type,
440 const char **src, char *dst, int space_split,
448 if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
455 dst_term[j++] = *s0++;
456 if (*s0 >= '0' && *s0 <= '9')
459 while (*s0 >= '0' && *s0 <= '9')
461 limit = limit * 10 + (*s0 - '0');
462 dst_term[j++] = *s0++;
482 dst_term[j++] = *s0++;
487 dst_term[j++] = *s0++;
493 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
495 if (space_split && **map == *CHR_SPACE)
498 /* add non-space char */
499 memcpy(dst_term+j, s1, s0 - s1);
505 if (strchr(REGEX_CHARS, *s1))
513 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
515 strcpy(dst + i, map[0]);
521 dst_term[j++] = '\0';
526 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
/* right_truncate: non-zero for truncation 105, zero for 106 (see the calls
 *                 in string_term).
 * '*' and '!' are the special characters passed to term_pre; other
 * characters are mapped with zebra_maps_search as in term_100.
 * dst_term is NUL-terminated at the end. */
527 static int term_105(ZebraMaps zebra_maps, int reg_type,
528 const char **src, char *dst, int space_split,
529 char *dst_term, int right_truncate)
536 if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
545 dst_term[j++] = *s0++;
550 dst_term[j++] = *s0++;
556 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
558 if (space_split && **map == *CHR_SPACE)
561 /* add non-space char */
562 memcpy(dst_term+j, s1, s0 - s1);
568 if (strchr(REGEX_CHARS, *s1))
576 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
578 strcpy(dst + i, map[0]);
590 dst_term[j++] = '\0';
596 /* gen_regular_rel - generate regular expression from relation
597 * val: border value (inclusive)
598 * islt: 1 if <=; 0 if >=.
/* Writes into dst a regular expression matching decimal integers on one
 * side of val.  The expression starts with either "(-[0-9]+|(" or
 * "([0-9]+|-(", is built digit by digit from the "%d" rendering of val,
 * and ends with alternatives for numbers with fewer or more digits.
 * dst is filled with strcpy/strcat and no size argument, so the caller
 * must provide a sufficiently large buffer. */
600 static void gen_regular_rel(char *dst, int val, int islt)
607 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
611 strcpy(dst, "(-[0-9]+|(");
619 strcpy(dst, "([0-9]+|-(");
631 sprintf(numstr, "%d", val);
632 for (w = strlen(numstr); --w >= 0; pos++)
651 strcpy(dst + dst_p, numstr);
652 dst_p = strlen(dst) - pos - 1;
680 for (i = 0; i<pos; i++)
693 /* match everything less than 10^(pos-1) */
695 for (i = 1; i<pos; i++)
696 strcat(dst, "[0-9]?");
700 /* match everything greater than 10^pos */
701 for (i = 0; i <= pos; i++)
702 strcat(dst, "[0-9]");
703 strcat(dst, "[0-9]*");
/* Copy one logical character from src[*indx] to **term_p, advancing both
 * the output pointer and the index.  A backslash is copied together with
 * the character that follows it, so an escape sequence is never split. */
708 void string_rel_add_char(char **term_p, const char *src, int *indx)
710 if (src[*indx] == '\\')
711 *(*term_p)++ = src[(*indx)++];
712 *(*term_p)++ = src[(*indx)++];
716 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
717 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
718 * >= abc ([b-].*|a[c-].*|ab[c-].*)
719 * ([^-a].*|a[^-b].*|ab[c-].*)
720 * < abc ([-0].*|a[-a].*|ab[-b].*)
721 * ([^a-].*|a[^b-].*|ab[^c-].*)
722 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
723 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* Append to term_dict the regular expression for a string term under the
 * relation attribute (attribute type 2) of zapt.
 * term_sub:   in/out pointer to the remaining query text.
 * term_dict:  regex under construction; new text is written at its current
 *             end (term_tmp).
 * term_dst:   receives the raw term, via term_100.
 * error_code: set to YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE for a relation
 *             that is not handled.
 * For <, <=, > and >= the term is parsed by term_100 into term_component
 * and expanded character by character with string_rel_add_char.  Each of
 * those branches checks that the output stays within IT_MAX_WORD.
 * For "=" the component is simply wrapped in parentheses. */
725 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
726 const char **term_sub, char *term_dict,
727 oid_value attributeSet,
728 int reg_type, int space_split, char *term_dst,
734 char *term_tmp = term_dict + strlen(term_dict);
735 char term_component[2*IT_MAX_WORD+20];
737 attr_init_APT(&relation, zapt, 2);
738 relation_value = attr_find(&relation, NULL);
741 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
742 switch (relation_value)
745 if (!term_100(zh->reg->zebra_maps, reg_type,
746 term_sub, term_component,
747 space_split, term_dst))
749 yaz_log(log_level_rpn, "Relation <");
752 for (i = 0; term_component[i]; )
759 string_rel_add_char(&term_tmp, term_component, &j);
764 string_rel_add_char(&term_tmp, term_component, &i);
771 if ((term_tmp - term_dict) > IT_MAX_WORD)
778 if (!term_100(zh->reg->zebra_maps, reg_type,
779 term_sub, term_component,
780 space_split, term_dst))
782 yaz_log(log_level_rpn, "Relation <=");
785 for (i = 0; term_component[i]; )
790 string_rel_add_char(&term_tmp, term_component, &j);
794 string_rel_add_char(&term_tmp, term_component, &i);
803 if ((term_tmp - term_dict) > IT_MAX_WORD)
806 for (i = 0; term_component[i]; )
807 string_rel_add_char(&term_tmp, term_component, &i);
812 if (!term_100 (zh->reg->zebra_maps, reg_type,
813 term_sub, term_component, space_split, term_dst))
815 yaz_log(log_level_rpn, "Relation >");
818 for (i = 0; term_component[i];)
823 string_rel_add_char(&term_tmp, term_component, &j);
828 string_rel_add_char(&term_tmp, term_component, &i);
836 if ((term_tmp - term_dict) > IT_MAX_WORD)
839 for (i = 0; term_component[i];)
840 string_rel_add_char(&term_tmp, term_component, &i);
847 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
848 term_component, space_split, term_dst))
850 yaz_log(log_level_rpn, "Relation >=");
853 for (i = 0; term_component[i];)
860 string_rel_add_char(&term_tmp, term_component, &j);
863 if (term_component[i+1])
867 string_rel_add_char(&term_tmp, term_component, &i);
871 string_rel_add_char(&term_tmp, term_component, &i);
878 if ((term_tmp - term_dict) > IT_MAX_WORD)
889 yaz_log(log_level_rpn, "Relation =");
890 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
891 term_component, space_split, term_dst))
893 strcat(term_tmp, "(");
894 strcat(term_tmp, term_component);
895 strcat(term_tmp, ")");
898 yaz_log(log_level_rpn, "Relation always matches");
899 /* skip to end of term (we don't care what it is) */
900 while (**term_sub != '\0')
904 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
/* Forward declaration: string_term is defined after term_trunc, which
 * calls it. */
910 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
911 const char **term_sub,
912 oid_value attributeSet, NMEM stream,
913 struct grep_info *grep_info,
914 int reg_type, int complete_flag,
915 int num_bases, char **basenames,
917 const char *xpath_use,
918 struct ord_list **ol);
/* Extract the hit-count limit (attribute type 11) and the term reference
 * id (attribute type 10) from zapt.
 * hits_limit_value: output; see the fallback rules below.
 * term_ref_id_str:  output; set from the attribute's string form, or to a
 *                   freshly nmem_malloc'ed decimal string (20 bytes) when
 *                   the attribute is numeric (>= 0).
 * Fallback rules for the limit:
 *   - limit not given (-1) and a term reference is present: zh->approx_limit
 *   - limit not given (-1) and no term reference: 0 (no counting)
 *   - limit given as 0: zh->approx_limit */
920 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
921 Z_AttributesPlusTerm *zapt,
922 zint *hits_limit_value,
923 const char **term_ref_id_str,
926 AttrType term_ref_id_attr;
927 AttrType hits_limit_attr;
930 attr_init_APT(&hits_limit_attr, zapt, 11);
931 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
933 attr_init_APT(&term_ref_id_attr, zapt, 10);
934 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
935 if (term_ref_id_int >= 0)
937 char *res = nmem_malloc(nmem, 20);
938 sprintf(res, "%d", term_ref_id_int);
939 *term_ref_id_str = res;
942 /* no limit given ? */
943 if (*hits_limit_value == -1)
945 if (*term_ref_id_str)
947 /* use global if term_ref is present */
948 *hits_limit_value = zh->approx_limit;
952 /* no counting if term_ref is not present */
953 *hits_limit_value = 0;
956 else if (*hits_limit_value == 0)
958 /* 0 is the same as global limit */
959 *hits_limit_value = zh->approx_limit;
961 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
962 *term_ref_id_str ? *term_ref_id_str : "none",
/* Build the result set for one term of a string query.
 * Steps visible here: read the hit limit and term reference id with
 * term_limits_APT, reset grep_info->isam_p_indx, collect matching
 * dictionary entries with string_term (which advances *term_sub and fills
 * term_dst), then hand the collected ISAM positions to rset_trunc and
 * store the result in *rset.
 * *term_sub is tested for NULL after string_term as the "no more terms"
 * condition. */
967 static ZEBRA_RES term_trunc(ZebraHandle zh,
968 Z_AttributesPlusTerm *zapt,
969 const char **term_sub,
970 oid_value attributeSet, NMEM stream,
971 struct grep_info *grep_info,
972 int reg_type, int complete_flag,
973 int num_bases, char **basenames,
975 const char *rank_type,
976 const char *xpath_use,
979 struct rset_key_control *kc)
983 zint hits_limit_value;
984 const char *term_ref_id_str = 0;
987 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
988 grep_info->isam_p_indx = 0;
989 res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
990 reg_type, complete_flag, num_bases, basenames,
991 term_dst, xpath_use, &ol);
994 if (!*term_sub) /* no more terms ? */
996 yaz_log(log_level_rpn, "term: %s", term_dst);
997 *rset = rset_trunc(zh, grep_info->isam_p_buf,
998 grep_info->isam_p_indx, term_dst,
999 strlen(term_dst), rank_type, 1 /* preserve pos */,
1000 zapt->term->which, rset_nmem,
1001 kc, kc->scope, ol, reg_type, hits_limit_value,
/* Look up one string term in the dictionary for every database in
 * basenames and collect the hits in grep_info.
 * term_sub:  in/out pointer to the remaining query text; each database
 *            starts parsing from the same position (termp = *term_sub).
 * term_dst:  receives the raw term text (filled by the term_1xx helpers).
 * ol:        output list of index ordinals, one appended per database.
 * For each database the dictionary regex in term_dict is built as
 *   "(" + ordinal bytes, each preceded by the escape byte 1, + ")"
 * followed by a parenthesised expression that depends on the truncation
 * attribute (attribute type 5).  The regex is then run with
 * dict_lookup_grep.
 * complete_flag selects whole-field matching (space_split == 0). */
1008 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1009 const char **term_sub,
1010 oid_value attributeSet, NMEM stream,
1011 struct grep_info *grep_info,
1012 int reg_type, int complete_flag,
1013 int num_bases, char **basenames,
1015 const char *xpath_use,
1016 struct ord_list **ol)
1018 char term_dict[2*IT_MAX_WORD+4000];
1020 AttrType truncation;
1021 int truncation_value;
1023 struct rpn_char_map_info rcmi;
1024 int space_split = complete_flag ? 0 : 1;
1026 int bases_ok = 0; /* no of databases with OK attribute */
1028 *ol = ord_list_create(stream);
1030 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1031 attr_init_APT(&truncation, zapt, 5);
1032 truncation_value = attr_find(&truncation, NULL);
1033 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1035 for (base_no = 0; base_no < num_bases; base_no++)
1038 int regex_range = 0;
1039 int max_pos, prefix_len = 0;
1044 termp = *term_sub; /* start of term for each database */
1046 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1048 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1049 basenames[base_no]);
1053 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1054 attributeSet, &ord) != ZEBRA_OK)
1059 *ol = ord_list_append(stream, *ol, ord);
1060 ord_len = key_SU_encode (ord, ord_buf);
1062 term_dict[prefix_len++] = '(';
1063 for (i = 0; i<ord_len; i++)
1065 term_dict[prefix_len++] = 1; /* our internal regexp escape char */
1066 term_dict[prefix_len++] = ord_buf[i];
1068 term_dict[prefix_len++] = ')';
1069 term_dict[prefix_len] = '\0';
1071 switch (truncation_value)
1073 case -1: /* not specified */
1074 case 100: /* do not truncate */
1075 if (!string_relation(zh, zapt, &termp, term_dict,
1077 reg_type, space_split, term_dst,
1082 zebra_setError(zh, relation_error, 0);
1089 case 1: /* right truncation */
1090 term_dict[j++] = '(';
1091 if (!term_100(zh->reg->zebra_maps, reg_type,
1092 &termp, term_dict + j, space_split, term_dst))
1097 strcat(term_dict, ".*)");
1099 case 2: /* left truncation */
1100 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1101 if (!term_100(zh->reg->zebra_maps, reg_type,
1102 &termp, term_dict + j, space_split, term_dst))
1107 strcat(term_dict, ")");
1109 case 3: /* left&right truncation */
1110 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1111 if (!term_100(zh->reg->zebra_maps, reg_type,
1112 &termp, term_dict + j, space_split, term_dst))
1117 strcat(term_dict, ".*)");
1119 case 101: /* process # in term */
1120 term_dict[j++] = '(';
1121 if (!term_101(zh->reg->zebra_maps, reg_type,
1122 &termp, term_dict + j, space_split, term_dst))
1127 strcat(term_dict, ")");
1129 case 102: /* Regexp-1 */
1130 term_dict[j++] = '(';
1131 if (!term_102(zh->reg->zebra_maps, reg_type,
1132 &termp, term_dict + j, space_split, term_dst))
1137 strcat(term_dict, ")");
1139 case 103: /* Regexp-2 */
1141 term_dict[j++] = '(';
1142 if (!term_103(zh->reg->zebra_maps, reg_type,
/* NOTE(review): the third argument on the next line is evidently meant to
 * be the address of the local regex_range declared above (term_103 takes
 * an int pointer there); the leading character looks like an HTML-entity
 * mangling of an ampersand followed by "reg" -- restore before compiling. */
1143 &termp, term_dict + j, ®ex_range,
1144 space_split, term_dst))
1149 strcat(term_dict, ")");
1151 case 104: /* process # and ! in term */
1152 term_dict[j++] = '(';
1153 if (!term_104(zh->reg->zebra_maps, reg_type,
1154 &termp, term_dict + j, space_split, term_dst))
1159 strcat(term_dict, ")");
1161 case 105: /* process * and ! in term */
1162 term_dict[j++] = '(';
1163 if (!term_105(zh->reg->zebra_maps, reg_type,
1164 &termp, term_dict + j, space_split, term_dst, 1))
1169 strcat(term_dict, ")");
1171 case 106: /* process * and ! in term */
1172 term_dict[j++] = '(';
1173 if (!term_105(zh->reg->zebra_maps, reg_type,
1174 &termp, term_dict + j, space_split, term_dst, 0))
1179 strcat(term_dict, ")");
1182 zebra_setError_zint(zh,
1183 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1190 const char *input = term_dict + prefix_len;
1191 esc_str(buf, sizeof(buf), input, strlen(input));
1193 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1194 r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1195 grep_info, &max_pos,
1196 ord_len /* number of "exact" chars */,
1199 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1204 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1209 /* convert APT search term to UTF8 */
/* termz: output buffer; every visible path limits the copied or converted
 *        data to IT_MAX_WORD-1 bytes.
 * Z_Term_general: converted with yaz_iconv when zh->iconv_to_utf8 is set;
 *                 otherwise copied verbatim and NUL-terminated.  A
 *                 conversion failure resets the iconv state and reports
 *                 the "chars that do not translate" Bib-1 diagnostic.
 * Z_Term_characterString: copied verbatim (truncated) and NUL-terminated.
 * Any other term type: YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM is set. */
1210 static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1214 Z_Term *term = zapt->term;
1216 switch (term->which)
1218 case Z_Term_general:
1219 if (zh->iconv_to_utf8 != 0)
1221 char *inbuf = (char *) term->u.general->buf;
1222 size_t inleft = term->u.general->len;
1223 char *outbuf = termz;
1224 size_t outleft = IT_MAX_WORD-1;
1227 ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1229 if (ret == (size_t)(-1))
1231 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1234 YAZ_BIB1_QUERY_TERM_INCLUDES_CHARS_THAT_DO_NOT_TRANSLATE_INTO_,
1242 sizez = term->u.general->len;
1243 if (sizez > IT_MAX_WORD-1)
1244 sizez = IT_MAX_WORD-1;
1245 memcpy (termz, term->u.general->buf, sizez);
1246 termz[sizez] = '\0';
1249 case Z_Term_characterString:
1250 sizez = strlen(term->u.characterString);
1251 if (sizez > IT_MAX_WORD-1)
1252 sizez = IT_MAX_WORD-1;
1253 memcpy (termz, term->u.characterString, sizez);
1254 termz[sizez] = '\0';
1257 zebra_setError(zh, YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM, 0);
1263 /* convert APT SCAN term to internal cmap */
/* The term is first converted to UTF-8 into the local buffer termz0
 * (ZEBRA_FAIL is returned if that fails).  It is then run through
 * zebra_maps_input piece by piece until the input is consumed.  Pieces
 * that map to CHR_SPACE are handled through space_map; other pieces are
 * copied from *map.
 * NOTE(review): termz is presumably the output buffer of size
 * IT_MAX_WORD -- confirm against the elided copy loop bodies. */
1264 static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1265 char *termz, int reg_type)
1267 char termz0[IT_MAX_WORD];
1269 if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
1270 return ZEBRA_FAIL; /* error */
1274 const char *cp = (const char *) termz0;
1275 const char *cp_end = cp + strlen(cp);
1278 const char *space_map = NULL;
1281 while ((len = (cp_end - cp)) > 0)
1283 map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0);
1284 if (**map == *CHR_SPACE)
1289 for (src = space_map; *src; src++)
1292 for (src = *map; *src; src++)
/* Release the arrays owned by grep_info (term_no and isam_p_buf).  The
 * structure itself is not freed. */
1301 static void grep_info_delete(struct grep_info *grep_info)
1304 xfree(grep_info->term_no);
1306 xfree(grep_info->isam_p_buf);
/* Initialise grep_info to an empty state (no buffers, no termset) and
 * process the termset attribute (attribute type 8) of zapt.
 * attr_find_ex returns the numeric attribute value and, through
 * termset_value_string, its string form.  -1 is the "attribute absent"
 * case.  The value -2 selects the string form as the termset name;
 * any other value is formatted as a decimal name.
 * The termset is created with resultSetAdd; if that fails,
 * YAZ_BIB1_ILLEGAL_RESULT_SET_NAME is set.
 * NOTE(review): the "termset" YAZ_BIB1_UNSUPP_SEARCH error is presumably
 * the TERMSET_DISABLE build variant -- confirm. */
1309 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1310 Z_AttributesPlusTerm *zapt,
1311 struct grep_info *grep_info,
1315 int termset_value_numeric;
1316 const char *termset_value_string;
1319 grep_info->term_no = 0;
1321 grep_info->isam_p_size = 0;
1322 grep_info->isam_p_buf = NULL;
1324 grep_info->reg_type = reg_type;
1325 grep_info->termset = 0;
1328 attr_init_APT(&termset, zapt, 8);
1329 termset_value_numeric =
1330 attr_find_ex(&termset, NULL, &termset_value_string);
1331 if (termset_value_numeric != -1)
1334 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1338 const char *termset_name = 0;
1339 if (termset_value_numeric != -2)
1342 sprintf(resname, "%d", termset_value_numeric);
1343 termset_name = resname;
1346 termset_name = termset_value_string;
1347 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1348 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1349 if (!grep_info->termset)
1351 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1360 \brief Create result set(s) for list of terms
1361 \param zh Zebra Handle
1362 \param zapt Attributes Plus Term (RPN leaf)
1363 \param termz term as used in query but converted to UTF-8
1364 \param attributeSet default attribute set
1365 \param stream memory for result
1366 \param reg_type register type ('w', 'p',..)
1367 \param complete_flag whether it's phrases or not
1368 \param rank_type term flags for ranking
1369 \param xpath_use use attribute for X-Path (-1 for no X-path)
1370 \param num_bases number of databases
1371 \param basenames array of databases
1372 \param rset_nmem memory for result sets
1373 \param result_sets output result set for each term in list (output)
1374 \param num_result_sets number of output result sets
1375 \param kc rset key control to be used for created result sets
/* Implementation notes:
 * - termz is consumed term by term through termp, which term_trunc
 *   advances.
 * - The *result_sets array is grown by allocating a larger array from
 *   stream and copying the old entries over.
 * - If term_trunc fails, every result set created so far is deleted with
 *   rset_delete and grep_info is released before returning.
 * - A NULL result set from term_trunc is not counted in *num_result_sets.
 * - grep_info is released again on the normal exit path. */
1377 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1378 Z_AttributesPlusTerm *zapt,
1380 oid_value attributeSet,
1382 int reg_type, int complete_flag,
1383 const char *rank_type,
1384 const char *xpath_use,
1385 int num_bases, char **basenames,
1387 RSET **result_sets, int *num_result_sets,
1388 struct rset_key_control *kc)
1390 char term_dst[IT_MAX_WORD+1];
1391 struct grep_info grep_info;
1392 const char *termp = termz;
1395 *num_result_sets = 0;
1397 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1403 if (alloc_sets == *num_result_sets)
1406 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1409 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1410 alloc_sets = alloc_sets + add;
1411 *result_sets = rnew;
1413 res = term_trunc(zh, zapt, &termp, attributeSet,
1415 reg_type, complete_flag,
1416 num_bases, basenames,
1417 term_dst, rank_type,
1418 xpath_use, rset_nmem,
1419 &(*result_sets)[*num_result_sets],
1421 if (res != ZEBRA_OK)
1424 for (i = 0; i < *num_result_sets; i++)
1425 rset_delete((*result_sets)[i]);
1426 grep_info_delete (&grep_info);
1429 if ((*result_sets)[*num_result_sets] == 0)
1431 (*num_result_sets)++;
1436 grep_info_delete(&grep_info);
/* Handle the position attribute (attribute type 3) of zapt.
 * YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE is reported for position values that
 * are not handled, when the register type does not support
 * first-in-field (zebra_maps_is_first_in_field), and when the register
 * has no isamb.
 * Otherwise, for each database the dictionary is searched for the key
 * made of the encoded ordinal followed by FIRST_IN_FIELD_STR.  Each hit
 * becomes an rsisamb result set, and the sets are combined into *rset
 * with rset_create_or.
 * f_set is allocated with xmalloc, one slot per database. */
1440 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1441 Z_AttributesPlusTerm *zapt,
1442 oid_value attributeSet,
1444 int num_bases, char **basenames,
1447 struct rset_key_control *kc)
1455 attr_init_APT(&position, zapt, 3);
1456 position_value = attr_find(&position, NULL);
1457 switch(position_value)
1466 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1471 if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, reg_type))
1473 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1478 if (!zh->reg->isamb)
1480 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1484 f_set = xmalloc(sizeof(RSET) * num_bases);
1485 for (base_no = 0; base_no < num_bases; base_no++)
1489 char term_dict[100];
1494 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1496 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1497 basenames[base_no]);
1501 if (zebra_apt_get_ord(zh, zapt, reg_type, 0,
1502 attributeSet, &ord) != ZEBRA_OK)
1505 ord_len = key_SU_encode (ord, ord_buf);
1506 memcpy(term_dict, ord_buf, ord_len);
1507 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1508 val = dict_lookup(zh->reg->dict, term_dict);
/* dictionary value layout: one length byte, then the ISAM_P */
1511 assert(*val == sizeof(ISAM_P));
1512 memcpy(&isam_p, val+1, sizeof(isam_p));
1514 f_set[num_sets++] = rsisamb_create(rset_nmem, kc, kc->scope,
1515 zh->reg->isamb, isam_p, 0);
1520 *rset = rset_create_or(rset_nmem, kc, kc->scope,
1521 0 /* termid */, num_sets, f_set);
/* Phrase search: build one result set per term with term_list_trunc and
 * combine them into *rset.
 * When at least one term set exists, rpn_search_APT_position is called
 * as well, and its result (first_set) is placed in front of the term sets
 * in a new array allocated from stream.
 * Result: a null set for zero sets, the set itself for one set, otherwise
 * an ordered proximity set (relation 3, distance 1) over all sets. */
1527 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1528 Z_AttributesPlusTerm *zapt,
1529 const char *termz_org,
1530 oid_value attributeSet,
1532 int reg_type, int complete_flag,
1533 const char *rank_type,
1534 const char *xpath_use,
1535 int num_bases, char **basenames,
1538 struct rset_key_control *kc)
1540 RSET *result_sets = 0;
1541 int num_result_sets = 0;
1543 term_list_trunc(zh, zapt, termz_org, attributeSet,
1544 stream, reg_type, complete_flag,
1545 rank_type, xpath_use,
1546 num_bases, basenames,
1548 &result_sets, &num_result_sets, kc);
1550 if (res != ZEBRA_OK)
1553 if (num_result_sets > 0)
1556 res = rpn_search_APT_position(zh, zapt, attributeSet,
1558 num_bases, basenames,
1559 rset_nmem, &first_set,
1561 if (res != ZEBRA_OK)
1565 RSET *nsets = nmem_malloc(stream,
1566 sizeof(RSET) * (num_result_sets+1));
1567 nsets[0] = first_set;
1568 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1569 result_sets = nsets;
1573 if (num_result_sets == 0)
1574 *rset = rset_create_null(rset_nmem, kc, 0);
1575 else if (num_result_sets == 1)
1576 *rset = result_sets[0];
1578 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1579 num_result_sets, result_sets,
1580 1 /* ordered */, 0 /* exclusion */,
1581 3 /* relation */, 1 /* distance */);
/* Or-list search: build one result set per term with term_list_trunc and
 * combine them into *rset: a null set for zero sets, the set itself for
 * one set, otherwise rset_create_or over all sets. */
1587 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1588 Z_AttributesPlusTerm *zapt,
1589 const char *termz_org,
1590 oid_value attributeSet,
1592 int reg_type, int complete_flag,
1593 const char *rank_type,
1594 const char *xpath_use,
1595 int num_bases, char **basenames,
1598 struct rset_key_control *kc)
1600 RSET *result_sets = 0;
1601 int num_result_sets = 0;
1603 term_list_trunc(zh, zapt, termz_org, attributeSet,
1604 stream, reg_type, complete_flag,
1605 rank_type, xpath_use,
1606 num_bases, basenames,
1608 &result_sets, &num_result_sets, kc);
1609 if (res != ZEBRA_OK)
1611 if (num_result_sets == 0)
1612 *rset = rset_create_null(rset_nmem, kc, 0);
1613 else if (num_result_sets == 1)
1614 *rset = result_sets[0];
1616 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1617 num_result_sets, result_sets);
/* And-list search: build one result set per term with term_list_trunc and
 * combine them into *rset: a null set for zero sets, the set itself for
 * one set, otherwise rset_create_and over all sets. */
1623 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1624 Z_AttributesPlusTerm *zapt,
1625 const char *termz_org,
1626 oid_value attributeSet,
1628 int reg_type, int complete_flag,
1629 const char *rank_type,
1630 const char *xpath_use,
1631 int num_bases, char **basenames,
1634 struct rset_key_control *kc)
1636 RSET *result_sets = 0;
1637 int num_result_sets = 0;
1639 term_list_trunc(zh, zapt, termz_org, attributeSet,
1640 stream, reg_type, complete_flag,
1641 rank_type, xpath_use,
1642 num_bases, basenames,
1644 &result_sets, &num_result_sets,
1646 if (res != ZEBRA_OK)
1648 if (num_result_sets == 0)
1649 *rset = rset_create_null(rset_nmem, kc, 0);
1650 else if (num_result_sets == 1)
1651 *rset = result_sets[0];
1653 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1654 num_result_sets, result_sets);
/* Append to term_dict the regular expression for a numeric term under the
 * relation attribute (attribute type 2) of zapt, then run it against the
 * dictionary with dict_lookup_grep.
 * The term is parsed by term_100 straight into the tail of term_dict
 * (term_tmp) and converted with atoi.  That text is then overwritten with
 * the generated expression:
 *   <   gen_regular_rel(value-1, islt=1)
 *   <=  gen_regular_rel(value,   islt=1)
 *   >=  gen_regular_rel(value,   islt=0)
 *   >   gen_regular_rel(value+1, islt=0)
 *   =   "(0*<value>)", i.e. the number with optional leading zeros
 * error_code is set to YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE for a relation
 * that is not handled.
 * NOTE(review): atoi gives no error indication for non-numeric or
 * out-of-range input. */
1660 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1661 const char **term_sub,
1663 oid_value attributeSet,
1664 struct grep_info *grep_info,
1674 char *term_tmp = term_dict + strlen(term_dict);
1677 attr_init_APT(&relation, zapt, 2);
1678 relation_value = attr_find(&relation, NULL);
1680 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1682 switch (relation_value)
1685 yaz_log(log_level_rpn, "Relation <");
1686 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1689 term_value = atoi (term_tmp);
1690 gen_regular_rel(term_tmp, term_value-1, 1);
1693 yaz_log(log_level_rpn, "Relation <=");
1694 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1697 term_value = atoi (term_tmp);
1698 gen_regular_rel(term_tmp, term_value, 1);
1701 yaz_log(log_level_rpn, "Relation >=");
1702 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1705 term_value = atoi (term_tmp);
1706 gen_regular_rel(term_tmp, term_value, 0);
1709 yaz_log(log_level_rpn, "Relation >");
1710 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1713 term_value = atoi (term_tmp);
1714 gen_regular_rel(term_tmp, term_value+1, 0);
1718 yaz_log(log_level_rpn, "Relation =");
1719 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1722 term_value = atoi (term_tmp);
1723 sprintf(term_tmp, "(0*%d)", term_value);
1726 /* term_tmp untouched.. */
1727 while (**term_sub != '\0')
1731 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1734 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1735 r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1738 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1739 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
/* Numeric counterpart of string_term: for every database in basenames,
 * select the database, resolve the index ordinal for zapt, append it to
 * *ol, build the dictionary prefix
 *   "(" + ordinal bytes, each preceded by the escape byte 1, + ")"
 * in term_dict, and let numeric_relation add the numeric expression and
 * perform the lookup.  A relation error from numeric_relation is reported
 * through zebra_setError. */
1743 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1744 const char **term_sub,
1745 oid_value attributeSet, NMEM stream,
1746 struct grep_info *grep_info,
1747 int reg_type, int complete_flag,
1748 int num_bases, char **basenames,
1750 const char *xpath_use,
1751 struct ord_list **ol)
1753 char term_dict[2*IT_MAX_WORD+2];
1756 struct rpn_char_map_info rcmi;
1758 int bases_ok = 0; /* no of databases with OK attribute */
1760 *ol = ord_list_create(stream);
1762 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1764 for (base_no = 0; base_no < num_bases; base_no++)
1766 int max_pos, prefix_len = 0;
1767 int relation_error = 0;
1768 int ord, ord_len, i;
1773 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1775 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1776 basenames[base_no]);
1780 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1781 attributeSet, &ord) != ZEBRA_OK)
1785 *ol = ord_list_append(stream, *ol, ord);
1787 ord_len = key_SU_encode (ord, ord_buf);
1789 term_dict[prefix_len++] = '(';
1790 for (i = 0; i < ord_len; i++)
1792 term_dict[prefix_len++] = 1;
1793 term_dict[prefix_len++] = ord_buf[i];
1795 term_dict[prefix_len++] = ')';
1796 term_dict[prefix_len] = '\0';
1798 if (!numeric_relation(zh, zapt, &termp, term_dict,
1799 attributeSet, grep_info, &max_pos, reg_type,
1800 term_dst, &relation_error))
1804 zebra_setError(zh, relation_error, 0);
1814 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Numeric search: split termz into terms and build one result set per
 * term, using numeric_term for the dictionary lookup and rset_trunc
 * (position not preserved) for the set.  The sets are then combined into
 * *rset: a null set for zero sets, the set itself for one set, otherwise
 * rset_create_and.
 * The local result_sets array is grown by allocating a larger array from
 * stream and copying the old entries.
 * grep_info is released before the sets are combined. */
1819 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1820 Z_AttributesPlusTerm *zapt,
1822 oid_value attributeSet,
1824 int reg_type, int complete_flag,
1825 const char *rank_type,
1826 const char *xpath_use,
1827 int num_bases, char **basenames,
1830 struct rset_key_control *kc)
1832 char term_dst[IT_MAX_WORD+1];
1833 const char *termp = termz;
1834 RSET *result_sets = 0;
1835 int num_result_sets = 0;
1837 struct grep_info grep_info;
1839 zint hits_limit_value;
1840 const char *term_ref_id_str = 0;
1842 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1844 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1845 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1849 struct ord_list *ol;
1850 if (alloc_sets == num_result_sets)
1853 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1856 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1857 alloc_sets = alloc_sets + add;
1860 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1861 grep_info.isam_p_indx = 0;
1862 res = numeric_term(zh, zapt, &termp, attributeSet, stream, &grep_info,
1863 reg_type, complete_flag, num_bases, basenames,
1864 term_dst, xpath_use, &ol);
1865 if (res == ZEBRA_FAIL || termp == 0)
1867 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1868 result_sets[num_result_sets] =
1869 rset_trunc(zh, grep_info.isam_p_buf,
1870 grep_info.isam_p_indx, term_dst,
1871 strlen(term_dst), rank_type,
1872 0 /* preserve position */,
1873 zapt->term->which, rset_nmem,
1874 kc, kc->scope, ol, reg_type,
1877 if (!result_sets[num_result_sets])
1883 grep_info_delete(&grep_info);
1885 if (res != ZEBRA_OK)
1887 if (num_result_sets == 0)
1888 *rset = rset_create_null(rset_nmem, kc, 0);
1889 else if (num_result_sets == 1)
1890 *rset = result_sets[0];
1892 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1893 num_result_sets, result_sets);
/* Search handler for the "local" search type (rpn_search_APT calls it when
 * search_type is "local").
 *
 * Creates *rset as a temporary result set, passing the value of the
 * "setTmpDir" resource to rset_create_temp(), opens it for writing and
 * writes a key into it.
 *
 * NOTE(review): how the key is filled in from the term, and the close and
 * return statements, are not visible in this excerpt. */
1899 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1900 Z_AttributesPlusTerm *zapt,
1902 oid_value attributeSet,
1904 const char *rank_type, NMEM rset_nmem,
1906 struct rset_key_control *kc)
1911 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1912 res_get (zh->res, "setTmpDir"),0 );
1913 rsfd = rset_open(*rset, RSETF_WRITE);
1921 rset_write (rsfd, &key);
/* Records a sort criterion carried inside an RPN query instead of running
 * a search (rpn_search_APT returns the result of this function directly).
 *
 * The term of zapt is read as a decimal number i and used as the slot in
 * sort_sequence->specs that receives a newly built Z_SortKeySpec:
 *   - the sort key is a sortAttributes element made of the OID for
 *     attributeSet and the attribute list of zapt
 *   - attribute type 7 gives the relation: 2 selects descending, 1 and
 *     every other value select ascending
 *   - caseSensitivity is set to 0
 * sort_sequence->specs is allocated on first use with 10 zeroed slots.
 * *rset is set to a null result set.
 *
 * NOTE(review): the statements that follow the failing checks (term not
 * general, i out of range, OID lookup failure) are not visible in this
 * excerpt. */
1926 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1927 oid_value attributeSet, NMEM stream,
1928 Z_SortKeySpecList *sort_sequence,
1929 const char *rank_type,
1932 struct rset_key_control *kc)
1935 int sort_relation_value;
1936 AttrType sort_relation_type;
/* attribute type 7 carries the sort relation */
1943 attr_init_APT(&sort_relation_type, zapt, 7);
1944 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* first sort attribute seen: allocate the spec array and clear it */
1946 if (!sort_sequence->specs)
1948 sort_sequence->num_specs = 10;
1949 sort_sequence->specs = (Z_SortKeySpec **)
1950 nmem_malloc(stream, sort_sequence->num_specs *
1951 sizeof(*sort_sequence->specs));
1952 for (i = 0; i<sort_sequence->num_specs; i++)
1953 sort_sequence->specs[i] = 0;
1955 if (zapt->term->which != Z_Term_general)
/* the term text is the numeric position of this key in the sort sequence */
1958 i = atoi_n ((char *) zapt->term->u.general->buf,
1959 zapt->term->u.general->len);
1960 if (i >= sort_sequence->num_specs)
1962 sprintf(termz, "%d", i);
/* resolve attributeSet to an OID for the sort attributes */
1964 oe.proto = PROTO_Z3950;
1965 oe.oclass = CLASS_ATTSET;
1966 oe.value = attributeSet;
1967 if (!oid_ent_to_oid (&oe, oid))
/* build the Z_SortKeySpec; all pieces are allocated from stream */
1970 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1971 sks->sortElement = (Z_SortElement *)
1972 nmem_malloc(stream, sizeof(*sks->sortElement));
1973 sks->sortElement->which = Z_SortElement_generic;
1974 sk = sks->sortElement->u.generic = (Z_SortKey *)
1975 nmem_malloc(stream, sizeof(*sk));
1976 sk->which = Z_SortKey_sortAttributes;
1977 sk->u.sortAttributes = (Z_SortAttributes *)
1978 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1980 sk->u.sortAttributes->id = oid;
1981 sk->u.sortAttributes->list = zapt->attributes;
1983 sks->sortRelation = (int *)
1984 nmem_malloc(stream, sizeof(*sks->sortRelation));
/* 1 = ascending, 2 = descending, anything else falls back to ascending */
1985 if (sort_relation_value == 1)
1986 *sks->sortRelation = Z_SortKeySpec_ascending;
1987 else if (sort_relation_value == 2)
1988 *sks->sortRelation = Z_SortKeySpec_descending;
1990 *sks->sortRelation = Z_SortKeySpec_ascending;
1992 sks->caseSensitivity = (int *)
1993 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1994 *sks->caseSensitivity = 0;
1996 sks->which = Z_SortKeySpec_null;
1997 sks->u.null = odr_nullval ();
1998 sort_sequence->specs[i] = sks;
/* a sort specification contributes no hits of its own */
1999 *rset = rset_create_null(rset_nmem, kc, 0);
/* Decides whether the use attribute (type 1) of zapt is an X-Path
 * expression and, if so, parses it.
 *
 * The use attribute counts as X-Path only when it is given as a string
 * whose first character is '/'.  In that case the string is handed to
 * zebra_parse_xpath_str() together with xpath, max and mem, and that
 * function's return value is returned; rpn_search_APT stores it as
 * xpath_len.
 *
 * NOTE(review): the value returned when the attribute is not an X-Path
 * string is on a line that is not visible in this excerpt. */
2004 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2005 oid_value attributeSet,
2006 struct xpath_location_step *xpath, int max,
2009 oid_value curAttributeSet = attributeSet;
2011 const char *use_string = 0;
2013 attr_init_APT(&use, zapt, 1);
2014 attr_find_ex(&use, &curAttributeSet, &use_string);
2016 if (!use_string || *use_string != '/')
2019 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/* Looks up term as a dictionary pattern under the index named by
 * xpath_use and returns the matching entries as one result set.
 *
 * The ordinal for xpath_use is obtained with
 * zebraExplain_lookup_attr_str() in category zinfo_index_category_index.
 * The dictionary pattern is that ordinal, encoded by key_SU_encode() with
 * each byte preceded by a 1 byte and wrapped in parentheses, followed by
 * term.  dict_lookup_grep() collects the isam positions and rset_trunc()
 * turns them into the returned set.  A null result set is returned on the
 * early-exit paths.
 *
 * NOTE(review): term is appended with strcpy() and no length check against
 * term_dict[2048]; confirm that callers cannot pass a longer term.
 * NOTE(review): some parameters, declarations and the condition of the
 * second early return are not visible in this excerpt. */
2024 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2025 int reg_type, const char *term,
2026 const char *xpath_use,
2028 struct rset_key_control *kc)
2031 struct grep_info grep_info;
2032 char term_dict[2048];
2035 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2036 zinfo_index_category_index,
2039 int ord_len, i, r, max_pos;
2040 int term_type = Z_Term_characterString;
2041 const char *flags = "void";
2043 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2044 return rset_create_null(rset_nmem, kc, 0);
2047 return rset_create_null(rset_nmem, kc, 0);
2049 term_dict[prefix_len++] = '|';
/* pattern prefix: "(" + encoded ordinal bytes, each preceded by 1, + ")" */
2051 term_dict[prefix_len++] = '(';
2053 ord_len = key_SU_encode (ord, ord_buf);
2054 for (i = 0; i<ord_len; i++)
2056 term_dict[prefix_len++] = 1;
2057 term_dict[prefix_len++] = ord_buf[i];
2059 term_dict[prefix_len++] = ')';
2060 strcpy(term_dict+prefix_len, term);
/* collect the isam positions of all dictionary entries that match */
2062 grep_info.isam_p_indx = 0;
2063 r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2064 &grep_info, &max_pos, 0, grep_handle);
2065 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2066 grep_info.isam_p_indx);
2067 rset = rset_trunc(zh, grep_info.isam_p_buf,
2068 grep_info.isam_p_indx, term, strlen(term),
2069 flags, 1, term_type,rset_nmem,
2070 kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
2071 0 /* term_ref_id_str */);
2072 grep_info_delete(&grep_info);
/* Restricts a result set to hits that lie inside the elements named by an
 * X-Path expression.
 *
 * rset is the incoming term set; when it is null the function works in
 * "always matches" mode (always_matches is 1).  xpath[0..xpath_len-1] are
 * the parsed location steps.  For every database in basenames the steps
 * are processed from the last one backwards.  For each level:
 *   - a reversed path pattern (xpath_rev) is built from the step parts
 *   - a relation predicate on the step, if present, is looked up in the
 *     ZEBRA_XPATH_ATTR_NAME index and becomes rset_attr
 *   - begin-tag and end-tag sets are looked up with xpath_trunc() and the
 *     current set is wrapped with rset_create_between()
 *
 * NOTE(review): several parameters, declarations, branch conditions and
 * the return statements are not visible in this excerpt. */
2077 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2078 int num_bases, char **basenames,
2079 NMEM stream, const char *rank_type, RSET rset,
2080 int xpath_len, struct xpath_location_step *xpath,
2083 struct rset_key_control *kc)
/* no incoming term set means every position matches */
2087 int always_matches = rset ? 0 : 1;
2095 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2096 for (i = 0; i<xpath_len; i++)
2098 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
/* The lines below are what remains of an explanatory comment (its
 * delimiters are not visible here) listing how X-Path forms map onto
 * nested range() sets. */
2110 a[@attr = value]/b[@other = othervalue]
2112 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2113 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2114 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2115 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2116 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2117 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
/* install no character-map handler for the grep lookups that follow */
2121 dict_grep_cmap (zh->reg->dict, 0, 0);
2123 for (base_no = 0; base_no < num_bases; base_no++)
2125 int level = xpath_len;
2128 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2130 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2131 basenames[base_no]);
/* handle the location steps from the last one back to the first */
2135 while (--level >= 0)
2137 WRBUF xpath_rev = wrbuf_alloc();
2139 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* build the path pattern for this level by walking the steps from the
 * current one down to step 1, i.e. in reverse order */
2141 for (i = level; i >= 1; --i)
2143 const char *cp = xpath[i].part;
2149 wrbuf_puts(xpath_rev, "[^/]*");
2150 else if (*cp == ' ')
2151 wrbuf_puts(xpath_rev, "\001 ");
2153 wrbuf_putc(xpath_rev, *cp);
2155 /* wrbuf_putc does not null-terminate , but
2156 wrbuf_puts below ensures it does.. so xpath_rev
2157 is OK iff length is > 0 */
2159 wrbuf_puts(xpath_rev, "/");
2161 else if (i == 1) /* // case */
2162 wrbuf_puts(xpath_rev, ".*");
/* a relation predicate with a non-empty name on this step is looked up as
 * "name=value" (name without its first character) in the attribute-name
 * index */
2164 if (xpath[level].predicate &&
2165 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2166 xpath[level].predicate->u.relation.name[0])
2168 WRBUF wbuf = wrbuf_alloc();
2169 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2170 if (xpath[level].predicate->u.relation.value)
2172 const char *cp = xpath[level].predicate->u.relation.value;
2173 wrbuf_putc(wbuf, '=');
/* backslash-escape characters of the value that are in REGEX_CHARS */
2177 if (strchr(REGEX_CHARS, *cp))
2178 wrbuf_putc(wbuf, '\\');
2179 wrbuf_putc(wbuf, *cp);
2183 wrbuf_puts(wbuf, "");
2184 rset_attr = xpath_trunc(
2185 zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME,
2187 wrbuf_free(wbuf, 1);
/* NOTE(review): the condition guarding this free is not visible here;
 * xpath_rev is used again below, so this must be on a path that does not
 * reach that code - confirm. */
2193 wrbuf_free(xpath_rev, 1);
2197 yaz_log(log_level_rpn, "xpath_rev (%d) = %.*s", level,
2198 wrbuf_len(xpath_rev), wrbuf_buf(xpath_rev));
2199 if (wrbuf_len(xpath_rev))
/* set of begin tags for elements matching the path pattern */
2201 rset_start_tag = xpath_trunc(zh, stream, '0',
2202 wrbuf_buf(xpath_rev),
2203 ZEBRA_XPATH_ELM_BEGIN,
2206 rset = rset_start_tag;
/* set of end tags; the current set is then limited to positions between a
 * begin tag and an end tag, with rset_attr passed along as well */
2209 rset_end_tag = xpath_trunc(zh, stream, '0',
2210 wrbuf_buf(xpath_rev),
2211 ZEBRA_XPATH_ELM_END,
2214 rset = rset_create_between(rset_nmem, kc, kc->scope,
2215 rset_start_tag, rset,
2216 rset_end_tag, rset_attr);
2219 wrbuf_free(xpath_rev, 1);
/* capacity of the xpath[] array that rpn_search_APT hands to
 * rpn_check_xpath */
2227 #define MAX_XPATH_STEPS 10
/* Evaluates one AttributesPlusTerm operand and returns its hits in *rset.
 *
 * Steps visible here:
 *   1. zebra_maps_attr() derives the register id, search type, rank type,
 *      complete flag and sort flag from the attributes of zapt.
 *   2. The term is converted to UTF-8 into termz.
 *   3. A sort specification is handed to rpn_sort_spec() and its result is
 *      returned directly.
 *   4. rpn_check_xpath() decides whether the use attribute is an X-Path;
 *      if so xpath_use is ZEBRA_XPATH_ATTR_CDATA when the last step starts
 *      with '@' and ZEBRA_XPATH_CDATA otherwise.  With relation attribute
 *      (type 2) value 103 "alwaysmatches" no term search is made and
 *      rpn_search_xpath() is called with a null term set.
 *   5. Otherwise the search type ("phrase", "and-list", "or-list", "local"
 *      or "numeric") selects the matching rpn_search_APT_* handler; any
 *      other type sets YAZ_BIB1_UNSUPP_SEARCH.
 *   6. On success the handler's set is passed through rpn_search_xpath().
 *
 * NOTE(review): the conditions guarding steps 3 and 4 and several
 * declarations are not visible in this excerpt. */
2229 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2230 oid_value attributeSet, NMEM stream,
2231 Z_SortKeySpecList *sort_sequence,
2232 int num_bases, char **basenames,
2235 struct rset_key_control *kc)
2237 ZEBRA_RES res = ZEBRA_OK;
2239 char *search_type = NULL;
2240 char rank_type[128];
2243 char termz[IT_MAX_WORD+1];
2245 const char *xpath_use = 0;
2246 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2250 log_level_rpn = yaz_log_module_level("rpn");
/* NOTE(review): the third argument on the next line looks like a
 * mis-encoded address-of reg_id (reg_id is the variable logged just
 * below); confirm against the upstream file. */
2253 zebra_maps_attr(zh->reg->zebra_maps, zapt, ®_id, &search_type,
2254 rank_type, &complete_flag, &sort_flag);
2256 yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2257 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2258 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2259 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2261 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2265 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2266 rank_type, rset_nmem, rset, kc);
2267 /* consider if an X-Path query is used */
2268 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2269 xpath, MAX_XPATH_STEPS, stream);
2272 if (xpath[xpath_len-1].part[0] == '@')
2273 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2275 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
/* attribute type 2 is the relation attribute */
2282 attr_init_APT(&relation, zapt, 2);
2283 relation_value = attr_find(&relation, NULL);
2285 if (relation_value == 103) /* alwaysmatches */
2287 *rset = 0; /* signal no "term" set */
2288 return rpn_search_xpath(zh, num_bases, basenames,
2289 stream, rank_type, *rset,
2290 xpath_len, xpath, rset_nmem, rset, kc);
2295 /* search using one of the various search type strategies
2296 termz is our UTF-8 search term
2297 attributeSet is top-level default attribute set
2298 stream is ODR for search
2299 reg_id is the register type
2300 complete_flag is 1 for complete subfield, 0 for incomplete
2301 xpath_use is use-attribute to be used for X-Path search, 0 for none
2303 if (!strcmp(search_type, "phrase"))
2305 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2306 reg_id, complete_flag, rank_type,
2308 num_bases, basenames, rset_nmem,
2311 else if (!strcmp(search_type, "and-list"))
2313 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2314 reg_id, complete_flag, rank_type,
2316 num_bases, basenames, rset_nmem,
2319 else if (!strcmp(search_type, "or-list"))
2321 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2322 reg_id, complete_flag, rank_type,
2324 num_bases, basenames, rset_nmem,
2327 else if (!strcmp(search_type, "local"))
2329 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2330 rank_type, rset_nmem, rset, kc);
2332 else if (!strcmp(search_type, "numeric"))
2334 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2335 reg_id, complete_flag, rank_type,
2337 num_bases, basenames, rset_nmem,
2342 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2345 if (res != ZEBRA_OK)
2349 return rpn_search_xpath(zh, num_bases, basenames,
2350 stream, rank_type, *rset,
2351 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration: rpn_search_top below calls rpn_search_structure
 * before its definition appears. */
2354 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2355 oid_value attributeSet,
2356 NMEM stream, NMEM rset_nmem,
2357 Z_SortKeySpecList *sort_sequence,
2358 int num_bases, char **basenames,
2359 RSET **result_sets, int *num_result_sets,
2360 Z_Operator *parent_op,
2361 struct rset_key_control *kc);
/* Entry point for evaluating a whole RPN query tree.
 *
 * Creates a key control object, evaluates zs with rpn_search_structure()
 * (passing no parent operator) and hands the single resulting set back in
 * *result_set.  When the evaluation fails, every result set produced so
 * far is deleted.  On success exactly one non-null result set is
 * expected; this is checked with assert().
 *
 * NOTE(review): the last parameter line, the release of kc and the return
 * statements are not visible in this excerpt. */
2363 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2364 oid_value attributeSet,
2365 NMEM stream, NMEM rset_nmem,
2366 Z_SortKeySpecList *sort_sequence,
2367 int num_bases, char **basenames,
2370 RSET *result_sets = 0;
2371 int num_result_sets = 0;
2373 struct rset_key_control *kc = zebra_key_control_create(zh);
2375 res = rpn_search_structure(zh, zs, attributeSet,
2378 num_bases, basenames,
2379 &result_sets, &num_result_sets,
2380 0 /* no parent op */,
2382 if (res != ZEBRA_OK)
/* failure: delete the partial result sets */
2385 for (i = 0; i<num_result_sets; i++)
2386 rset_delete(result_sets[i]);
/* with no parent operator the tree must have been reduced to one set */
2391 assert(num_result_sets == 1);
2392 assert(result_sets);
2393 assert(*result_sets);
2394 *result_set = *result_sets;
/* Recursively evaluates an RPN query node and returns its result as a
 * list of result sets in *result_sets / *num_result_sets.
 *
 * Complex node (operator with two operands):
 *   - both operands s1 and s2 are evaluated recursively; on failure the
 *     sets produced so far are deleted
 *   - the left and right lists are concatenated into one list
 *   - if there is no parent operator, or the parent operator differs from
 *     this one, or this operator is neither AND nor OR, the list is
 *     combined into one set right away (and / or / and-not / prox) and
 *     the list is replaced by that single set
 *   - otherwise the list is returned uncombined, so that the parent with
 *     the same AND or OR operator combines all operands in one step
 *   Proximity is only accepted with a known unit code equal to
 *   Z_ProxUnit_word; other cases set YAZ_BIB1_UNSUPP_PROX_UNIT_CODE.
 *   Any other operator sets YAZ_BIB1_OPERATOR_UNSUPP.
 *
 * Simple node:
 *   - an APT operand is evaluated with rpn_search_APT()
 *   - a result set id is resolved with resultSetRef(); an unknown id sets
 *     YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST
 *   - any other operand sets YAZ_BIB1_UNSUPP_SEARCH
 *   The result is a list holding that one set.
 *
 * Any other node type sets YAZ_BIB1_UNSUPP_SEARCH.
 *
 * NOTE(review): return statements, the switch header and some argument
 * lines are not visible in this excerpt. */
2400 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2401 oid_value attributeSet,
2402 NMEM stream, NMEM rset_nmem,
2403 Z_SortKeySpecList *sort_sequence,
2404 int num_bases, char **basenames,
2405 RSET **result_sets, int *num_result_sets,
2406 Z_Operator *parent_op,
2407 struct rset_key_control *kc)
2409 *num_result_sets = 0;
2410 if (zs->which == Z_RPNStructure_complex)
2413 Z_Operator *zop = zs->u.complex->roperator;
2414 RSET *result_sets_l = 0;
2415 int num_result_sets_l = 0;
2416 RSET *result_sets_r = 0;
2417 int num_result_sets_r = 0;
/* evaluate the left operand */
2419 res = rpn_search_structure(zh, zs->u.complex->s1,
2420 attributeSet, stream, rset_nmem,
2422 num_bases, basenames,
2423 &result_sets_l, &num_result_sets_l,
2425 if (res != ZEBRA_OK)
2428 for (i = 0; i<num_result_sets_l; i++)
2429 rset_delete(result_sets_l[i]);
/* evaluate the right operand */
2432 res = rpn_search_structure(zh, zs->u.complex->s2,
2433 attributeSet, stream, rset_nmem,
2435 num_bases, basenames,
2436 &result_sets_r, &num_result_sets_r,
2438 if (res != ZEBRA_OK)
2441 for (i = 0; i<num_result_sets_l; i++)
2442 rset_delete(result_sets_l[i]);
2443 for (i = 0; i<num_result_sets_r; i++)
2444 rset_delete(result_sets_r[i]);
2448 /* make a new list of result for all children */
2449 *num_result_sets = num_result_sets_l + num_result_sets_r;
2450 *result_sets = nmem_malloc(stream, *num_result_sets *
2451 sizeof(**result_sets));
2452 memcpy(*result_sets, result_sets_l,
2453 num_result_sets_l * sizeof(**result_sets));
2454 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2455 num_result_sets_r * sizeof(**result_sets));
2457 if (!parent_op || parent_op->which != zop->which
2458 || (zop->which != Z_Operator_and &&
2459 zop->which != Z_Operator_or))
2461 /* parent node different from this one (or non-present) */
2462 /* we must combine result sets now */
2466 case Z_Operator_and:
2467 rset = rset_create_and(rset_nmem, kc,
2469 *num_result_sets, *result_sets);
2472 rset = rset_create_or(rset_nmem, kc,
2473 kc->scope, 0, /* termid */
2474 *num_result_sets, *result_sets);
2476 case Z_Operator_and_not:
2477 rset = rset_create_not(rset_nmem, kc,
2482 case Z_Operator_prox:
/* only a known proximity unit is accepted ... */
2483 if (zop->u.prox->which != Z_ProximityOperator_known)
2486 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
/* ... and that unit has to be "word" */
2490 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2492 zebra_setError_zint(zh,
2493 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2494 *zop->u.prox->u.known);
2499 rset = rset_create_prox(rset_nmem, kc,
2501 *num_result_sets, *result_sets,
2502 *zop->u.prox->ordered,
2503 (!zop->u.prox->exclusion ?
2504 0 : *zop->u.prox->exclusion),
2505 *zop->u.prox->relationType,
2506 *zop->u.prox->distance );
2510 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* replace the operand list by the single combined set */
2513 *num_result_sets = 1;
2514 *result_sets = nmem_malloc(stream, *num_result_sets *
2515 sizeof(**result_sets));
2516 (*result_sets)[0] = rset;
2519 else if (zs->which == Z_RPNStructure_simple)
2524 if (zs->u.simple->which == Z_Operand_APT)
2526 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2527 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2528 attributeSet, stream, sort_sequence,
2529 num_bases, basenames, rset_nmem, &rset,
2531 if (res != ZEBRA_OK)
2534 else if (zs->u.simple->which == Z_Operand_resultSetId)
2536 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2537 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2541 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2542 zs->u.simple->u.resultSetId);
2549 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a simple operand always yields a one-element list */
2552 *num_result_sets = 1;
2553 *result_sets = nmem_malloc(stream, *num_result_sets *
2554 sizeof(**result_sets));
2555 (*result_sets)[0] = rset;
2559 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* One slot of a dictionary scan; scan_handle stores a term string and an
 * ISAM_P in each slot it fills.
 * NOTE(review): the member lines of this struct and the header of the
 * enclosing scan_info struct (whose list member follows) are not visible
 * in this excerpt. */
2565 struct scan_info_entry {
2571 struct scan_info_entry *list;
/* Callback given to dict_scan() by rpn_scan; client is the struct
 * scan_info of the scan in progress.
 *
 * name is a dictionary entry and info its associated data.  Entries whose
 * leading bytes differ from scan_info->prefix are not stored.  The slot
 * index is derived from pos as after - pos + before.  Entries whose first
 * byte after the prefix is below 4 are treated as special terms and
 * skipped.  Otherwise the part of name after the prefix is copied into
 * the slot (allocated with odr_malloc) together with the ISAM_P found at
 * info+1; info[0] is asserted to equal sizeof(ISAM_P).
 *
 * NOTE(review): the return values and an adjustment of idx between the
 * lines shown are not visible in this excerpt. */
2577 static int scan_handle (char *name, const char *info, int pos, void *client)
2579 int len_prefix, idx;
2580 struct scan_info *scan_info = (struct scan_info *) client;
2582 len_prefix = strlen(scan_info->prefix);
2583 if (memcmp (name, scan_info->prefix, len_prefix))
2586 idx = scan_info->after - pos + scan_info->before;
2590 /* skip special terms.. of no interest */
2591 if (name[len_prefix] < 4)
2596 scan_info->list[idx].term = (char *)
2597 odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2598 strcpy(scan_info->list[idx].term, name + len_prefix);
/* info is a length byte followed by the ISAM_P itself */
2599 assert (*info == sizeof(ISAM_P));
2600 memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
/* Converts an index term back to display form and returns a copy
 * allocated from stream in *dst.
 *
 * src is first mapped back with zebra_term_untrans() into term_src.  When
 * zh->iconv_from_utf8 is set, term_src is then converted with yaz_iconv()
 * into term_dst and the converted bytes are copied into a new buffer of
 * len + 1 bytes; otherwise *dst is a plain nmem_strdup() of term_src.
 *
 * NOTE(review): the handling of a yaz_iconv() failure and the termination
 * of the copied buffer are on lines not visible in this excerpt. */
2604 void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type,
2605 char **dst, const char *src)
2607 char term_src[IT_MAX_WORD];
2608 char term_dst[IT_MAX_WORD];
2610 zebra_term_untrans (zh, reg_type, term_src, src);
2612 if (zh->iconv_from_utf8 != 0)
2615 char *inbuf = term_src;
2616 size_t inleft = strlen(term_src);
2617 char *outbuf = term_dst;
/* keep one byte of term_dst in reserve */
2618 size_t outleft = sizeof(term_dst)-1;
2621 ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2623 if (ret == (size_t)(-1))
/* number of bytes the conversion produced */
2626 len = outbuf - term_dst;
2627 *dst = nmem_malloc(stream, len + 1);
2629 memcpy (*dst, term_dst, len);
2633 *dst = nmem_strdup(stream, term_src);
/* Stores the hit count of rset in *count.
 *
 * rset->hits_limit is set to zh->approx_limit, then the set is opened and
 * read key by key.  Each time key.mem[0] differs from the previous value
 * (psysno) the new value is remembered and rfd->counted_items is compared
 * with the limit.  The value reported is rset->hits_count.
 *
 * NOTE(review): what happens when the limit is reached (presumably the
 * loop stops) and the closing of rfd are not visible in this excerpt. */
2636 static void count_set(ZebraHandle zh, RSET rset, zint *count)
2642 yaz_log(YLOG_DEBUG, "count_set");
2644 rset->hits_limit = zh->approx_limit;
2647 rfd = rset_open(rset, RSETF_READ);
2648 while (rset_read(rfd, &key,0 /* never mind terms */))
2650 if (key.mem[0] != psysno)
2652 psysno = key.mem[0];
2653 if (rfd->counted_items >= rset->hits_limit)
2658 *count = rset->hits_count;
/* size of the ords[] and ptr[] arrays in rpn_scan, i.e. the largest number
 * of index ordinals one scan merges */
2661 #define RPN_MAX_ORDS 32
/* Scans the index for terms around the term given in zapt.
 *
 * *position and *num_entries are read on entry (pos, num); the entries
 * found are returned through *list and *num_entries is adjusted to the
 * number actually available.  attributeset defaults to VAL_BIB1 when it
 * is VAL_NONE.  Attribute type 8 may name a result set that replaces
 * limit_set; when a limit set is in effect each term's set is ANDed with
 * a duplicate of it before counting.
 *
 * Outline:
 *   1. zebra_maps_attr() gives the index type; one ordinal per database
 *      is collected with zebra_apt_get_ord(), at most RPN_MAX_ORDS.
 *   2. The dictionary is scanned once per ordinal with dict_scan() and
 *      scan_handle, filling one scan_info list of before+after slots each.
 *   3. The terms after the start term are merged across the ordinals in
 *      ascending order: the smallest pending term is picked, the sets of
 *      all ordinals holding that same term are ORed, and the occurrences
 *      are obtained with count_set().
 *   4. The terms before the start term are merged the same way in
 *      descending order.
 *
 * NOTE(review): the computation of before and after, the handling of
 * *is_partial and return_zero, and the return statements are not visible
 * in this excerpt. */
2663 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2664 oid_value attributeset,
2665 int num_bases, char **basenames,
2666 int *position, int *num_entries, ZebraScanEntry **list,
2667 int *is_partial, RSET limit_set, int return_zero)
2670 int pos = *position;
2671 int num = *num_entries;
2675 char termz[IT_MAX_WORD+20];
2676 struct scan_info *scan_info_array;
2677 ZebraScanEntry *glist;
2678 int ords[RPN_MAX_ORDS], ord_no = 0;
2679 int ptr[RPN_MAX_ORDS];
2681 unsigned index_type;
2682 char *search_type = NULL;
2683 char rank_type[128];
2686 NMEM rset_nmem = NULL;
2687 struct rset_key_control *kc = 0;
2692 if (attributeset == VAL_NONE)
2693 attributeset = VAL_BIB1;
2698 int termset_value_numeric;
2699 const char *termset_value_string;
/* attribute type 8 can name a result set that limits the scan, either as
 * a number (formatted into resname) or as a string */
2700 attr_init_APT(&termset, zapt, 8);
2701 termset_value_numeric =
2702 attr_find_ex(&termset, NULL, &termset_value_string);
2703 if (termset_value_numeric != -1)
2706 const char *termset_name = 0;
2708 if (termset_value_numeric != -2)
2711 sprintf(resname, "%d", termset_value_numeric);
2712 termset_name = resname;
2715 termset_name = termset_value_string;
2717 limit_set = resultSetRef (zh, termset_name);
2721 yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d",
2722 pos, num, attributeset);
2724 if (zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2725 rank_type, &complete_flag, &sort_flag))
2728 zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE, 0);
/* collect one index ordinal per database, up to RPN_MAX_ORDS */
2731 for (base_no = 0; base_no < num_bases && ord_no < RPN_MAX_ORDS; base_no++)
2735 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2737 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2738 basenames[base_no]);
2742 if (zebra_apt_get_ord(zh, zapt, index_type, 0, attributeset, &ord)
2745 ords[ord_no++] = ord;
2752 /* prepare dictionary scanning */
2764 yaz_log(YLOG_DEBUG, "rpn_scan pos=%d num=%d before=%d "
2765 "after=%d before+after=%d",
2766 pos, num, before, after, before+after);
2767 scan_info_array = (struct scan_info *)
2768 odr_malloc(stream, ord_no * sizeof(*scan_info_array));
/* scan the dictionary once per ordinal; each scan fills its own list */
2769 for (i = 0; i < ord_no; i++)
2771 int j, prefix_len = 0;
2772 int before_tmp = before, after_tmp = after;
2773 struct scan_info *scan_info = scan_info_array + i;
2774 struct rpn_char_map_info rcmi;
2776 rpn_char_map_prepare (zh->reg, index_type, &rcmi);
2778 scan_info->before = before;
2779 scan_info->after = after;
2780 scan_info->odr = stream;
2782 scan_info->list = (struct scan_info_entry *)
2783 odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2784 for (j = 0; j<before+after; j++)
2785 scan_info->list[j].term = NULL;
/* the scan key is the encoded ordinal followed by the translated term;
 * the encoded ordinal alone is kept as the prefix scan_handle checks */
2787 prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2788 termz[prefix_len] = 0;
2789 strcpy(scan_info->prefix, termz);
2791 if (trans_scan_term(zh, zapt, termz+prefix_len, index_type) ==
2795 dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2796 scan_info, scan_handle);
2798 glist = (ZebraScanEntry *)
2799 odr_malloc(stream, (before+after)*sizeof(*glist));
2801 rset_nmem = nmem_create();
2802 kc = zebra_key_control_create(zh);
2804 /* consider terms after main term */
2805 for (i = 0; i < ord_no; i++)
2809 for (i = 0; i<after; i++)
2812 const char *mterm = NULL;
2815 int lo = i + pos-1; /* offset in result list */
2817 /* find: j0 is the first of the minimal values */
2818 for (j = 0; j < ord_no; j++)
2820 if (ptr[j] < before+after && ptr[j] >= 0 &&
2821 (tst = scan_info_array[j].list[ptr[j]].term) &&
2822 (!mterm || strcmp (tst, mterm) < 0))
2829 break; /* no value found, stop */
2831 /* get result set for first one , but only if it's within bounds */
2834 /* get result set for first term */
2835 zebra_term_untrans_iconv(zh, stream->mem, index_type,
2836 &glist[lo].term, mterm);
2837 rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2838 glist[lo].term, strlen(glist[lo].term),
2839 NULL, 0, zapt->term->which, rset_nmem,
2840 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2841 0 /* term_ref_id_str */);
2843 ptr[j0]++; /* move index for this set .. */
2844 /* get result set for remaining scan terms */
2845 for (j = j0+1; j<ord_no; j++)
2847 if (ptr[j] < before+after && ptr[j] >= 0 &&
2848 (tst = scan_info_array[j].list[ptr[j]].term) &&
2849 !strcmp (tst, mterm))
2858 zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2860 strlen(glist[lo].term), NULL, 0,
2861 zapt->term->which,rset_nmem,
2862 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2863 0 /* term_ref_id_str */ );
2864 rset = rset_create_or(rset_nmem, kc,
2865 kc->scope, 0 /* termid */,
2874 /* merge with limit_set if given */
2879 rsets[1] = rset_dup(limit_set);
2881 rset = rset_create_and(rset_nmem, kc, kc->scope, 2, rsets);
2884 count_set(zh, rset, &count);
2885 glist[lo].occurrences = count;
/* fewer terms than requested were found after the start term */
2891 *num_entries -= (after-i);
2893 if (*num_entries < 0)
2896 nmem_destroy(rset_nmem);
2901 /* consider terms before main term */
2902 for (i = 0; i<ord_no; i++)
2905 for (i = 0; i<before; i++)
2908 const char *mterm = NULL;
2911 int lo = before-1-i; /* offset in result list */
/* pick the largest pending term; the "before" slots are read from the end
 * of the first before entries backwards */
2914 for (j = 0; j <ord_no; j++)
2916 if (ptr[j] < before && ptr[j] >= 0 &&
2917 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2918 (!mterm || strcmp (tst, mterm) > 0))
2927 zebra_term_untrans_iconv(zh, stream->mem, index_type,
2928 &glist[lo].term, mterm);
2931 (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2932 glist[lo].term, strlen(glist[lo].term),
2933 NULL, 0, zapt->term->which, rset_nmem,
2934 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2935 0 /* term_ref_id_str */);
/* OR in the sets of the other ordinals that hold the same term */
2939 for (j = j0+1; j<ord_no; j++)
2941 if (ptr[j] < before && ptr[j] >= 0 &&
2942 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2943 !strcmp (tst, mterm))
2948 rsets[1] = rset_trunc(
2950 &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2952 strlen(glist[lo].term), NULL, 0,
2953 zapt->term->which, rset_nmem,
2954 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2955 0 /* term_ref_id_str */);
2956 rset = rset_create_or(rset_nmem, kc,
2957 kc->scope, 0 /* termid */, 2, rsets);
2966 rsets[1] = rset_dup(limit_set);
2968 rset = rset_create_and(rset_nmem, kc, kc->scope, 2, rsets);
2970 count_set(zh, rset, &count);
2971 glist[lo].occurrences = count;
2975 nmem_destroy(rset_nmem);
2982 if (*num_entries <= 0)
2989 *list = glist + i; /* list is set to first 'real' entry */
2991 yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
2992 *position, *num_entries);
2999 * indent-tabs-mode: nil
3001 * vim: shiftwidth=4 tabstop=8 expandtab