1 /* $Id: rpnsearch.c,v 1.14 2007-05-14 14:05:21 adam Exp $
2 Copyright (C) 1995-2007
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
/* Logging state for this file.  log_level_rpn receives the level of the
 * "rpn" YAZ log module inside add_isam_p.  log_level_set is presumably the
 * once-only guard for that lookup - TODO confirm, the guard is not visible
 * in this excerpt. */
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
/* NOTE(review): no use of TERMSET_DISABLE is visible in this excerpt;
 * presumably it gates the "termset" handling in grep_info_prepare. */
43 #define TERMSET_DISABLE 1
/* Character-map callback installed by rpn_char_map_prepare() through
 * dict_grep_cmap().
 *
 * vp   - the struct rpn_char_map_info given at registration time
 * from - input position, passed on unchanged to zebra_maps_input()
 * len  - number of input bytes available
 *
 * The mapping itself is delegated to zebra_maps_input() using the zebra maps
 * handle and register type stored in the map info. */
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
47 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48 const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
/* Diagnostic dump of the mapped output: a "---" separator, then bytes in
 * hex.  NOTE(review): the condition/loop around these statements is not
 * visible in this excerpt - confirm whether the dump is compiled in. */
52 const char *outp = *out;
53 yaz_log(YLOG_LOG, "---");
56 yaz_log(YLOG_LOG, "%02X", *outp);
/* Prepare dictionary grep character mapping for one register type.
 *
 * Stores the register's zebra maps handle and reg_type in map_info, then
 * registers rpn_char_map_handler (with map_info as its user data) on
 * reg->dict via dict_grep_cmap().  map_info is handed to the dictionary as
 * callback data, so it presumably has to stay valid while grep calls on that
 * dictionary are made - TODO confirm against dict_grep_cmap(). */
64 void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
65 struct rpn_char_map_info *map_info)
67 map_info->zm = reg->zebra_maps;
68 map_info->reg_type = reg_type;
69 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
/* Record one dictionary hit in the grep accumulator.
 *
 * name - dictionary key of the hit: an encoded ordinal (decoded below with
 *        key_SU_decode) followed by the term bytes
 * info - dictionary value: info[0] must equal sizeof(ISAM_P) and the ISAM_P
 *        value itself starts at info+1
 * The third parameter is declared on a line not shown in this excerpt; it is
 * used here as "p" and grep_handle passes a struct grep_info * for it.
 *
 * The ISAM position is stored at p->isam_p_buf[p->isam_p_indx].  The term is
 * also translated back, logged, and passed to resultSetAddTerm() together
 * with its database and index name (any guard on p->termset is not visible
 * here). */
87 static int add_isam_p(const char *name, const char *info,
92 log_level_rpn = yaz_log_module_level("rpn");
95 /* we may have to stop this madness.. NOTE: -1 so that if
96 truncmax == trunxlimit we do *not* generate result sets */
97 if (p->isam_p_indx >= p->trunc_max - 1)
/* Position buffer full: grow to 2*size + 100 entries and carry over the
 * isam_p_indx entries collected so far. */
100 if (p->isam_p_indx == p->isam_p_size)
102 ISAM_P *new_isam_p_buf;
106 p->isam_p_size = 2*p->isam_p_size + 100;
107 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
111 memcpy(new_isam_p_buf, p->isam_p_buf,
112 p->isam_p_indx * sizeof(*p->isam_p_buf));
113 xfree(p->isam_p_buf);
115 p->isam_p_buf = new_isam_p_buf;
/* The parallel term_no array is grown to the same capacity. */
118 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* NOTE(review): the copy source below is p->isam_p_buf although the
 * destination and element size belong to term_no; this looks like a
 * copy/paste slip for p->term_no - confirm before relying on term_no. */
121 memcpy(new_term_no, p->isam_p_buf,
122 p->isam_p_indx * sizeof(*p->term_no));
125 p->term_no = new_term_no;
/* info is length-prefixed: first byte is the payload size. */
128 assert(*info == sizeof(*p->isam_p_buf));
129 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
/* Resolve the hit for logging / term set: split the ordinal prefix from the
 * term, untranslate the term, and look up database and index name. */
134 char term_tmp[IT_MAX_WORD];
136 const char *index_name;
137 int len = key_SU_decode (&ord, (const unsigned char *) name);
139 zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len);
140 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
141 zebraExplain_lookup_ord(p->zh->reg->zei,
142 ord, 0 /* index_type */, &db, &index_name);
143 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
145 resultSetAddTerm(p->zh, p->termset, name[len], db,
146 index_name, term_tmp);
/* Callback adapter used with dict_lookup_grep(): casts the opaque user
 * pointer p back to struct grep_info * and forwards the hit (key name and
 * value info) to add_isam_p(), returning its result. */
152 static int grep_handle(char *name, const char *info, void *p)
154 return add_isam_p(name, info, (struct grep_info *) p);
/* Pre-scan used by the term_1xx handlers before they consume a term.
 *
 * src      - in: scan start (read into s0); presumably updated to the first
 *            significant character - TODO confirm, the store is not visible
 * ct1, ct2 - optional (may be NULL) sets of characters that are tested
 *            against the current character before it is mapped
 * first    - passed through to zebra_maps_input()
 *
 * Visible logic: the current character is checked against ct1 and ct2, then
 * mapped with zebra_maps_input(); a mapping different from CHR_SPACE is
 * treated specially.  The actions taken and the return statement are on
 * lines missing from this excerpt; every caller tests the result for zero. */
157 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
158 const char *ct1, const char *ct2, int first)
160 const char *s1, *s0 = *src;
163 /* skip white space */
166 if (ct1 && strchr(ct1, *s0))
168 if (ct2 && strchr(ct2, *s0))
171 map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
172 if (**map != *CHR_SPACE)
/* Produce a printable dump of a byte buffer for logging.
 *
 * out_buf, out_size - destination and its capacity; out_size must exceed 20
 *                     (asserted)
 * in_buf, in_size   - bytes to dump
 *
 * Each input byte is appended as "%02X:%c " (hex value, then a display
 * character pc).  Bytes outside 32..126 take a separate branch whose effect
 * on pc is not visible in this excerpt.  Once the output comes within 20
 * bytes of out_size, ".." is appended; presumably the loop stops there -
 * TODO confirm.  NOTE(review): appends use strlen(out_buf), so out_buf must
 * already be a valid string when the loop starts; the initialisation is not
 * visible here. */
181 static void esc_str(char *out_buf, size_t out_size,
182 const char *in_buf, int in_size)
188 assert(out_size > 20);
190 for (k = 0; k<in_size; k++)
192 int c = in_buf[k] & 0xff;
194 if (c < 32 || c > 126)
198 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
199 if (strlen(out_buf) > out_size-20)
201 strcat(out_buf, "..");
/* Characters that add_non_space() and term_100() prefix with a backslash
 * when copying them literally into a dictionary regular expression. */
207 #define REGEX_CHARS " []()|.*+?!"
/* Append one non-space input unit to both outputs of a term handler.
 *
 * start, end - the raw input bytes of the unit ([start, end))
 * dst_term   - display copy of the term; the raw bytes are copied to
 *              dst_term + *dst_ptr
 * dst_ptr    - current length of dst_term (its update is not visible here)
 * map        - mapped form of the unit as returned by the zebra maps call
 * q_map_match- presumably selects between the two visible output paths
 *              (raw bytes with REGEX_CHARS escaped vs. the mapped string
 *              map[0]) - TODO confirm, the condition is not visible
 * A further parameter (used below as term_dict, the regexp being built) is
 * declared on a line missing from this excerpt. */
209 static void add_non_space(const char *start, const char *end,
211 char *dst_term, int *dst_ptr,
212 const char **map, int q_map_match)
214 size_t sz = end - start;
215 memcpy(dst_term + *dst_ptr, start, sz);
/* Raw path: regexp metacharacters get a backslash before being emitted. */
221 if (strchr(REGEX_CHARS, *start))
222 wrbuf_putc(term_dict, '\\');
223 wrbuf_putc(term_dict, *start);
/* Mapped path: map[0] is dumped via esc_str (presumably for logging) and
 * appended to the regexp as-is. */
230 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
232 wrbuf_puts(term_dict, map[0]);
/* Consume one plain term (no truncation operators) from *src.
 *
 * The term is scanned unit by unit with zebra_maps_search(); each unit is
 * appended to term_dict (regexp form) and to dst_term (display form, a
 * parameter declared on a line not shown here).  space_split selects word
 * mode; when it is zero ("complete subfield") embedded spaces are held back
 * in space_start/space_end and re-emitted, escaped per REGEX_CHARS, only if
 * more non-space input follows.  Callers treat a zero return as "no term";
 * the return statements are not visible in this excerpt. */
236 /* term_100: handle term, where trunc = none(no operators at all) */
237 static int term_100(ZebraMaps zebra_maps, int reg_type,
238 const char **src, WRBUF term_dict, int space_split,
246 const char *space_start = 0;
247 const char *space_end = 0;
249 if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
256 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
260 if (**map == *CHR_SPACE)
263 else /* complete subfield only. */
265 if (**map == *CHR_SPACE)
266 { /* save space mapping for later .. */
271 else if (space_start)
272 { /* reload last space */
273 while (space_start < space_end)
275 if (strchr(REGEX_CHARS, *space_start))
276 wrbuf_putc(term_dict, '\\');
277 dst_term[j++] = *space_start;
278 wrbuf_putc(term_dict, *space_start);
283 space_start = space_end = 0;
288 add_non_space(s1, s0, term_dict, dst_term, &j,
/* Consume one term in which '#' is a wildcard.
 *
 * '#' is given to term_pre() as the special character set.  In the visible
 * code a special character is copied to dst_term and contributes ".*" to the
 * regexp in term_dict (presumably this is the '#' branch - the test itself
 * is not visible).  Other input is mapped with zebra_maps_search() and
 * appended through add_non_space(); in word mode (space_split) a unit that
 * maps to CHR_SPACE is tested for, presumably to end the term.  dst_term is
 * NUL-terminated at the end.  Callers treat a zero return as "no term". */
296 /* term_101: handle term, where trunc = Process # */
297 static int term_101(ZebraMaps zebra_maps, int reg_type,
298 const char **src, WRBUF term_dict, int space_split,
306 if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
314 wrbuf_puts(term_dict, ".*");
315 dst_term[j++] = *s0++;
321 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
323 if (space_split && **map == *CHR_SPACE)
327 add_non_space(s1, s0, term_dict, dst_term, &j,
331 dst_term[j++] = '\0';
/* Consume one term given as a regular expression (Regexp-2).
 *
 * errors - optional output.  When non-NULL and the term starts with
 *          "+<digit>+" followed by at least one more character, the digit is
 *          stored in *errors (string_term passes this value on to
 *          dict_lookup_grep).  term_102 passes NULL to disable this.
 *
 * Characters from the set ^ \ ( ) [ ] . * + ? | - are emitted to term_dict
 * unescaped, i.e. they keep their regexp meaning; other input is mapped with
 * zebra_maps_search() and appended through add_non_space().  Callers treat a
 * zero return as "no term"; the return statements are not visible here. */
336 /* term_103: handle term, where trunc = re-2 (regular expressions) */
337 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
338 WRBUF term_dict, int *errors, int space_split,
346 if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
349 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
350 isdigit(((const unsigned char *)s0)[1]))
352 *errors = s0[1] - '0';
359 if (strchr("^\\()[].*+?|-", *s0))
362 wrbuf_putc(term_dict, *s0);
370 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
372 if (space_split && **map == *CHR_SPACE)
376 add_non_space(s1, s0, term_dict, dst_term, &j,
386 /* term_102: handle term, where trunc = re-1 (regular expressions).
 * Thin wrapper: same processing as term_103 but with errors == NULL, so no
 * "+<digit>+" prefix is interpreted. */
387 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
388 WRBUF term_dict, int space_split, char *dst_term)
390 return term_103(zebra_maps, reg_type, src, term_dict, NULL, space_split,
/* Consume one term containing wildcard operators.
 *
 * NOTE(review): the one-line header below mentions '#' and '!', but the
 * special set given to term_pre() in the visible code is "?*#" - confirm
 * which operators are really handled.
 *
 * Visible behaviour: a special character may be followed by a decimal
 * number, accumulated in "limit" while its digits are copied to dst_term;
 * depending on branches not visible here the regexp receives ".?", ".*" or
 * ".".  Other input is mapped with zebra_maps_search() and appended through
 * add_non_space().  dst_term is NUL-terminated at the end.  Callers treat a
 * zero return as "no term". */
395 /* term_104: handle term, process # and ! */
396 static int term_104(ZebraMaps zebra_maps, int reg_type,
397 const char **src, WRBUF term_dict, int space_split,
405 if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
413 dst_term[j++] = *s0++;
414 if (*s0 >= '0' && *s0 <= '9')
417 while (*s0 >= '0' && *s0 <= '9')
419 limit = limit * 10 + (*s0 - '0');
420 dst_term[j++] = *s0++;
426 wrbuf_puts(term_dict, ".?");
431 wrbuf_puts(term_dict, ".*");
437 wrbuf_puts(term_dict, ".*");
438 dst_term[j++] = *s0++;
443 wrbuf_puts(term_dict, ".");
444 dst_term[j++] = *s0++;
450 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
452 if (space_split && **map == *CHR_SPACE)
456 add_non_space(s1, s0, term_dict, dst_term, &j,
460 dst_term[j++] = '\0';
/* Consume one term in which '*' and '!' are wildcards; shared by truncation
 * values 105 and 106 in string_term.
 *
 * right_truncate - string_term passes 1 for truncation 105 and 0 for 106;
 *                  presumably it controls the trailing ".*" appended after
 *                  the term - TODO confirm, the test is not visible here.
 *
 * In the visible code one special character contributes ".*" and another
 * contributes "." to the regexp (presumably '*' and '!' respectively); each
 * is also copied to dst_term.  Other input is mapped with
 * zebra_maps_search() and appended through add_non_space().  dst_term is
 * NUL-terminated at the end.  Callers treat a zero return as "no term". */
465 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
466 static int term_105(ZebraMaps zebra_maps, int reg_type,
467 const char **src, WRBUF term_dict, int space_split,
468 char *dst_term, int right_truncate)
475 if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
483 wrbuf_puts(term_dict, ".*");
484 dst_term[j++] = *s0++;
489 wrbuf_putc(term_dict, '.');
490 dst_term[j++] = *s0++;
496 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
498 if (space_split && **map == *CHR_SPACE)
502 add_non_space(s1, s0, term_dict, dst_term, &j,
507 wrbuf_puts(term_dict, ".*");
508 dst_term[j++] = '\0';
514 /* gen_regular_rel - generate regular expression from relation
515 * val: border value (inclusive)
516 * islt: 1 if <=; 0 if >=.
/* Append to term_dict a regular expression over decimal integer strings for
 * a one-sided numeric relation with border value val; numeric_relation()
 * passes islt = 1 for "<" / "<=" and islt = 0 for ">" / ">=".
 *
 * The expression is assembled in the fixed-size buffer dst_buf using
 * strcpy/strcat/sprintf and copied to term_dict at the end.  Two openers are
 * visible: "(-[0-9]+|(" (all negative numbers, or ...) and "([0-9]+|-("
 * (all non-negative numbers, or a minus sign followed by ...); the
 * conditions choosing between them are not visible in this excerpt.
 * NOTE(review): nothing visible bounds the writes into dst_buf; the size is
 * only "assumed enough" per the declaration comment. */
518 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
520 char dst_buf[20*5*20]; /* assuming enough for expansion */
527 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
531 strcpy(dst, "(-[0-9]+|(");
539 strcpy(dst, "([0-9]+|-(");
/* Work digit by digit over the decimal text of val, last digit first. */
550 sprintf(numstr, "%d", val);
551 for (w = strlen(numstr); --w >= 0; pos++)
570 strcpy(dst + dst_p, numstr);
571 dst_p = strlen(dst) - pos - 1;
599 for (i = 0; i<pos; i++)
612 /* match everything less than 10^(pos-1) */
614 for (i = 1; i<pos; i++)
615 strcat(dst, "[0-9]?");
619 /* match everything greater than 10^pos */
620 for (i = 0; i <= pos; i++)
621 strcat(dst, "[0-9]");
622 strcat(dst, "[0-9]*");
625 wrbuf_puts(term_dict, dst);
/* Copy one logical character from wsrc, starting at offset *indx, to term_p.
 *
 * A backslash at *indx is copied and then the character it escapes is
 * copied as well, so an escaped pair stays together.  The statements that
 * advance *indx are not visible in this excerpt; the callers' loops carry no
 * increment of their own, so they depend on *indx being moved forward here. */
628 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
630 const char *src = wrbuf_cstr(wsrc);
631 if (src[*indx] == '\\')
633 wrbuf_putc(term_p, src[*indx]);
636 wrbuf_putc(term_p, src[*indx]);
641 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
642 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
643 * >= abc ([b-].*|a[c-].*|ab[c-].*)
644 * ([^-a].*|a[^-b].*|ab[c-].*)
645 * < abc ([-0].*|a[-a].*|ab[-b].*)
646 * ([^a-].*|a[^b-].*|ab[^c-].*)
647 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
648 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* Translate a string term plus its relation attribute into a dictionary
 * regular expression appended to term_dict.
 *
 * zapt       - query leaf; its attribute type 2 (relation) selects the branch
 * term_sub   - in/out cursor into the query term, consumed through term_100()
 * term_dst   - receives the display form of the term (filled by term_100())
 * error_code - (declared on a line not shown) set to
 *              YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE for an unknown relation
 *
 * Every ordering relation first normalises the term into the scratch buffer
 * term_component and then writes a parenthesised alternation with one
 * alternative per prefix of the term; the intended shapes are listed in the
 * table comment preceding this function.  term_component is destroyed on
 * every exit visible here.  The case labels and return statements are on
 * lines missing from this excerpt; the log messages identify the branches. */
650 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
651 const char **term_sub, WRBUF term_dict,
652 const Odr_oid *attributeSet,
653 int reg_type, int space_split, char *term_dst,
659 WRBUF term_component = wrbuf_alloc();
661 attr_init_APT(&relation, zapt, 2);
662 relation_value = attr_find(&relation, NULL);
665 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
666 switch (relation_value)
/* ---- relation "<" ---- */
669 if (!term_100(zh->reg->zebra_maps, reg_type,
670 term_sub, term_component,
671 space_split, term_dst))
673 wrbuf_destroy(term_component);
676 yaz_log(log_level_rpn, "Relation <");
678 wrbuf_putc(term_dict, '(');
679 for (i = 0; i < wrbuf_len(term_component); )
684 wrbuf_putc(term_dict, '|');
/* j re-emits the prefix already processed; i consumes the next character,
 * which goes into the bracket expression. */
686 string_rel_add_char(term_dict, term_component, &j);
688 wrbuf_putc(term_dict, '[');
690 wrbuf_putc(term_dict, '^');
/* Byte 1 is the internal regexp escape (see string_term), so the following
 * FIRST_IN_FIELD_CHAR is taken literally inside the class. */
692 wrbuf_putc(term_dict, 1);
693 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
695 string_rel_add_char(term_dict, term_component, &i);
696 wrbuf_putc(term_dict, '-');
698 wrbuf_putc(term_dict, ']');
699 wrbuf_putc(term_dict, '.');
700 wrbuf_putc(term_dict, '*');
702 wrbuf_putc(term_dict, ')');
/* ---- relation "<=": as "<", plus the term itself as last alternative ---- */
705 if (!term_100(zh->reg->zebra_maps, reg_type,
706 term_sub, term_component,
707 space_split, term_dst))
709 wrbuf_destroy(term_component);
712 yaz_log(log_level_rpn, "Relation <=");
714 wrbuf_putc(term_dict, '(');
715 for (i = 0; i < wrbuf_len(term_component); )
720 string_rel_add_char(term_dict, term_component, &j);
721 wrbuf_putc(term_dict, '[');
723 wrbuf_putc(term_dict, '^');
725 wrbuf_putc(term_dict, 1);
726 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
728 string_rel_add_char(term_dict, term_component, &i);
729 wrbuf_putc(term_dict, '-');
731 wrbuf_putc(term_dict, ']');
732 wrbuf_putc(term_dict, '.');
733 wrbuf_putc(term_dict, '*');
735 wrbuf_putc(term_dict, '|');
737 for (i = 0; i < wrbuf_len(term_component); )
738 string_rel_add_char(term_dict, term_component, &i);
739 wrbuf_putc(term_dict, ')');
/* ---- relation ">": final alternative is the term followed by ".+" ---- */
742 if (!term_100 (zh->reg->zebra_maps, reg_type,
743 term_sub, term_component, space_split, term_dst))
745 wrbuf_destroy(term_component);
748 yaz_log(log_level_rpn, "Relation >");
750 wrbuf_putc(term_dict, '(');
751 for (i = 0; i < wrbuf_len(term_component); )
756 string_rel_add_char(term_dict, term_component, &j);
757 wrbuf_putc(term_dict, '[');
759 wrbuf_putc(term_dict, '^');
760 wrbuf_putc(term_dict, '-');
761 string_rel_add_char(term_dict, term_component, &i);
763 wrbuf_putc(term_dict, ']');
764 wrbuf_putc(term_dict, '.');
765 wrbuf_putc(term_dict, '*');
767 wrbuf_putc(term_dict, '|');
769 for (i = 0; i < wrbuf_len(term_component); )
770 string_rel_add_char(term_dict, term_component, &i);
771 wrbuf_putc(term_dict, '.');
772 wrbuf_putc(term_dict, '+');
773 wrbuf_putc(term_dict, ')');
/* ---- relation ">=": the last character uses an inclusive range ---- */
776 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
777 term_component, space_split, term_dst))
779 wrbuf_destroy(term_component);
782 yaz_log(log_level_rpn, "Relation >=");
784 wrbuf_putc(term_dict, '(');
785 for (i = 0; i < wrbuf_len(term_component); )
790 wrbuf_putc(term_dict, '|');
792 string_rel_add_char(term_dict, term_component, &j);
793 wrbuf_putc(term_dict, '[');
/* Not yet at the last byte of the term: negated class "[^-c]"; otherwise
 * (next visible branch) an open range "[c-]". */
795 if (i < wrbuf_len(term_component)-1)
797 wrbuf_putc(term_dict, '^');
798 wrbuf_putc(term_dict, '-');
799 string_rel_add_char(term_dict, term_component, &i);
803 string_rel_add_char(term_dict, term_component, &i);
804 wrbuf_putc(term_dict, '-');
806 wrbuf_putc(term_dict, ']');
807 wrbuf_putc(term_dict, '.');
808 wrbuf_putc(term_dict, '*');
810 wrbuf_putc(term_dict, ')');
/* ---- relation "=": the normalised term, parenthesised ---- */
817 yaz_log(log_level_rpn, "Relation =");
818 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
819 term_component, space_split, term_dst))
821 wrbuf_destroy(term_component);
824 wrbuf_puts(term_dict, "(");
825 wrbuf_puts(term_dict, wrbuf_cstr(term_component));
826 wrbuf_puts(term_dict, ")");
/* ---- relation "always matches" ---- */
829 yaz_log(log_level_rpn, "Relation always matches");
830 /* skip to end of term (we don't care what it is) */
831 while (**term_sub != '\0')
/* ---- any other relation value: report it as unsupported ---- */
835 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
836 wrbuf_destroy(term_component);
839 wrbuf_destroy(term_component);
/* Forward declaration: string_term() is defined after term_trunc(), which
 * calls it. */
843 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
844 const char **term_sub,
846 const Odr_oid *attributeSet, NMEM stream,
847 struct grep_info *grep_info,
848 int reg_type, int complete_flag,
849 int num_bases, char **basenames,
851 const char *xpath_use,
852 struct ord_list **ol);
/* Extract the per-term hit limit and term reference id from a query leaf.
 *
 * hits_limit_value - out: value of attribute type 11, normalised as follows
 *                      not given (-1) and a term reference id is present
 *                                    -> zh->approx_limit
 *                      not given (-1) and no term reference id -> 0
 *                      given as 0    -> zh->approx_limit
 *                      anything else -> unchanged
 * term_ref_id_str  - out: attribute type 10.  A numeric value >= 0 is
 *                    formatted as decimal text into a 20-byte buffer taken
 *                    from nmem; otherwise whatever attr_find_ex() stored is
 *                    kept.
 * The nmem parameter is declared on a line not shown in this excerpt. */
854 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
855 Z_AttributesPlusTerm *zapt,
856 zint *hits_limit_value,
857 const char **term_ref_id_str,
860 AttrType term_ref_id_attr;
861 AttrType hits_limit_attr;
864 attr_init_APT(&hits_limit_attr, zapt, 11);
865 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
867 attr_init_APT(&term_ref_id_attr, zapt, 10);
868 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
869 if (term_ref_id_int >= 0)
/* 20 bytes is ample for the decimal text of an int. */
871 char *res = nmem_malloc(nmem, 20);
872 sprintf(res, "%d", term_ref_id_int);
873 *term_ref_id_str = res;
876 /* no limit given ? */
877 if (*hits_limit_value == -1)
879 if (*term_ref_id_str)
881 /* use global if term_ref is present */
882 *hits_limit_value = zh->approx_limit;
886 /* no counting if term_ref is not present */
887 *hits_limit_value = 0;
890 else if (*hits_limit_value == 0)
892 /* 0 is the same as global limit */
893 *hits_limit_value = zh->approx_limit;
895 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
896 *term_ref_id_str ? *term_ref_id_str : "none",
/* Build the result set for the next term at *term_sub.
 *
 * Steps visible here:
 *  1. read hit limit / term reference id with term_limits_APT();
 *  2. reset grep_info->isam_p_indx and let string_term() collect the ISAM
 *     positions of all dictionary entries matching the term;
 *  3. turn the collected positions into one RSET with rset_trunc()
 *     (positions preserved) and store it in *rset.
 * term_dict is a scratch regexp buffer allocated and destroyed in this
 * function.  A NULL *term_sub after string_term() means no more terms; the
 * action taken in that case is not visible in this excerpt.  Parameters
 * term_dst, rset_nmem and rset are declared on lines not shown. */
901 static ZEBRA_RES term_trunc(ZebraHandle zh,
902 Z_AttributesPlusTerm *zapt,
903 const char **term_sub,
904 const Odr_oid *attributeSet, NMEM stream,
905 struct grep_info *grep_info,
906 int reg_type, int complete_flag,
907 int num_bases, char **basenames,
909 const char *rank_type,
910 const char *xpath_use,
913 struct rset_key_control *kc)
917 zint hits_limit_value;
918 const char *term_ref_id_str = 0;
919 WRBUF term_dict = wrbuf_alloc();
922 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
923 grep_info->isam_p_indx = 0;
924 res = string_term(zh, zapt, term_sub, term_dict,
925 attributeSet, stream, grep_info,
926 reg_type, complete_flag, num_bases, basenames,
927 term_dst, xpath_use, &ol);
928 wrbuf_destroy(term_dict);
931 if (!*term_sub) /* no more terms ? */
933 yaz_log(log_level_rpn, "term: %s", term_dst);
934 *rset = rset_trunc(zh, grep_info->isam_p_buf,
935 grep_info->isam_p_indx, term_dst,
936 strlen(term_dst), rank_type, 1 /* preserve pos */,
937 zapt->term->which, rset_nmem,
938 kc, kc->scope, ol, reg_type, hits_limit_value,
/* Look up one string term in the dictionary for every selected database and
 * collect the matching ISAM positions in grep_info.
 *
 * term_sub      - in/out cursor into the query term; each database restarts
 *                 from the same position (termp)
 * term_dict     - scratch buffer for the dictionary regexp (declared on a
 *                 line not shown; rewound for each database)
 * complete_flag - non-zero selects complete-field matching (space_split = 0)
 * ol            - out: list of index ordinals used, allocated from stream
 *
 * For each database the regexp is "(<ordinal bytes, each escaped with \001>)"
 * followed by a parenthesised term expression chosen by the truncation
 * attribute (type 5).  An unknown truncation value is reported as
 * YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE.  The regexp is then run with
 * dict_lookup_grep().  Return statements and several error branches are on
 * lines missing from this excerpt. */
945 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
946 const char **term_sub,
948 const Odr_oid *attributeSet, NMEM stream,
949 struct grep_info *grep_info,
950 int reg_type, int complete_flag,
951 int num_bases, char **basenames,
953 const char *xpath_use,
954 struct ord_list **ol)
958 int truncation_value;
960 struct rpn_char_map_info rcmi;
961 int space_split = complete_flag ? 0 : 1;
963 int bases_ok = 0; /* no of databases with OK attribute */
965 *ol = ord_list_create(stream);
967 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
968 attr_init_APT(&truncation, zapt, 5);
969 truncation_value = attr_find(&truncation, NULL);
970 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
972 for (base_no = 0; base_no < num_bases; base_no++)
976 int max_pos, prefix_len = 0;
981 termp = *term_sub; /* start of term for each database */
984 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
986 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
991 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
992 attributeSet, &ord) != ZEBRA_OK)
996 wrbuf_rewind(term_dict); /* new dictionary regexp term */
1000 *ol = ord_list_append(stream, *ol, ord);
1001 ord_len = key_SU_encode (ord, ord_buf);
/* Ordinal prefix: every byte is escaped so it is matched literally. */
1003 wrbuf_putc(term_dict, '(');
1005 for (i = 0; i<ord_len; i++)
1007 wrbuf_putc(term_dict, 1); /* our internal regexp escape char */
1008 wrbuf_putc(term_dict, ord_buf[i]);
1010 wrbuf_putc(term_dict, ')');
/* Remember where the term part starts; used for logging below. */
1012 prefix_len = wrbuf_len(term_dict);
1014 switch (truncation_value)
1016 case -1: /* not specified */
1017 case 100: /* do not truncate */
1018 if (!string_relation(zh, zapt, &termp, term_dict,
1020 reg_type, space_split, term_dst,
1025 zebra_setError(zh, relation_error, 0);
1032 case 1: /* right truncation */
1033 wrbuf_putc(term_dict, '(');
1034 if (!term_100(zh->reg->zebra_maps, reg_type,
1035 &termp, term_dict, space_split, term_dst))
1040 wrbuf_puts(term_dict, ".*)");
1042 case 2: /* left truncation */
1043 wrbuf_puts(term_dict, "(.*");
1044 if (!term_100(zh->reg->zebra_maps, reg_type,
1045 &termp, term_dict, space_split, term_dst))
1050 wrbuf_putc(term_dict, ')');
1052 case 3: /* left&right truncation */
1053 wrbuf_puts(term_dict, "(.*");
1054 if (!term_100(zh->reg->zebra_maps, reg_type,
1055 &termp, term_dict, space_split, term_dst))
1060 wrbuf_puts(term_dict, ".*)");
1062 case 101: /* process # in term */
1063 wrbuf_putc(term_dict, '(');
1064 if (!term_101(zh->reg->zebra_maps, reg_type,
1065 &termp, term_dict, space_split, term_dst))
1070 wrbuf_puts(term_dict, ")");
1072 case 102: /* Regexp-1 */
1073 wrbuf_putc(term_dict, '(');
1074 if (!term_102(zh->reg->zebra_maps, reg_type,
1075 &termp, term_dict, space_split, term_dst))
1080 wrbuf_putc(term_dict, ')');
1082 case 103: /* Regexp-2 */
1084 wrbuf_putc(term_dict, '(');
/* NOTE(review): the argument spelled "®ex_range" in the call below looks
 * like a mis-decoded "&regex_range" (the address of the regex_range value
 * later passed to dict_lookup_grep) - confirm against the pristine file. */
1085 if (!term_103(zh->reg->zebra_maps, reg_type,
1086 &termp, term_dict, ®ex_range,
1087 space_split, term_dst))
1092 wrbuf_putc(term_dict, ')');
1094 case 104: /* process # and ! in term */
1095 wrbuf_putc(term_dict, '(');
1096 if (!term_104(zh->reg->zebra_maps, reg_type,
1097 &termp, term_dict, space_split, term_dst))
1102 wrbuf_putc(term_dict, ')');
1104 case 105: /* process * and ! in term, right_truncate = 1 */
1105 wrbuf_putc(term_dict, '(');
1106 if (!term_105(zh->reg->zebra_maps, reg_type,
1107 &termp, term_dict, space_split, term_dst, 1))
1112 wrbuf_putc(term_dict, ')');
1114 case 106: /* process * and ! in term, right_truncate = 0 */
1115 wrbuf_putc(term_dict, '(');
1116 if (!term_105(zh->reg->zebra_maps, reg_type,
1117 &termp, term_dict, space_split, term_dst, 0))
1122 wrbuf_putc(term_dict, ')');
1125 zebra_setError_zint(zh,
1126 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
/* Log only the term part of the regexp; the ordinal prefix is binary. */
1133 const char *input = wrbuf_cstr(term_dict) + prefix_len;
1134 esc_str(buf, sizeof(buf), input, strlen(input));
1136 yaz_log(log_level_rpn, "dict_lookup_grep: %s",
1137 wrbuf_cstr(term_dict) + prefix_len);
1138 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1139 grep_info, &max_pos,
1140 ord_len /* number of "exact" chars */,
/* The conditions on r selecting between "partial result" and the warning
 * are not visible in this excerpt. */
1143 zebra_set_partial_result(zh);
1145 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1150 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Release the buffers owned by a grep_info: the term_no array and the ISAM
 * position buffer.  Both start out as null pointers in grep_info_prepare()
 * and are allocated with xmalloc() in add_isam_p().  Any further cleanup is
 * on lines not visible in this excerpt. */
1156 static void grep_info_delete(struct grep_info *grep_info)
1159 xfree(grep_info->term_no);
1161 xfree(grep_info->isam_p_buf);
/* Initialise a grep_info for the query leaf zapt.
 *
 * - buffers start empty (term_no / isam_p_buf null, isam_p_size 0);
 * - trunc_max comes from the "truncmax" resource (default "10000") and is
 *   overridden by attribute type 13 when that attribute is present;
 * - attribute type 8 ("termset") is examined: a numeric value is formatted
 *   as the set name, the value -2 means the name is the string value; the
 *   set is created with resultSetAdd() and failure is reported as
 *   YAZ_BIB1_ILLEGAL_RESULT_SET_NAME.
 * NOTE(review): an "unsupported search: termset" error is also visible; the
 * preprocessor/conditional structure choosing between it and the creation
 * code is not visible in this excerpt (presumably TERMSET_DISABLE).
 * The reg_type parameter is declared on a line not shown. */
1164 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1165 Z_AttributesPlusTerm *zapt,
1166 struct grep_info *grep_info,
1170 grep_info->term_no = 0;
1172 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1173 grep_info->isam_p_size = 0;
1174 grep_info->isam_p_buf = NULL;
1176 grep_info->reg_type = reg_type;
1177 grep_info->termset = 0;
/* Per-term override of the truncation maximum. */
1183 attr_init_APT(&truncmax, zapt, 13);
1184 truncmax_value = attr_find(&truncmax, NULL);
1185 if (truncmax_value != -1)
1186 grep_info->trunc_max = truncmax_value;
1191 int termset_value_numeric;
1192 const char *termset_value_string;
1194 attr_init_APT(&termset, zapt, 8);
1195 termset_value_numeric =
1196 attr_find_ex(&termset, NULL, &termset_value_string);
1197 if (termset_value_numeric != -1)
1200 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1204 const char *termset_name = 0;
/* -2 marks a string-valued attribute; anything else is a number. */
1205 if (termset_value_numeric != -2)
1208 sprintf(resname, "%d", termset_value_numeric);
1209 termset_name = resname;
1212 termset_name = termset_value_string;
1213 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1214 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1215 if (!grep_info->termset)
1217 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1227 \brief Create result set(s) for list of terms
1228 \param zh Zebra Handle
1229 \param zapt Attributes Plus Term (RPN leaf)
1230 \param termz term as used in query but converted to UTF-8
1231 \param attributeSet default attribute set
1232 \param stream memory for result
1233 \param reg_type register type ('w', 'p',..)
1234 \param complete_flag whether it's phrases or not
1235 \param rank_type term flags for ranking
1236 \param xpath_use use attribute for X-Path (-1 for no X-path)
1237 \param num_bases number of databases
1238 \param basenames array of databases
1239 \param rset_nmem memory for result sets
1240 \param result_sets output result set for each term in list (output)
1241 \param num_result_sets number of output result sets
1242 \param kc rset key control to be used for created result sets
/* Implementation notes (the parameter list is described in the comment
 * preceding this function):
 *  - *result_sets is an array allocated from "stream" and regrown by copying
 *    whenever it is full; the old array is simply left to the NMEM;
 *  - each iteration calls term_trunc() for the next term and stores the new
 *    set at index *num_result_sets;
 *  - on failure every set created so far is deleted and the grep_info is
 *    released before returning;
 *  - a null set from term_trunc() is tested for, presumably to end the
 *    list - TODO confirm, the action is not visible here;
 *  - the grep_info is released on the normal exit as well.
 * Loop header and return statements are on lines not shown. */
1244 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1245 Z_AttributesPlusTerm *zapt,
1247 const Odr_oid *attributeSet,
1249 int reg_type, int complete_flag,
1250 const char *rank_type,
1251 const char *xpath_use,
1252 int num_bases, char **basenames,
1254 RSET **result_sets, int *num_result_sets,
1255 struct rset_key_control *kc)
1257 char term_dst[IT_MAX_WORD+1];
1258 struct grep_info grep_info;
1259 const char *termp = termz;
1262 *num_result_sets = 0;
1264 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
/* Array full: allocate a larger one and copy the existing entries. */
1270 if (alloc_sets == *num_result_sets)
1273 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1276 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1277 alloc_sets = alloc_sets + add;
1278 *result_sets = rnew;
1280 res = term_trunc(zh, zapt, &termp, attributeSet,
1282 reg_type, complete_flag,
1283 num_bases, basenames,
1284 term_dst, rank_type,
1285 xpath_use, rset_nmem,
1286 &(*result_sets)[*num_result_sets],
1288 if (res != ZEBRA_OK)
1291 for (i = 0; i < *num_result_sets; i++)
1292 rset_delete((*result_sets)[i]);
1293 grep_info_delete (&grep_info);
1296 if ((*result_sets)[*num_result_sets] == 0)
1298 (*num_result_sets)++;
1303 grep_info_delete(&grep_info);
/* Build a result set for the position attribute (type 3) of zapt.
 *
 * Visible behaviour: several conditions report
 * YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE - an unsupported attribute value, a
 * register type without first-in-field support, or a register that has
 * neither isamb nor isamc.  Otherwise, for each database the dictionary key
 * "<encoded ordinal>" + FIRST_IN_FIELD_STR is looked up; when found, its
 * ISAM position is wrapped in an rsisamb/rsisamc result set.  The
 * per-database sets are combined with rset_create_or() into *rset.
 * Parameters reg_type, rset_nmem and rset are declared on lines not shown,
 * as are the case labels and return statements.
 * NOTE(review): f_set is xmalloc'ed but no release is visible in this
 * excerpt - confirm it is freed on every exit, including the error paths
 * inside the loop. */
1307 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1308 Z_AttributesPlusTerm *zapt,
1309 const Odr_oid *attributeSet,
1311 int num_bases, char **basenames,
1314 struct rset_key_control *kc)
1322 attr_init_APT(&position, zapt, 3);
1323 position_value = attr_find(&position, NULL);
1324 switch(position_value)
1333 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1338 if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, reg_type))
1340 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1345 if (!zh->reg->isamb && !zh->reg->isamc)
1347 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
/* One slot per database is the upper bound on sets created below. */
1351 f_set = xmalloc(sizeof(RSET) * num_bases);
1352 for (base_no = 0; base_no < num_bases; base_no++)
1356 char term_dict[100];
1361 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1363 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1364 basenames[base_no]);
1368 if (zebra_apt_get_ord(zh, zapt, reg_type, 0,
1369 attributeSet, &ord) != ZEBRA_OK)
/* Exact dictionary key: ordinal bytes followed by the first-in-field
 * marker string. */
1372 ord_len = key_SU_encode (ord, ord_buf);
1373 memcpy(term_dict, ord_buf, ord_len);
1374 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1375 val = dict_lookup(zh->reg->dict, term_dict);
/* Dictionary values are length-prefixed: one size byte, then the ISAM_P. */
1378 assert(*val == sizeof(ISAM_P));
1379 memcpy(&isam_p, val+1, sizeof(isam_p));
1383 f_set[num_sets++] = rsisamb_create(rset_nmem, kc, kc->scope,
1384 zh->reg->isamb, isam_p, 0);
1385 else if (zh->reg->isamc)
1386 f_set[num_sets++] = rsisamc_create(rset_nmem, kc, kc->scope,
1387 zh->reg->isamc, isam_p, 0);
1391 *rset = rset_create_or(rset_nmem, kc, kc->scope,
1392 0 /* termid */, num_sets, f_set);
/* Search for the terms of termz_org as a phrase.
 *
 * term_list_trunc() produces one result set per term.  When there is at
 * least one term, rpn_search_APT_position() is asked for a position set; the
 * visible code can prepend that set (first_set) to the list by building a
 * new array of num_result_sets+1 entries from "stream" (the test deciding
 * this, and the matching count update, are not visible in this excerpt).
 * Final result in *rset:
 *   no sets  -> null result set
 *   one set  -> that set itself
 *   several  -> ordered proximity set (relation 3, distance 1, no exclusion)
 * Parameters stream, rset_nmem and rset are declared on lines not shown. */
1398 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1399 Z_AttributesPlusTerm *zapt,
1400 const char *termz_org,
1401 const Odr_oid *attributeSet,
1403 int reg_type, int complete_flag,
1404 const char *rank_type,
1405 const char *xpath_use,
1406 int num_bases, char **basenames,
1409 struct rset_key_control *kc)
1411 RSET *result_sets = 0;
1412 int num_result_sets = 0;
1414 term_list_trunc(zh, zapt, termz_org, attributeSet,
1415 stream, reg_type, complete_flag,
1416 rank_type, xpath_use,
1417 num_bases, basenames,
1419 &result_sets, &num_result_sets, kc);
1421 if (res != ZEBRA_OK)
1424 if (num_result_sets > 0)
1427 res = rpn_search_APT_position(zh, zapt, attributeSet,
1429 num_bases, basenames,
1430 rset_nmem, &first_set,
1432 if (res != ZEBRA_OK)
/* Put the position set in front of the per-term sets. */
1436 RSET *nsets = nmem_malloc(stream,
1437 sizeof(RSET) * (num_result_sets+1));
1438 nsets[0] = first_set;
1439 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1440 result_sets = nsets;
1444 if (num_result_sets == 0)
1445 *rset = rset_create_null(rset_nmem, kc, 0);
1446 else if (num_result_sets == 1)
1447 *rset = result_sets[0];
1449 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1450 num_result_sets, result_sets,
1451 1 /* ordered */, 0 /* exclusion */,
1452 3 /* relation */, 1 /* distance */);
/* Search for the terms of termz_org combined with OR.
 *
 * term_list_trunc() produces one result set per term.  For each of them
 * rpn_search_APT_position() is called; if it fails, all per-term sets are
 * deleted.  The visible code can then replace a term's set with an ordered
 * proximity set over {first_set, term set} (relation 3, distance 1); the
 * test deciding this is not visible in this excerpt.
 * Final result in *rset:
 *   no sets  -> null result set
 *   one set  -> that set itself
 *   several  -> OR of all sets
 * NOTE(review): the cleanup loop reuses the outer loop variable i; that is
 * only safe if the function returns right after it - the return is not
 * visible here, confirm.
 * Parameters stream, rset_nmem and rset are declared on lines not shown. */
1458 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1459 Z_AttributesPlusTerm *zapt,
1460 const char *termz_org,
1461 const Odr_oid *attributeSet,
1463 int reg_type, int complete_flag,
1464 const char *rank_type,
1465 const char *xpath_use,
1466 int num_bases, char **basenames,
1469 struct rset_key_control *kc)
1471 RSET *result_sets = 0;
1472 int num_result_sets = 0;
1475 term_list_trunc(zh, zapt, termz_org, attributeSet,
1476 stream, reg_type, complete_flag,
1477 rank_type, xpath_use,
1478 num_bases, basenames,
1480 &result_sets, &num_result_sets, kc);
1481 if (res != ZEBRA_OK)
1484 for (i = 0; i<num_result_sets; i++)
1487 res = rpn_search_APT_position(zh, zapt, attributeSet,
1489 num_bases, basenames,
1490 rset_nmem, &first_set,
1492 if (res != ZEBRA_OK)
1494 for (i = 0; i<num_result_sets; i++)
1495 rset_delete(result_sets[i]);
/* Pair the position set with this term's set. */
1503 tmp_set[0] = first_set;
1504 tmp_set[1] = result_sets[i];
1506 result_sets[i] = rset_create_prox(
1507 rset_nmem, kc, kc->scope,
1509 1 /* ordered */, 0 /* exclusion */,
1510 3 /* relation */, 1 /* distance */);
1513 if (num_result_sets == 0)
1514 *rset = rset_create_null(rset_nmem, kc, 0);
1515 else if (num_result_sets == 1)
1516 *rset = result_sets[0];
1518 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1519 num_result_sets, result_sets);
/* Search for the terms of termz_org combined with AND.
 *
 * Same structure as rpn_search_APT_or_list(): term_list_trunc() yields one
 * result set per term, each is optionally paired with the position set from
 * rpn_search_APT_position() through an ordered proximity set (relation 3,
 * distance 1), and a failure of the position lookup deletes all per-term
 * sets.
 * Final result in *rset:
 *   no sets  -> null result set
 *   one set  -> that set itself
 *   several  -> AND of all sets
 * NOTE(review): the cleanup loop reuses the outer loop variable i; that is
 * only safe if the function returns right after it - the return is not
 * visible here, confirm.
 * Parameters stream, rset_nmem and rset are declared on lines not shown. */
1525 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1526 Z_AttributesPlusTerm *zapt,
1527 const char *termz_org,
1528 const Odr_oid *attributeSet,
1530 int reg_type, int complete_flag,
1531 const char *rank_type,
1532 const char *xpath_use,
1533 int num_bases, char **basenames,
1536 struct rset_key_control *kc)
1538 RSET *result_sets = 0;
1539 int num_result_sets = 0;
1542 term_list_trunc(zh, zapt, termz_org, attributeSet,
1543 stream, reg_type, complete_flag,
1544 rank_type, xpath_use,
1545 num_bases, basenames,
1547 &result_sets, &num_result_sets,
1549 if (res != ZEBRA_OK)
1551 for (i = 0; i<num_result_sets; i++)
1554 res = rpn_search_APT_position(zh, zapt, attributeSet,
1556 num_bases, basenames,
1557 rset_nmem, &first_set,
1559 if (res != ZEBRA_OK)
1561 for (i = 0; i<num_result_sets; i++)
1562 rset_delete(result_sets[i]);
/* Pair the position set with this term's set. */
1570 tmp_set[0] = first_set;
1571 tmp_set[1] = result_sets[i];
1573 result_sets[i] = rset_create_prox(
1574 rset_nmem, kc, kc->scope,
1576 1 /* ordered */, 0 /* exclusion */,
1577 3 /* relation */, 1 /* distance */);
1582 if (num_result_sets == 0)
1583 *rset = rset_create_null(rset_nmem, kc, 0);
1584 else if (num_result_sets == 1)
1585 *rset = result_sets[0];
1587 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1588 num_result_sets, result_sets);
/* Translate a numeric term plus its relation attribute (type 2) into a
 * dictionary regexp appended to term_dict, then run it with
 * dict_lookup_grep() so that hits are collected in grep_info through
 * grep_handle.
 *
 * In every supported branch the term is read with term_100() into the
 * scratch buffer term_num and converted with atoi():
 *   "<"  -> gen_regular_rel(value-1, islt=1)
 *   "<=" -> gen_regular_rel(value,   islt=1)
 *   ">=" -> gen_regular_rel(value,   islt=0)
 *   ">"  -> gen_regular_rel(value+1, islt=0)
 *   "="  -> "(0*<value>)", i.e. the number with any count of leading zeros
 * An unknown relation stores YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE in
 * *error_code.  NOTE(review): atoi() gives no error indication, so
 * non-numeric input silently becomes 0.  Parameters term_dict, max_pos,
 * reg_type, term_dst and error_code are declared on lines not shown, as are
 * the case labels and return statements. */
1594 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1595 const char **term_sub,
1597 const Odr_oid *attributeSet,
1598 struct grep_info *grep_info,
1608 WRBUF term_num = wrbuf_alloc();
1611 attr_init_APT(&relation, zapt, 2);
1612 relation_value = attr_find(&relation, NULL);
1614 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1616 switch (relation_value)
1619 yaz_log(log_level_rpn, "Relation <");
1620 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1,
1623 wrbuf_destroy(term_num);
1626 term_value = atoi (wrbuf_cstr(term_num));
1627 gen_regular_rel(term_dict, term_value-1, 1);
1630 yaz_log(log_level_rpn, "Relation <=");
1631 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1,
1634 wrbuf_destroy(term_num);
1637 term_value = atoi (wrbuf_cstr(term_num));
1638 gen_regular_rel(term_dict, term_value, 1);
1641 yaz_log(log_level_rpn, "Relation >=");
1642 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1,
1645 wrbuf_destroy(term_num);
1648 term_value = atoi (wrbuf_cstr(term_num));
1649 gen_regular_rel(term_dict, term_value, 0);
1652 yaz_log(log_level_rpn, "Relation >");
1653 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1,
1656 wrbuf_destroy(term_num);
1659 term_value = atoi (wrbuf_cstr(term_num));
1660 gen_regular_rel(term_dict, term_value+1, 0);
1664 yaz_log(log_level_rpn, "Relation =");
1665 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1,
1668 wrbuf_destroy(term_num);
1671 term_value = atoi (wrbuf_cstr(term_num));
1672 wrbuf_printf(term_dict, "(0*%d)", term_value);
1675 /* term_tmp untouched.. */
1676 while (**term_sub != '\0')
1680 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1681 wrbuf_destroy(term_num);
1684 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1685 0, grep_info, max_pos, 0, grep_handle);
1688 zebra_set_partial_result(zh);
/* NOTE(review): this warning says "rel = gt" whichever relation was used. */
1690 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1691 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1692 wrbuf_destroy(term_num);
/* Look up one numeric term in the dictionary for every selected database.
 *
 * Mirrors string_term(): for each database the current database is selected
 * (YAZ_BIB1_DATABASE_UNAVAILABLE on failure), the index ordinal is resolved
 * with zebra_apt_get_ord() and appended to *ol, and term_dict is rebuilt as
 * "(<ordinal bytes, each escaped with \001>)".  numeric_relation() then
 * appends the relation expression and performs the dictionary grep; when it
 * reports failure, relation_error may be raised via zebra_setError() (the
 * exact condition is not visible in this excerpt).
 * Parameters term_dict and term_dst are declared on lines not shown, as are
 * the return statements. */
1696 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1697 const char **term_sub,
1699 const Odr_oid *attributeSet, NMEM stream,
1700 struct grep_info *grep_info,
1701 int reg_type, int complete_flag,
1702 int num_bases, char **basenames,
1704 const char *xpath_use,
1705 struct ord_list **ol)
1709 struct rpn_char_map_info rcmi;
1711 int bases_ok = 0; /* no of databases with OK attribute */
1713 *ol = ord_list_create(stream);
1715 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1717 for (base_no = 0; base_no < num_bases; base_no++)
1720 int relation_error = 0;
1721 int ord, ord_len, i;
1726 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1728 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1729 basenames[base_no]);
1733 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1734 attributeSet, &ord) != ZEBRA_OK)
1738 wrbuf_rewind(term_dict);
1740 *ol = ord_list_append(stream, *ol, ord);
1742 ord_len = key_SU_encode (ord, ord_buf);
/* Ordinal prefix: byte 1 precedes every ordinal byte, as in string_term
 * where it is the internal regexp escape. */
1744 wrbuf_putc(term_dict, '(');
1745 for (i = 0; i < ord_len; i++)
1747 wrbuf_putc(term_dict, 1);
1748 wrbuf_putc(term_dict, ord_buf[i]);
1750 wrbuf_putc(term_dict, ')');
1752 if (!numeric_relation(zh, zapt, &termp, term_dict,
1753 attributeSet, grep_info, &max_pos, reg_type,
1754 term_dst, &relation_error))
1758 zebra_setError(zh, relation_error, 0);
1768 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/*
 * rpn_search_APT_numeric: evaluate an APT whose search type is "numeric".
 *
 * Each term found in termz is looked up with numeric_term and turned into
 * one result set by rset_trunc; the sets are stored in result_sets, which
 * is grown on demand from stream.  The final result placed in *rset is:
 *   - a null result set when no term produced a set,
 *   - the single set when exactly one was produced,
 *   - an AND of all sets otherwise.
 *
 * NOTE(review): the loop construct and the early-exit statements are
 * presumably a loop that stops when numeric_term fails or termp becomes 0 -
 * confirm against the full function.
 */
1773 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1774 Z_AttributesPlusTerm *zapt,
1776 const Odr_oid *attributeSet,
1778 int reg_type, int complete_flag,
1779 const char *rank_type,
1780 const char *xpath_use,
1781 int num_bases, char **basenames,
1784 struct rset_key_control *kc)
1786 char term_dst[IT_MAX_WORD+1];
1787 const char *termp = termz;
1788 RSET *result_sets = 0;
1789 int num_result_sets = 0;
1791 struct grep_info grep_info;
1793 zint hits_limit_value;
1794 const char *term_ref_id_str = 0;
/* fetch the per-term hits limit and term reference id from the APT */
1796 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1798 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1799 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1803 struct ord_list *ol;
1804 WRBUF term_dict = wrbuf_alloc();
/* result_sets is full: allocate a larger array from stream and copy the
   existing entries into it */
1805 if (alloc_sets == num_result_sets)
1808 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1811 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1812 alloc_sets = alloc_sets + add;
1815 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* reset the position count before collecting matches for this term */
1816 grep_info.isam_p_indx = 0;
1817 res = numeric_term(zh, zapt, &termp, term_dict,
1818 attributeSet, stream, &grep_info,
1819 reg_type, complete_flag, num_bases, basenames,
1820 term_dst, xpath_use, &ol);
/* the dictionary expression is only needed during the lookup */
1821 wrbuf_destroy(term_dict);
1822 if (res == ZEBRA_FAIL || termp == 0)
1824 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
/* build the result set for this term from the collected positions */
1825 result_sets[num_result_sets] =
1826 rset_trunc(zh, grep_info.isam_p_buf,
1827 grep_info.isam_p_indx, term_dst,
1828 strlen(term_dst), rank_type,
1829 0 /* preserve position */,
1830 zapt->term->which, rset_nmem,
1831 kc, kc->scope, ol, reg_type,
1834 if (!result_sets[num_result_sets])
1840 grep_info_delete(&grep_info);
1842 if (res != ZEBRA_OK)
/* combine the per-term sets: none -> null set, one -> that set,
   several -> AND of all of them */
1844 if (num_result_sets == 0)
1845 *rset = rset_create_null(rset_nmem, kc, 0);
1846 else if (num_result_sets == 1)
1847 *rset = result_sets[0];
1849 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1850 num_result_sets, result_sets);
/*
 * rpn_search_APT_local: evaluate an APT whose search type is "local".
 *
 * The term text is converted to a system number with atozint and the
 * corresponding record is fetched with rec_get.  *rset is set either to a
 * null result set or to a temporary result set (stored under the directory
 * given by the "setTmpDir" resource) into which one key is written.
 *
 * NOTE(review): presumably the null set is used when the record does not
 * exist - confirm against the full function.
 */
1856 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1857 Z_AttributesPlusTerm *zapt,
1859 const Odr_oid *attributeSet,
1861 const char *rank_type, NMEM rset_nmem,
1863 struct rset_key_control *kc)
/* the whole term is interpreted as the record's system number */
1866 zint sysno = atozint(termz);
1870 rec = rec_get(zh->reg->records, sysno);
1878 *rset = rset_create_null(rset_nmem, kc, 0);
/* create a temporary result set and open it for writing the key */
1884 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1885 res_get(zh->res, "setTmpDir"), 0);
1886 rsfd = rset_open(*rset, RSETF_WRITE);
1891 rset_write(rsfd, &key);
/*
 * rpn_sort_spec: record a sort key specification embedded in an APT.
 *
 * The term of the APT (which must be a general term) is parsed as an index
 * into sort_sequence->specs; a new Z_SortKeySpec describing the APT's
 * attribute list is allocated from stream and stored at that index.  The
 * specs array is created with 10 empty slots on first use.
 *
 * Attribute type 7 selects the direction: 1 is ascending, 2 is descending;
 * the remaining assignment also sets ascending (presumably the fallback
 * for any other value).
 *
 * The search itself yields no hits: *rset becomes a null result set.
 *
 * NOTE(review): only an upper-bound test on the parsed index is visible
 * (i >= num_specs); confirm that a negative value cannot reach
 * sort_sequence->specs[i].
 */
1897 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1898 const Odr_oid *attributeSet, NMEM stream,
1899 Z_SortKeySpecList *sort_sequence,
1900 const char *rank_type,
1903 struct rset_key_control *kc)
1906 int sort_relation_value;
1907 AttrType sort_relation_type;
/* attribute type 7 carries the sort direction */
1912 attr_init_APT(&sort_relation_type, zapt, 7);
1913 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* first sort key seen: create the specs array with 10 null entries */
1915 if (!sort_sequence->specs)
1917 sort_sequence->num_specs = 10;
1918 sort_sequence->specs = (Z_SortKeySpec **)
1919 nmem_malloc(stream, sort_sequence->num_specs *
1920 sizeof(*sort_sequence->specs));
1921 for (i = 0; i<sort_sequence->num_specs; i++)
1922 sort_sequence->specs[i] = 0;
1924 if (zapt->term->which != Z_Term_general)
/* the term text is the numeric slot in the specs array */
1927 i = atoi_n ((char *) zapt->term->u.general->buf,
1928 zapt->term->u.general->len);
1929 if (i >= sort_sequence->num_specs)
1931 sprintf(termz, "%d", i);
/* build a generic sort element whose key is the APT's attribute list */
1933 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1934 sks->sortElement = (Z_SortElement *)
1935 nmem_malloc(stream, sizeof(*sks->sortElement));
1936 sks->sortElement->which = Z_SortElement_generic;
1937 sk = sks->sortElement->u.generic = (Z_SortKey *)
1938 nmem_malloc(stream, sizeof(*sk));
1939 sk->which = Z_SortKey_sortAttributes;
1940 sk->u.sortAttributes = (Z_SortAttributes *)
1941 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1943 sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1944 sk->u.sortAttributes->list = zapt->attributes;
/* map the relation attribute value to a sort direction */
1946 sks->sortRelation = (int *)
1947 nmem_malloc(stream, sizeof(*sks->sortRelation));
1948 if (sort_relation_value == 1)
1949 *sks->sortRelation = Z_SortKeySpec_ascending;
1950 else if (sort_relation_value == 2)
1951 *sks->sortRelation = Z_SortKeySpec_descending;
1953 *sks->sortRelation = Z_SortKeySpec_ascending;
1955 sks->caseSensitivity = (int *)
1956 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1957 *sks->caseSensitivity = 0;
1959 sks->which = Z_SortKeySpec_null;
1960 sks->u.null = odr_nullval ();
1961 sort_sequence->specs[i] = sks;
/* a sort specification contributes no hits of its own */
1962 *rset = rset_create_null(rset_nmem, kc, 0);
/*
 * rpn_check_xpath: decide whether the use attribute (type 1) of an APT is
 * an X-Path expression and, if so, parse it.
 *
 * The string value of the use attribute is looked up; only a string that
 * starts with '/' is treated as X-Path and handed to
 * zebra_parse_xpath_str, whose result is returned.  At most max location
 * steps are written to xpath.
 *
 * NOTE(review): the value returned for the non-X-Path case is presumably
 * negative, given how the caller treats the result - confirm.
 */
1967 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1968 const Odr_oid *attributeSet,
1969 struct xpath_location_step *xpath, int max,
1972 const Odr_oid *curAttributeSet = attributeSet;
1974 const char *use_string = 0;
/* fetch the string form of the use attribute, if any */
1976 attr_init_APT(&use, zapt, 1);
1977 attr_find_ex(&use, &curAttributeSet, &use_string);
/* no string value, or one not starting with '/': not an X-Path query */
1979 if (!use_string || *use_string != '/')
1982 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/*
 * xpath_trunc: build a result set of all dictionary entries that match
 * the regular expression term within the index identified by xpath_use
 * and reg_type.
 *
 * The ord for (reg_type, xpath_use) is looked up in the explain database,
 * encoded, and emitted as a parenthesised prefix in which each byte is
 * preceded by the byte value 1; term is appended verbatim.  The matches
 * found by dict_lookup_grep are merged by rset_trunc.
 *
 * A null result set is returned when grep_info_prepare fails and in one
 * further case (presumably when the ord lookup fails - confirm).
 */
1987 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1988 int reg_type, const char *term,
1989 const char *xpath_use,
1991 struct rset_key_control *kc)
1993 struct grep_info grep_info;
1994 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1995 zinfo_index_category_index,
1996 reg_type, xpath_use);
/* note: grep_info is prepared with the literal register '0' and no APT */
1997 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
1998 return rset_create_null(rset_nmem, kc, 0);
2001 return rset_create_null(rset_nmem, kc, 0);
2007 WRBUF term_dict = wrbuf_alloc();
2008 int ord_len = key_SU_encode (ord, ord_buf);
2009 int term_type = Z_Term_characterString;
2010 const char *flags = "void";
/* dictionary expression: "(" <1,ord byte>... ")" followed by the term */
2012 wrbuf_putc(term_dict, '(');
2013 for (i = 0; i<ord_len; i++)
2015 wrbuf_putc(term_dict, 1);
2016 wrbuf_putc(term_dict, ord_buf[i]);
2018 wrbuf_putc(term_dict, ')');
2019 wrbuf_puts(term_dict, term);
/* collect matching positions, then merge them into one result set */
2021 grep_info.isam_p_indx = 0;
2022 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2023 &grep_info, &max_pos, 0, grep_handle);
2024 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2025 grep_info.isam_p_indx);
2026 rset = rset_trunc(zh, grep_info.isam_p_buf,
2027 grep_info.isam_p_indx, term, strlen(term),
2028 flags, 1, term_type, rset_nmem,
2029 kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
2030 0 /* term_ref_id_str */);
/* release the lookup state and the expression buffer */
2031 grep_info_delete(&grep_info);
2032 wrbuf_destroy(term_dict);
/*
 * rpn_search_xpath: restrict a result set to hits that occur inside the
 * elements selected by an X-Path expression.
 *
 * xpath / xpath_len describe the parsed location steps.  For each database
 * and for each step, taken from the last one back to the first, a reversed
 * path pattern (xpath_rev) is assembled and looked up with xpath_trunc in
 * the element-begin and element-end indexes; an optional attribute
 * predicate on the step is looked up in the attribute-name index.  The
 * current set is then wrapped with rset_create_between.
 *
 * A null rset on entry means there is no term set (always_matches is 1);
 * for that case the start-tag set is assigned to rset directly.
 * NOTE(review): the selecting condition is presumably always_matches -
 * confirm.
 */
2038 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2039 int num_bases, char **basenames,
2040 NMEM stream, const char *rank_type, RSET rset,
2041 int xpath_len, struct xpath_location_step *xpath,
2044 struct rset_key_control *kc)
/* a null incoming set signals that no term set exists */
2048 int always_matches = rset ? 0 : 1;
2056 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2057 for (i = 0; i<xpath_len; i++)
2059 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2071 a[@attr = value]/b[@other = othervalue]
2073 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2074 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2075 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2076 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2077 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2078 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
/* pass null map info and handler to the dictionary for these lookups */
2082 dict_grep_cmap (zh->reg->dict, 0, 0);
2084 for (base_no = 0; base_no < num_bases; base_no++)
2086 int level = xpath_len;
2089 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2091 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2092 basenames[base_no]);
/* process the location steps from the last one back to the first */
2096 while (--level >= 0)
2098 WRBUF xpath_rev = wrbuf_alloc();
2100 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* assemble the path pattern in reverse order: steps level down to 1 */
2102 for (i = level; i >= 1; --i)
2104 const char *cp = xpath[i].part;
2110 wrbuf_puts(xpath_rev, "[^/]*");
2111 else if (*cp == ' ')
2112 wrbuf_puts(xpath_rev, "\001 ");
2114 wrbuf_putc(xpath_rev, *cp);
2116 /* wrbuf_putc does not null-terminate , but
2117 wrbuf_puts below ensures it does.. so xpath_rev
2118 is OK iff length is > 0 */
2120 wrbuf_puts(xpath_rev, "/");
2122 else if (i == 1) /* // case */
2123 wrbuf_puts(xpath_rev, ".*");
/* a relation predicate with a non-empty name on this step yields an
   attribute result set; the first character of the name is skipped */
2125 if (xpath[level].predicate &&
2126 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2127 xpath[level].predicate->u.relation.name[0])
2129 WRBUF wbuf = wrbuf_alloc();
2130 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2131 if (xpath[level].predicate->u.relation.value)
2133 const char *cp = xpath[level].predicate->u.relation.value;
2134 wrbuf_putc(wbuf, '=');
/* backslash-escape value characters that appear in REGEX_CHARS */
2138 if (strchr(REGEX_CHARS, *cp))
2139 wrbuf_putc(wbuf, '\\');
2140 wrbuf_putc(wbuf, *cp);
2144 rset_attr = xpath_trunc(
2145 zh, stream, '0', wrbuf_cstr(wbuf), ZEBRA_XPATH_ATTR_NAME,
2147 wrbuf_destroy(wbuf);
2153 wrbuf_destroy(xpath_rev);
2157 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2158 wrbuf_cstr(xpath_rev));
/* only a non-empty pattern is looked up in the element indexes */
2159 if (wrbuf_len(xpath_rev))
2161 rset_start_tag = xpath_trunc(zh, stream, '0',
2162 wrbuf_cstr(xpath_rev),
2163 ZEBRA_XPATH_ELM_BEGIN,
2166 rset = rset_start_tag;
2169 rset_end_tag = xpath_trunc(zh, stream, '0',
2170 wrbuf_cstr(xpath_rev),
2171 ZEBRA_XPATH_ELM_END,
/* enclose the current set between the start and end tags, optionally
   constrained by the attribute set */
2174 rset = rset_create_between(rset_nmem, kc, kc->scope,
2175 rset_start_tag, rset,
2176 rset_end_tag, rset_attr);
2179 wrbuf_destroy(xpath_rev);
/* Capacity of the location-step array that rpn_search_APT declares and
   passes (together with this limit) to rpn_check_xpath. */
2187 #define MAX_XPATH_STEPS 10
/*
 * rpn_search_APT: evaluate one attributes-plus-term operand of an RPN
 * query and store the resulting set in *rset.
 *
 * Steps, in order:
 *   1. zebra_maps_attr derives the register id, search type, rank type
 *      and the complete/sort flags from the APT's attributes.
 *   2. The term is converted to UTF-8 into termz.
 *   3. rpn_sort_spec handles the APT when it is a sort specification
 *      (presumably selected by sort_flag - confirm).
 *   4. rpn_check_xpath decides whether the use attribute is an X-Path
 *      expression; if so the last location step selects the use
 *      attribute ('@' first character: attribute cdata, otherwise
 *      element cdata).  With relation attribute (type 2) value 103
 *      ("alwaysmatches") no term set is built and rpn_search_xpath is
 *      called directly.
 *   5. Otherwise the search type string selects the strategy: "phrase",
 *      "and-list", "or-list", "local" or "numeric"; any other value is
 *      reported as YAZ_BIB1_UNSUPP_SEARCH.
 *   6. The term set is finally passed through rpn_search_xpath
 *      (presumably only for X-Path queries - confirm).
 *
 * Fix relative to the previous text: the first output argument of
 * zebra_maps_attr had been corrupted into the character sequence
 * "(R)_id" (an HTML entity expansion of "&reg"); it is restored to the
 * address of reg_id, which the following statements read.
 */
2189 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2190 const Odr_oid *attributeSet, NMEM stream,
2191 Z_SortKeySpecList *sort_sequence,
2192 int num_bases, char **basenames,
2195 struct rset_key_control *kc)
2197 ZEBRA_RES res = ZEBRA_OK;
2199 char *search_type = NULL;
2200 char rank_type[128];
2203 char termz[IT_MAX_WORD+1];
2205 const char *xpath_use = 0;
2206 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2210 log_level_rpn = yaz_log_module_level("rpn");
2213 zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2214 rank_type, &complete_flag, &sort_flag);
2216 yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2217 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2218 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2219 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2221 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2225 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2226 rank_type, rset_nmem, rset, kc);
2227 /* consider if an X-Path query is used */
2228 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2229 xpath, MAX_XPATH_STEPS, stream);
2232 if (xpath[xpath_len-1].part[0] == '@')
2233 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2235 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
2242 attr_init_APT(&relation, zapt, 2);
2243 relation_value = attr_find(&relation, NULL);
2245 if (relation_value == 103) /* alwaysmatches */
2247 *rset = 0; /* signal no "term" set */
2248 return rpn_search_xpath(zh, num_bases, basenames,
2249 stream, rank_type, *rset,
2250 xpath_len, xpath, rset_nmem, rset, kc);
2255 /* search using one of the various search type strategies
2256 termz is our UTF-8 search term
2257 attributeSet is top-level default attribute set
2258 stream is ODR for search
2259 reg_id is the register type
2260 complete_flag is 1 for complete subfield, 0 for incomplete
2261 xpath_use is use-attribute to be used for X-Path search, 0 for none
2263 if (!strcmp(search_type, "phrase"))
2265 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2266 reg_id, complete_flag, rank_type,
2268 num_bases, basenames, rset_nmem,
2271 else if (!strcmp(search_type, "and-list"))
2273 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2274 reg_id, complete_flag, rank_type,
2276 num_bases, basenames, rset_nmem,
2279 else if (!strcmp(search_type, "or-list"))
2281 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2282 reg_id, complete_flag, rank_type,
2284 num_bases, basenames, rset_nmem,
2287 else if (!strcmp(search_type, "local"))
2289 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2290 rank_type, rset_nmem, rset, kc);
2292 else if (!strcmp(search_type, "numeric"))
2294 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2295 reg_id, complete_flag, rank_type,
2297 num_bases, basenames, rset_nmem,
2302 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2305 if (res != ZEBRA_OK)
2309 return rpn_search_xpath(zh, num_bases, basenames,
2310 stream, rank_type, *rset,
2311 xpath_len, xpath, rset_nmem, rset, kc);
/*
 * Forward declaration of rpn_search_structure, which is defined further
 * down in this file: rpn_search_top calls it before the definition, and
 * the function calls itself for the operands of complex nodes.
 */
2314 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2315 const Odr_oid *attributeSet,
2316 NMEM stream, NMEM rset_nmem,
2317 Z_SortKeySpecList *sort_sequence,
2318 int num_bases, char **basenames,
2319 RSET **result_sets, int *num_result_sets,
2320 Z_Operator *parent_op,
2321 struct rset_key_control *kc);
/*
 * rpn_get_top_approx_limit: walk an RPN query tree looking at attribute
 * type 12 on every attributes-plus-term operand.
 *
 * Complex nodes are handled by recursing into the left operand (s1) and
 * then the right operand (s2); the second recursion is skipped once a
 * call has returned something other than ZEBRA_OK.  For simple APT
 * operands the value of attribute type 12 is read into l.
 *
 * NOTE(review): judging by the local variable name, type 12 is the global
 * hits limit attribute - confirm what is done with the value read.
 */
2323 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2326 ZEBRA_RES res = ZEBRA_OK;
2327 if (zs->which == Z_RPNStructure_complex)
/* left operand first, then right; stop descending after a failure */
2329 if (res == ZEBRA_OK)
2330 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2332 if (res == ZEBRA_OK)
2333 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2336 else if (zs->which == Z_RPNStructure_simple)
2338 if (zs->u.simple->which == Z_Operand_APT)
2340 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2341 AttrType global_hits_limit_attr;
/* read attribute type 12 from this operand */
2344 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2346 l = attr_find(&global_hits_limit_attr, NULL);
/*
 * rpn_search_top: entry point for evaluating a complete RPN query tree.
 *
 * A key control object is created for the handle and the tree is
 * evaluated with rpn_search_structure, passing no parent operator.  If
 * that fails, every result set produced so far is deleted.  On success
 * exactly one result set is expected (enforced with assert) and it is
 * stored in *result_set.
 */
2354 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2355 const Odr_oid *attributeSet,
2356 NMEM stream, NMEM rset_nmem,
2357 Z_SortKeySpecList *sort_sequence,
2358 int num_bases, char **basenames,
2361 RSET *result_sets = 0;
2362 int num_result_sets = 0;
2364 struct rset_key_control *kc = zebra_key_control_create(zh);
2366 res = rpn_search_structure(zh, zs, attributeSet,
2369 num_bases, basenames,
2370 &result_sets, &num_result_sets,
2371 0 /* no parent op */,
/* on failure, release whatever partial result sets were returned */
2373 if (res != ZEBRA_OK)
2376 for (i = 0; i<num_result_sets; i++)
2377 rset_delete(result_sets[i]);
/* with no parent operator the root must have combined everything into
   one set */
2382 assert(num_result_sets == 1);
2383 assert(result_sets);
2384 assert(*result_sets);
2385 *result_set = *result_sets;
/*
 * rpn_search_structure: recursively evaluate one node of an RPN query
 * tree, returning its result sets in *result_sets / *num_result_sets
 * (the array is allocated from stream).
 *
 * Complex node: both operands are evaluated (left, then right); if either
 * fails, the result sets obtained so far are deleted.  The operand sets
 * are concatenated into one list.  When the parent operator is absent,
 * differs from this node's operator, or this operator is neither AND nor
 * OR, the list is combined immediately into a single set according to the
 * operator (and / or / and-not / prox).  Otherwise the flat list is
 * returned so that the parent, having the same AND/OR operator, can
 * combine all of it at once.  Proximity is only supported for the known
 * unit "word"; other units are reported with
 * YAZ_BIB1_UNSUPP_PROX_UNIT_CODE.  An unrecognised operator is reported
 * with YAZ_BIB1_OPERATOR_UNSUPP.
 *
 * Simple node: an APT operand is evaluated with rpn_search_APT, a result
 * set id is resolved with resultSetRef (a missing set is reported with
 * YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST); any other operand is
 * reported with YAZ_BIB1_UNSUPP_SEARCH.  The single set is returned as a
 * one-element list.
 *
 * Any other node kind is reported with YAZ_BIB1_UNSUPP_SEARCH.
 */
2391 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2392 const Odr_oid *attributeSet,
2393 NMEM stream, NMEM rset_nmem,
2394 Z_SortKeySpecList *sort_sequence,
2395 int num_bases, char **basenames,
2396 RSET **result_sets, int *num_result_sets,
2397 Z_Operator *parent_op,
2398 struct rset_key_control *kc)
2400 *num_result_sets = 0;
2401 if (zs->which == Z_RPNStructure_complex)
2404 Z_Operator *zop = zs->u.complex->roperator;
2405 RSET *result_sets_l = 0;
2406 int num_result_sets_l = 0;
2407 RSET *result_sets_r = 0;
2408 int num_result_sets_r = 0;
/* evaluate the left operand */
2410 res = rpn_search_structure(zh, zs->u.complex->s1,
2411 attributeSet, stream, rset_nmem,
2413 num_bases, basenames,
2414 &result_sets_l, &num_result_sets_l,
2416 if (res != ZEBRA_OK)
2419 for (i = 0; i<num_result_sets_l; i++)
2420 rset_delete(result_sets_l[i]);
/* evaluate the right operand; on failure both sides are released */
2423 res = rpn_search_structure(zh, zs->u.complex->s2,
2424 attributeSet, stream, rset_nmem,
2426 num_bases, basenames,
2427 &result_sets_r, &num_result_sets_r,
2429 if (res != ZEBRA_OK)
2432 for (i = 0; i<num_result_sets_l; i++)
2433 rset_delete(result_sets_l[i]);
2434 for (i = 0; i<num_result_sets_r; i++)
2435 rset_delete(result_sets_r[i]);
2439 /* make a new list of result for all children */
2440 *num_result_sets = num_result_sets_l + num_result_sets_r;
2441 *result_sets = nmem_malloc(stream, *num_result_sets *
2442 sizeof(**result_sets));
2443 memcpy(*result_sets, result_sets_l,
2444 num_result_sets_l * sizeof(**result_sets));
2445 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2446 num_result_sets_r * sizeof(**result_sets));
/* combine here unless the parent carries the same AND/OR operator, in
   which case the flat list is handed up unchanged */
2448 if (!parent_op || parent_op->which != zop->which
2449 || (zop->which != Z_Operator_and &&
2450 zop->which != Z_Operator_or))
2452 /* parent node different from this one (or non-present) */
2453 /* we must combine result sets now */
2457 case Z_Operator_and:
2458 rset = rset_create_and(rset_nmem, kc,
2460 *num_result_sets, *result_sets);
2463 rset = rset_create_or(rset_nmem, kc,
2464 kc->scope, 0, /* termid */
2465 *num_result_sets, *result_sets);
2467 case Z_Operator_and_not:
2468 rset = rset_create_not(rset_nmem, kc,
2473 case Z_Operator_prox:
/* only the known proximity unit "word" is accepted */
2474 if (zop->u.prox->which != Z_ProximityOperator_known)
2477 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2481 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2483 zebra_setError_zint(zh,
2484 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2485 *zop->u.prox->u.known);
/* a missing exclusion flag is treated as 0 */
2490 rset = rset_create_prox(rset_nmem, kc,
2492 *num_result_sets, *result_sets,
2493 *zop->u.prox->ordered,
2494 (!zop->u.prox->exclusion ?
2495 0 : *zop->u.prox->exclusion),
2496 *zop->u.prox->relationType,
2497 *zop->u.prox->distance );
2501 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* replace the operand list by the single combined set */
2504 *num_result_sets = 1;
2505 *result_sets = nmem_malloc(stream, *num_result_sets *
2506 sizeof(**result_sets));
2507 (*result_sets)[0] = rset;
2510 else if (zs->which == Z_RPNStructure_simple)
2515 if (zs->u.simple->which == Z_Operand_APT)
2517 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2518 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2519 attributeSet, stream, sort_sequence,
2520 num_bases, basenames, rset_nmem, &rset,
2522 if (res != ZEBRA_OK)
2525 else if (zs->u.simple->which == Z_Operand_resultSetId)
2527 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2528 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2532 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2533 zs->u.simple->u.resultSetId);
2540 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a simple operand always yields a one-element list */
2543 *num_result_sets = 1;
2544 *result_sets = nmem_malloc(stream, *num_result_sets *
2545 sizeof(**result_sets));
2546 (*result_sets)[0] = rset;
2550 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2561 * indent-tabs-mode: nil
2563 * vim: shiftwidth=4 tabstop=8 expandtab