f11dcd8bce92636e0c07340b1af3b59d478080ff
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* This file is part of the Zebra server.
2    Copyright (C) Index Data
3
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 */
19
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
42
43 #define TERMSET_DISABLE 1
44
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
46 {
47     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48     const char **out = zebra_maps_input(p->zm, from, len, 0);
49 #if 0
50     if (out && *out)
51     {
52         const char *outp = *out;
53         yaz_log(YLOG_LOG, "---");
54         while (*outp)
55         {
56             yaz_log(YLOG_LOG, "%02X", *outp);
57             outp++;
58         }
59     }
60 #endif
61     return out;
62 }
63
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65                           struct rpn_char_map_info *map_info)
66 {
67     map_info->zm = zm;
68     if (zebra_maps_is_icu(zm))
69         dict_grep_cmap(reg->dict, 0, 0);
70     else
71         dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
72 }
73
74 #define TERM_COUNT
75
/* State shared between a dictionary grep and its match callback
   (see add_isam_p, which fills it in one match at a time). */
struct grep_info {
#ifdef TERM_COUNT
    int *term_no;           /* array (re)allocated together with isam_p_buf */
#endif
    ISAM_P *isam_p_buf;     /* ISAM positions collected so far */
    int isam_p_size;        /* allocated capacity of isam_p_buf (entries) */
    int isam_p_indx;        /* number of entries in use */
    int trunc_max;          /* add_isam_p refuses matches once
                               isam_p_indx >= trunc_max - 1 */
    ZebraHandle zh;         /* handle used for term lookup and logging */
    const char *index_type; /* passed to zebra_term_untrans */
    ZebraSet termset;       /* if non-NULL, each match is also recorded
                               with resultSetAddTerm */
};
88
89 static int add_isam_p(const char *name, const char *info,
90                       struct grep_info *p)
91 {
92     if (!log_level_set)
93     {
94         log_level_rpn = yaz_log_module_level("rpn");
95         log_level_set = 1;
96     }
97     /* we may have to stop this madness.. NOTE: -1 so that if
98        truncmax == trunxlimit we do *not* generate result sets */
99     if (p->isam_p_indx >= p->trunc_max - 1)
100         return 1;
101
102     if (p->isam_p_indx == p->isam_p_size)
103     {
104         ISAM_P *new_isam_p_buf;
105 #ifdef TERM_COUNT
106         int *new_term_no;
107 #endif
108         p->isam_p_size = 2*p->isam_p_size + 100;
109         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
110                                             p->isam_p_size);
111         if (p->isam_p_buf)
112         {
113             memcpy(new_isam_p_buf, p->isam_p_buf,
114                    p->isam_p_indx * sizeof(*p->isam_p_buf));
115             xfree(p->isam_p_buf);
116         }
117         p->isam_p_buf = new_isam_p_buf;
118
119 #ifdef TERM_COUNT
120         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
121         if (p->term_no)
122         {
123             memcpy(new_term_no, p->isam_p_buf,
124                    p->isam_p_indx * sizeof(*p->term_no));
125             xfree(p->term_no);
126         }
127         p->term_no = new_term_no;
128 #endif
129     }
130     assert(*info == sizeof(*p->isam_p_buf));
131     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
132
133     if (p->termset)
134     {
135         const char *db;
136         char term_tmp[IT_MAX_WORD];
137         int ord = 0;
138         const char *index_name;
139         int len = key_SU_decode(&ord, (const unsigned char *) name);
140
141         zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143         zebraExplain_lookup_ord(p->zh->reg->zei,
144                                 ord, 0 /* index_type */, &db, &index_name);
145         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
146
147         resultSetAddTerm(p->zh, p->termset, name[len], db,
148                          index_name, term_tmp);
149     }
150     (p->isam_p_indx)++;
151     return 0;
152 }
153
/* Dictionary grep callback: p is the struct grep_info to append the
   match (name, info) to.  Returns the result of add_isam_p(). */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *accum = (struct grep_info *) p;

    return add_isam_p(name, info, accum);
}
158
159 static int term_pre(zebra_map_t zm, const char **src,
160                     const char *ct1, int first)
161 {
162     const char *s1, *s0 = *src;
163     const char **map;
164
165     /* skip white space */
166     while (*s0)
167     {
168         if (ct1 && strchr(ct1, *s0))
169             break;
170         s1 = s0;
171         map = zebra_maps_input(zm, &s1, strlen(s1), first);
172         if (**map != *CHR_SPACE)
173             break;
174         s0 = s1;
175     }
176     *src = s0;
177     return *s0;
178 }
179
180
/* Render in_size bytes of in_buf into out_buf as a debug string.  Each
 * byte becomes "HH:c  " (hex value, then the character itself or '?'
 * when it is outside printable ASCII).  Output stops early, with ".."
 * appended, once fewer than 20 bytes of room remain.
 * out_size must be greater than 20.
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;   /* length of the string built so far */
    int idx;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    for (idx = 0; idx < in_size; idx++)
    {
        int byte = in_buf[idx] & 0xff;
        int shown = (byte >= 32 && byte <= 126) ? byte : '?';

        used += sprintf(out_buf + used, "%02X:%c  ", byte, shown);
        if (used > out_size-20)
        {
            strcpy(out_buf + used, "..");
            break;
        }
    }
}
206
207 #define REGEX_CHARS " ^[]()|.*+?!\"$\\"
208
209 static void add_non_space(const char *start, const char *end,
210                           WRBUF term_dict,
211                           WRBUF display_term,
212                           const char **map, int q_map_match)
213 {
214     size_t sz = end - start;
215
216     wrbuf_write(display_term, start, sz);
217     if (!q_map_match)
218     {
219         while (start < end)
220         {
221             if (strchr(REGEX_CHARS, *start))
222                 wrbuf_putc(term_dict, '\\');
223             wrbuf_putc(term_dict, *start);
224             start++;
225         }
226     }
227     else
228     {
229         char tmpbuf[80];
230         esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
231
232         wrbuf_puts(term_dict, map[0]);
233     }
234 }
235
236
/* ICU sort keys seem to have the layout
 *     basechars \x01 accents \x01 length
 * Given a key buf of length i, return the length of the basechars
 * part by walking back over the two trailing \x01-delimited sections.
 * Only basechars are used for right truncation, so hits that differ
 * only in accents may be reported (false hits).
 * Returns 0 when fewer than two separators precede position i.
 */
static size_t icu_basechars(const char *buf, size_t i)
{
    int section;

    /* pass 0 drops the length section, pass 1 the accents section */
    for (section = 0; section < 2; section++)
    {
        while (i > 0)
        {
            i--;
            if (buf[i] == '\x01')
                break;
        }
    }
    return i;
}
250
251 static int term_102_icu(zebra_map_t zm,
252                         const char **src, WRBUF term_dict, int space_split,
253                         WRBUF display_term)
254 {
255     int no_terms = 0;
256     const char *s0 = *src, *s1;
257     while (*s0 == ' ')
258         s0++;
259     s1 = s0;
260     for (;;)
261     {
262         if (*s1 == ' ' && space_split)
263             break;
264         else if (*s1 && !strchr(REGEX_CHARS "-", *s1))
265             s1++;
266         else
267         {
268             /* EOF or regex reserved char */
269             if (s0 != s1)
270             {
271                 const char *res_buf = 0;
272                 size_t res_len = 0;
273                 const char *display_buf;
274                 size_t display_len;
275
276                 zebra_map_tokenize_start(zm, s0, s1 - s0);
277
278                 if (zebra_map_tokenize_next(zm, &res_buf, &res_len,
279                                             &display_buf, &display_len))
280                 {
281                     size_t i;
282                     res_len = icu_basechars(res_buf, res_len);
283                     for (i = 0; i < res_len; i++)
284                     {
285                         if (strchr(REGEX_CHARS "\\", res_buf[i]))
286                             wrbuf_putc(term_dict, '\\');
287                         if (res_buf[i] < 32)
288                             wrbuf_putc(term_dict, '\x01');
289
290                         wrbuf_putc(term_dict, res_buf[i]);
291                     }
292                     wrbuf_write(display_term, display_buf, display_len);
293
294                     no_terms++;
295                 }
296             }
297             if (*s1 == '\0')
298                 break;
299
300             wrbuf_putc(term_dict, *s1);
301             wrbuf_putc(display_term, *s1);
302
303             s1++;
304             s0 = s1;
305         }
306     }
307     if (no_terms)
308         wrbuf_puts(term_dict, "\x01\x01.*");
309     *src = s1;
310     return no_terms;
311 }
312
313 static int term_100_icu(zebra_map_t zm,
314                         const char **src, WRBUF term_dict,
315                         WRBUF display_term,
316                         int mode,
317                         size_t token_number)
318 {
319     size_t i;
320     const char *res_buf = 0;
321     size_t res_len = 0;
322     const char *display_buf;
323     size_t display_len;
324
325     zebra_map_tokenize_start(zm, *src, strlen(*src));
326     for (i = 0; i <= token_number; i++)
327     {
328         if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
329                                      &display_buf, &display_len))
330             return 0;
331     }
332     wrbuf_write(display_term, display_buf, display_len);
333     if (mode)
334     {
335         res_len = icu_basechars(res_buf, res_len);
336     }
337     if (mode & 2)
338         wrbuf_puts(term_dict, ".*");
339     for (i = 0; i < res_len; i++)
340     {
341         if (strchr(REGEX_CHARS "\\", res_buf[i]))
342             wrbuf_putc(term_dict, '\\');
343         if (res_buf[i] < 32)
344             wrbuf_putc(term_dict, '\x01');
345
346         wrbuf_putc(term_dict, res_buf[i]);
347     }
348     if (mode & 1)
349         wrbuf_puts(term_dict, ".*");
350     else if (mode)
351         wrbuf_puts(term_dict, "\x01\x01.*");
352     return 1;
353 }
354
355 /* term_100: handle term, where trunc = none(no operators at all) */
356 static int term_100(zebra_map_t zm,
357                     const char **src, WRBUF term_dict, int space_split,
358                     WRBUF display_term)
359 {
360     const char *s0;
361     const char **map;
362     int i = 0;
363
364     const char *space_start = 0;
365     const char *space_end = 0;
366
367     if (!term_pre(zm, src, 0, !space_split))
368         return 0;
369     s0 = *src;
370     while (*s0)
371     {
372         const char *s1 = s0;
373         int q_map_match = 0;
374         map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
375         if (space_split)
376         {
377             if (**map == *CHR_SPACE)
378                 break;
379         }
380         else  /* complete subfield only. */
381         {
382             if (**map == *CHR_SPACE)
383             {   /* save space mapping for later  .. */
384                 space_start = s1;
385                 space_end = s0;
386                 continue;
387             }
388             else if (space_start)
389             {   /* reload last space */
390                 while (space_start < space_end)
391                 {
392                     if (strchr(REGEX_CHARS, *space_start))
393                         wrbuf_putc(term_dict, '\\');
394                     wrbuf_putc(display_term, *space_start);
395                     wrbuf_putc(term_dict, *space_start);
396                     space_start++;
397
398                 }
399                 /* and reset */
400                 space_start = space_end = 0;
401             }
402         }
403         i++;
404
405         add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
406     }
407     *src = s0;
408     return i;
409 }
410
411 /* term_101: handle term, where trunc = Process # */
412 static int term_101(zebra_map_t zm,
413                     const char **src, WRBUF term_dict, int space_split,
414                     WRBUF display_term)
415 {
416     const char *s0;
417     const char **map;
418     int i = 0;
419
420     if (!term_pre(zm, src, "#", !space_split))
421         return 0;
422     s0 = *src;
423     while (*s0)
424     {
425         if (*s0 == '#')
426         {
427             i++;
428             wrbuf_puts(term_dict, ".*");
429             wrbuf_putc(display_term, *s0);
430             s0++;
431         }
432         else
433         {
434             const char *s1 = s0;
435             int q_map_match = 0;
436             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
437             if (space_split && **map == *CHR_SPACE)
438                 break;
439
440             i++;
441             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
442         }
443     }
444     *src = s0;
445     return i;
446 }
447
448 /* term_103: handle term, where trunc = re-2 (regular expressions) */
449 static int term_103(zebra_map_t zm, const char **src,
450                     WRBUF term_dict, int *errors, int space_split,
451                     WRBUF display_term)
452 {
453     int i = 0;
454     const char *s0;
455     const char **map;
456
457     if (!term_pre(zm, src, "^\\()[].*+?|", !space_split))
458         return 0;
459     s0 = *src;
460     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
461         isdigit(((const unsigned char *)s0)[1]))
462     {
463         *errors = s0[1] - '0';
464         s0 += 3;
465         if (*errors > 3)
466             *errors = 3;
467     }
468     while (*s0)
469     {
470         if (strchr("^\\()[].*+?|-", *s0))
471         {
472             wrbuf_putc(display_term, *s0);
473             wrbuf_putc(term_dict, *s0);
474             s0++;
475             i++;
476         }
477         else
478         {
479             const char *s1 = s0;
480             int q_map_match = 0;
481             map = zebra_maps_search(zm, &s0, strlen(s0),  &q_map_match);
482             if (space_split && **map == *CHR_SPACE)
483                 break;
484
485             i++;
486             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
487         }
488     }
489     *src = s0;
490
491     return i;
492 }
493
494 /* term_103: handle term, where trunc = re-1 (regular expressions) */
495 static int term_102(zebra_map_t zm, const char **src,
496                     WRBUF term_dict, int space_split, WRBUF display_term)
497 {
498     return term_103(zm, src, term_dict, NULL, space_split, display_term);
499 }
500
501
502 /* term_104: handle term, process ?n * # */
503 static int term_104(zebra_map_t zm, const char **src,
504                     WRBUF term_dict, int space_split, WRBUF display_term)
505 {
506     const char *s0;
507     const char **map;
508     int i = 0;
509
510     if (!term_pre(zm, src, "?*#", !space_split))
511         return 0;
512     s0 = *src;
513     while (*s0)
514     {
515         if (*s0 == '?')
516         {
517             i++;
518             wrbuf_putc(display_term, *s0);
519             s0++;
520             if (*s0 >= '0' && *s0 <= '9')
521             {
522                 int limit = 0;
523                 while (*s0 >= '0' && *s0 <= '9')
524                 {
525                     limit = limit * 10 + (*s0 - '0');
526                     wrbuf_putc(display_term, *s0);
527                     s0++;
528                 }
529                 if (limit > 20)
530                     limit = 20;
531                 while (--limit >= 0)
532                 {
533                     wrbuf_puts(term_dict, ".?");
534                 }
535             }
536             else
537             {
538                 wrbuf_puts(term_dict, ".*");
539             }
540         }
541         else if (*s0 == '*')
542         {
543             i++;
544             wrbuf_puts(term_dict, ".*");
545             wrbuf_putc(display_term, *s0);
546             s0++;
547         }
548         else if (*s0 == '#')
549         {
550             i++;
551             wrbuf_puts(term_dict, ".");
552             wrbuf_putc(display_term, *s0);
553             s0++;
554         }
555         else
556         {
557             const char *s1 = s0;
558             int q_map_match = 0;
559             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
560             if (space_split && **map == *CHR_SPACE)
561                 break;
562
563             i++;
564             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
565         }
566     }
567     *src = s0;
568     return i;
569 }
570
571 /* term_105/106: handle term, process * ! and possibly right_truncate */
572 static int term_105(zebra_map_t zm, const char **src,
573                     WRBUF term_dict, int space_split,
574                     WRBUF display_term, int right_truncate)
575 {
576     const char *s0;
577     const char **map;
578     int i = 0;
579
580     if (!term_pre(zm, src, "\\*!", !space_split))
581         return 0;
582     s0 = *src;
583     while (*s0)
584     {
585         if (*s0 == '*')
586         {
587             i++;
588             wrbuf_puts(term_dict, ".*");
589             wrbuf_putc(display_term, *s0);
590             s0++;
591         }
592         else if (*s0 == '!')
593         {
594             i++;
595             wrbuf_putc(term_dict, '.');
596             wrbuf_putc(display_term, *s0);
597             s0++;
598         }
599         else if (*s0 == '\\')
600         {
601             i++;
602             wrbuf_puts(term_dict, "\\\\");
603             wrbuf_putc(display_term, *s0);
604             s0++;
605         }
606         else
607         {
608             const char *s1 = s0;
609             int q_map_match = 0;
610             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
611             if (space_split && **map == *CHR_SPACE)
612                 break;
613
614             i++;
615             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
616         }
617     }
618     if (right_truncate)
619         wrbuf_puts(term_dict, ".*");
620     *src = s0;
621     return i;
622 }
623
624
625 /* gen_regular_rel - generate regular expression from relation
626  *  val:     border value (inclusive)
627  *  islt:    1 if <=; 0 if >=.
628  */
629 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
630 {
631     char dst_buf[20*5*20]; /* assuming enough for expansion */
632     char *dst = dst_buf;
633     int dst_p;
634     int w, d, i;
635     int pos = 0;
636     char numstr[20];
637
638     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
639     if (val >= 0)
640     {
641         if (islt)
642             strcpy(dst, "(-[0-9]+|(");
643         else
644             strcpy(dst, "((");
645     }
646     else
647     {
648         if (!islt)
649         {
650             strcpy(dst, "([0-9]+|-(");
651             islt = 1;
652         }
653         else
654         {
655             strcpy(dst, "(-(");
656             islt = 0;
657         }
658         val = -val;
659     }
660     dst_p = strlen(dst);
661     sprintf(numstr, "%d", val);
662     for (w = strlen(numstr); --w >= 0; pos++)
663     {
664         d = numstr[w];
665         if (pos > 0)
666         {
667             if (islt)
668             {
669                 if (d == '0')
670                     continue;
671                 d--;
672             }
673             else
674             {
675                 if (d == '9')
676                     continue;
677                 d++;
678             }
679         }
680
681         strcpy(dst + dst_p, numstr);
682         dst_p = strlen(dst) - pos - 1;
683
684         if (islt)
685         {
686             if (d != '0')
687             {
688                 dst[dst_p++] = '[';
689                 dst[dst_p++] = '0';
690                 dst[dst_p++] = '-';
691                 dst[dst_p++] = d;
692                 dst[dst_p++] = ']';
693             }
694             else
695                 dst[dst_p++] = d;
696         }
697         else
698         {
699             if (d != '9')
700             {
701                 dst[dst_p++] = '[';
702                 dst[dst_p++] = d;
703                 dst[dst_p++] = '-';
704                 dst[dst_p++] = '9';
705                 dst[dst_p++] = ']';
706             }
707             else
708                 dst[dst_p++] = d;
709         }
710         for (i = 0; i < pos; i++)
711         {
712             dst[dst_p++] = '[';
713             dst[dst_p++] = '0';
714             dst[dst_p++] = '-';
715             dst[dst_p++] = '9';
716             dst[dst_p++] = ']';
717         }
718         dst[dst_p++] = '|';
719     }
720     dst[dst_p] = '\0';
721     if (islt)
722     {
723         /* match everything less than 10^(pos-1) */
724         strcat(dst, "0*");
725         for (i = 1; i < pos; i++)
726             strcat(dst, "[0-9]?");
727     }
728     else
729     {
730         /* match everything greater than 10^pos */
731         for (i = 0; i <= pos; i++)
732             strcat(dst, "[0-9]");
733         strcat(dst, "[0-9]*");
734     }
735     strcat(dst, "))");
736     wrbuf_puts(term_dict, dst);
737 }
738
739 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
740 {
741     const char *src = wrbuf_cstr(wsrc);
742     if (src[*indx] == '\\')
743     {
744         wrbuf_putc(term_p, src[*indx]);
745         (*indx)++;
746     }
747     wrbuf_putc(term_p, src[*indx]);
748     (*indx)++;
749 }
750
751 /*
752  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
753  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
754  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
755  *              ([^-a].*|a[^-b].*|ab[c-].*)
756  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
757  *              ([^a-].*|a[^b-].*|ab[^c-].*)
758  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
759  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
760  */
761 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
762                            const char **term_sub, WRBUF term_dict,
763                            const Odr_oid *attributeSet,
764                            zebra_map_t zm, int space_split,
765                            WRBUF display_term,
766                            int *error_code)
767 {
768     AttrType relation;
769     int relation_value;
770     int i;
771     WRBUF term_component = wrbuf_alloc();
772
773     attr_init_APT(&relation, zapt, 2);
774     relation_value = attr_find(&relation, NULL);
775
776     *error_code = 0;
777     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
778     switch (relation_value)
779     {
780     case 1:
781         if (!term_100(zm, term_sub, term_component, space_split, display_term))
782         {
783             wrbuf_destroy(term_component);
784             return 0;
785         }
786         yaz_log(log_level_rpn, "Relation <");
787
788         wrbuf_putc(term_dict, '(');
789         for (i = 0; i < wrbuf_len(term_component); )
790         {
791             int j = 0;
792
793             if (i)
794                 wrbuf_putc(term_dict, '|');
795             while (j < i)
796                 string_rel_add_char(term_dict, term_component, &j);
797
798             wrbuf_putc(term_dict, '[');
799
800             wrbuf_putc(term_dict, '^');
801
802             wrbuf_putc(term_dict, 1);
803             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
804
805             string_rel_add_char(term_dict, term_component, &i);
806             wrbuf_putc(term_dict, '-');
807
808             wrbuf_putc(term_dict, ']');
809             wrbuf_putc(term_dict, '.');
810             wrbuf_putc(term_dict, '*');
811         }
812         wrbuf_putc(term_dict, ')');
813         break;
814     case 2:
815         if (!term_100(zm, term_sub, term_component, space_split, display_term))
816         {
817             wrbuf_destroy(term_component);
818             return 0;
819         }
820         yaz_log(log_level_rpn, "Relation <=");
821
822         wrbuf_putc(term_dict, '(');
823         for (i = 0; i < wrbuf_len(term_component); )
824         {
825             int j = 0;
826
827             while (j < i)
828                 string_rel_add_char(term_dict, term_component, &j);
829             wrbuf_putc(term_dict, '[');
830
831             wrbuf_putc(term_dict, '^');
832
833             wrbuf_putc(term_dict, 1);
834             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
835
836             string_rel_add_char(term_dict, term_component, &i);
837             wrbuf_putc(term_dict, '-');
838
839             wrbuf_putc(term_dict, ']');
840             wrbuf_putc(term_dict, '.');
841             wrbuf_putc(term_dict, '*');
842
843             wrbuf_putc(term_dict, '|');
844         }
845         for (i = 0; i < wrbuf_len(term_component); )
846             string_rel_add_char(term_dict, term_component, &i);
847         wrbuf_putc(term_dict, ')');
848         break;
849     case 5:
850         if (!term_100(zm, term_sub, term_component, space_split, display_term))
851         {
852             wrbuf_destroy(term_component);
853             return 0;
854         }
855         yaz_log(log_level_rpn, "Relation >");
856
857         wrbuf_putc(term_dict, '(');
858         for (i = 0; i < wrbuf_len(term_component); )
859         {
860             int j = 0;
861
862             while (j < i)
863                 string_rel_add_char(term_dict, term_component, &j);
864             wrbuf_putc(term_dict, '[');
865
866             wrbuf_putc(term_dict, '^');
867             wrbuf_putc(term_dict, '-');
868             string_rel_add_char(term_dict, term_component, &i);
869
870             wrbuf_putc(term_dict, ']');
871             wrbuf_putc(term_dict, '.');
872             wrbuf_putc(term_dict, '*');
873
874             wrbuf_putc(term_dict, '|');
875         }
876         for (i = 0; i < wrbuf_len(term_component); )
877             string_rel_add_char(term_dict, term_component, &i);
878         wrbuf_putc(term_dict, '.');
879         wrbuf_putc(term_dict, '+');
880         wrbuf_putc(term_dict, ')');
881         break;
882     case 4:
883         if (!term_100(zm, term_sub, term_component, space_split, display_term))
884         {
885             wrbuf_destroy(term_component);
886             return 0;
887         }
888         yaz_log(log_level_rpn, "Relation >=");
889
890         wrbuf_putc(term_dict, '(');
891         for (i = 0; i < wrbuf_len(term_component); )
892         {
893             int j = 0;
894
895             if (i)
896                 wrbuf_putc(term_dict, '|');
897             while (j < i)
898                 string_rel_add_char(term_dict, term_component, &j);
899             wrbuf_putc(term_dict, '[');
900
901             if (i < wrbuf_len(term_component)-1)
902             {
903                 wrbuf_putc(term_dict, '^');
904                 wrbuf_putc(term_dict, '-');
905                 string_rel_add_char(term_dict, term_component, &i);
906             }
907             else
908             {
909                 string_rel_add_char(term_dict, term_component, &i);
910                 wrbuf_putc(term_dict, '-');
911             }
912             wrbuf_putc(term_dict, ']');
913             wrbuf_putc(term_dict, '.');
914             wrbuf_putc(term_dict, '*');
915         }
916         wrbuf_putc(term_dict, ')');
917         break;
918     case 3:
919     case 102:
920     case -1:
921         if (!**term_sub)
922             return 1;
923         yaz_log(log_level_rpn, "Relation =");
924         if (!term_100(zm, term_sub, term_component, space_split, display_term))
925         {
926             wrbuf_destroy(term_component);
927             return 0;
928         }
929         wrbuf_puts(term_dict, "(");
930         wrbuf_puts(term_dict, wrbuf_cstr(term_component));
931         wrbuf_puts(term_dict, ")");
932         break;
933     case 103:
934         yaz_log(log_level_rpn, "Relation always matches");
935         /* skip to end of term (we don't care what it is) */
936         while (**term_sub != '\0')
937             (*term_sub)++;
938         break;
939     default:
940         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
941         wrbuf_destroy(term_component);
942         return 0;
943     }
944     wrbuf_destroy(term_component);
945     return 1;
946 }
947
948 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
949                              const char **term_sub,
950                              WRBUF term_dict,
951                              const Odr_oid *attributeSet, NMEM stream,
952                              struct grep_info *grep_info,
953                              const char *index_type, int complete_flag,
954                              WRBUF display_term,
955                              const char *xpath_use,
956                              struct ord_list **ol,
957                              zebra_map_t zm, size_t token_number);
958
959 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
960                                 Z_AttributesPlusTerm *zapt,
961                                 zint *hits_limit_value,
962                                 const char **term_ref_id_str,
963                                 NMEM nmem)
964 {
965     AttrType term_ref_id_attr;
966     AttrType hits_limit_attr;
967     int term_ref_id_int;
968     zint hits_limit_from_attr;
969
970     attr_init_APT(&hits_limit_attr, zapt, 11);
971     hits_limit_from_attr  = attr_find(&hits_limit_attr, NULL);
972
973     attr_init_APT(&term_ref_id_attr, zapt, 10);
974     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
975     if (term_ref_id_int >= 0)
976     {
977         char *res = nmem_malloc(nmem, 20);
978         sprintf(res, "%d", term_ref_id_int);
979         *term_ref_id_str = res;
980     }
981     if (hits_limit_from_attr != -1)
982         *hits_limit_value = hits_limit_from_attr;
983
984     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
985             *term_ref_id_str ? *term_ref_id_str : "none",
986             *hits_limit_value);
987     return ZEBRA_OK;
988 }
989
990 /** \brief search for term (which may be truncated)
991  */
992 static ZEBRA_RES search_term(ZebraHandle zh,
993                              Z_AttributesPlusTerm *zapt,
994                              const char **term_sub,
995                              const Odr_oid *attributeSet,
996                              zint hits_limit, NMEM stream,
997                              struct grep_info *grep_info,
998                              const char *index_type, int complete_flag,
999                              const char *rank_type,
1000                              const char *xpath_use,
1001                              NMEM rset_nmem,
1002                              RSET *rset,
1003                              struct rset_key_control *kc,
1004                              zebra_map_t zm,
1005                              size_t token_number)
1006 {
1007     ZEBRA_RES res;
1008     struct ord_list *ol;
1009     zint hits_limit_value = hits_limit;
1010     const char *term_ref_id_str = 0;
1011     WRBUF term_dict = wrbuf_alloc();
1012     WRBUF display_term = wrbuf_alloc();
1013     *rset = 0;
1014     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1015                           stream);
1016     grep_info->isam_p_indx = 0;
1017     res = string_term(zh, zapt, term_sub, term_dict,
1018                       attributeSet, stream, grep_info,
1019                       index_type, complete_flag,
1020                       display_term, xpath_use, &ol, zm, token_number);
1021     wrbuf_destroy(term_dict);
1022     if (res == ZEBRA_OK && *term_sub)
1023     {
1024         yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
1025         *rset = rset_trunc(zh, grep_info->isam_p_buf,
1026                            grep_info->isam_p_indx, wrbuf_buf(display_term),
1027                            wrbuf_len(display_term), rank_type,
1028                            1 /* preserve pos */,
1029                            zapt->term->which, rset_nmem,
1030                            kc, kc->scope, ol, index_type, hits_limit_value,
1031                            term_ref_id_str);
1032         if (!*rset)
1033             res = ZEBRA_FAIL;
1034     }
1035     wrbuf_destroy(display_term);
1036     return res;
1037 }
1038
1039 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1040                              const char **term_sub,
1041                              WRBUF term_dict,
1042                              const Odr_oid *attributeSet, NMEM stream,
1043                              struct grep_info *grep_info,
1044                              const char *index_type, int complete_flag,
1045                              WRBUF display_term,
1046                              const char *xpath_use,
1047                              struct ord_list **ol,
1048                              zebra_map_t zm, size_t token_number)
1049 {
1050     int r;
1051     AttrType truncation;
1052     int truncation_value;
1053     const char *termp;
1054     struct rpn_char_map_info rcmi;
1055
1056     int space_split = complete_flag ? 0 : 1;
1057     int ord = -1;
1058     int regex_range = 0;
1059     int max_pos, prefix_len = 0;
1060     int relation_error;
1061     char ord_buf[32];
1062     int ord_len, i;
1063
1064     *ol = ord_list_create(stream);
1065
1066     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1067     attr_init_APT(&truncation, zapt, 5);
1068     truncation_value = attr_find(&truncation, NULL);
1069     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1070
1071     termp = *term_sub; /* start of term for each database */
1072
1073     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1074                           attributeSet, &ord) != ZEBRA_OK)
1075     {
1076         *term_sub = 0;
1077         return ZEBRA_FAIL;
1078     }
1079
1080     wrbuf_rewind(term_dict); /* new dictionary regexp term */
1081
1082     *ol = ord_list_append(stream, *ol, ord);
1083     ord_len = key_SU_encode(ord, ord_buf);
1084
1085     wrbuf_putc(term_dict, '(');
1086
1087     for (i = 0; i < ord_len; i++)
1088     {
1089         wrbuf_putc(term_dict, 1);  /* our internal regexp escape char */
1090         wrbuf_putc(term_dict, ord_buf[i]);
1091     }
1092     wrbuf_putc(term_dict, ')');
1093
1094     prefix_len = wrbuf_len(term_dict);
1095
1096     if (zebra_maps_is_icu(zm))
1097     {
1098         int relation_value;
1099         AttrType relation;
1100
1101         attr_init_APT(&relation, zapt, 2);
1102         relation_value = attr_find(&relation, NULL);
1103         if (relation_value == 103) /* always matches */
1104             termp += strlen(termp); /* move to end of term */
1105         else if (relation_value == 3 || relation_value == 102 || relation_value == -1)
1106         {
1107             /* ICU case */
1108             switch (truncation_value)
1109             {
1110             case -1:         /* not specified */
1111             case 100:        /* do not truncate */
1112                 if (!term_100_icu(zm, &termp, term_dict, display_term, 0, token_number))
1113                 {
1114                     *term_sub = 0;
1115                     return ZEBRA_OK;
1116                 }
1117                 break;
1118             case 102:
1119                 if (!term_102_icu(zm, &termp, term_dict, space_split, display_term))
1120                 {
1121                     *term_sub = 0;
1122                     return ZEBRA_OK;
1123                 }
1124                 break;
1125             case 1:          /* right truncation */
1126                 if (!term_100_icu(zm, &termp, term_dict, display_term, 1, token_number))
1127                 {
1128                     *term_sub = 0;
1129                     return ZEBRA_OK;
1130                 }
1131                 break;
1132             case 2:
1133                 if (!term_100_icu(zm, &termp, term_dict, display_term, 2, token_number))
1134                 {
1135                     *term_sub = 0;
1136                     return ZEBRA_OK;
1137                 }
1138                 break;
1139             case 3:
1140                 if (!term_100_icu(zm, &termp, term_dict, display_term, 3, token_number))
1141                 {
1142                     *term_sub = 0;
1143                     return ZEBRA_OK;
1144                 }
1145                 break;
1146             default:
1147                 zebra_setError_zint(zh,
1148                                     YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1149                                     truncation_value);
1150                 return ZEBRA_FAIL;
1151             }
1152         }
1153         else
1154         {
1155             zebra_setError_zint(zh,
1156                                 YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE,
1157                                 relation_value);
1158             return ZEBRA_FAIL;
1159         }
1160     }
1161     else
1162     {
1163         /* non-ICU case. using string.chr and friends */
1164         switch (truncation_value)
1165         {
1166         case -1:         /* not specified */
1167         case 100:        /* do not truncate */
1168             if (!string_relation(zh, zapt, &termp, term_dict,
1169                                  attributeSet,
1170                                  zm, space_split, display_term,
1171                                  &relation_error))
1172             {
1173                 if (relation_error)
1174                 {
1175                     zebra_setError(zh, relation_error, 0);
1176                     return ZEBRA_FAIL;
1177                 }
1178                 *term_sub = 0;
1179                 return ZEBRA_OK;
1180             }
1181             break;
1182         case 1:          /* right truncation */
1183             wrbuf_putc(term_dict, '(');
1184             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1185             {
1186                 *term_sub = 0;
1187                 return ZEBRA_OK;
1188             }
1189             wrbuf_puts(term_dict, ".*)");
1190             break;
1191         case 2:          /* left truncation */
1192             wrbuf_puts(term_dict, "(.*");
1193             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1194             {
1195                 *term_sub = 0;
1196                 return ZEBRA_OK;
1197             }
1198             wrbuf_putc(term_dict, ')');
1199             break;
1200         case 3:          /* left&right truncation */
1201             wrbuf_puts(term_dict, "(.*");
1202             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1203             {
1204                 *term_sub = 0;
1205                 return ZEBRA_OK;
1206             }
1207             wrbuf_puts(term_dict, ".*)");
1208             break;
1209         case 101:        /* process # in term */
1210             wrbuf_putc(term_dict, '(');
1211             if (!term_101(zm, &termp, term_dict, space_split, display_term))
1212             {
1213                 *term_sub = 0;
1214                 return ZEBRA_OK;
1215             }
1216             wrbuf_puts(term_dict, ")");
1217             break;
1218         case 102:        /* Regexp-1 */
1219             wrbuf_putc(term_dict, '(');
1220             if (!term_102(zm, &termp, term_dict, space_split, display_term))
1221             {
1222                 *term_sub = 0;
1223                 return ZEBRA_OK;
1224             }
1225             wrbuf_putc(term_dict, ')');
1226             break;
1227         case 103:       /* Regexp-2 */
1228             regex_range = 1;
1229             wrbuf_putc(term_dict, '(');
1230             if (!term_103(zm, &termp, term_dict, &regex_range,
1231                           space_split, display_term))
1232             {
1233                 *term_sub = 0;
1234                 return ZEBRA_OK;
1235             }
1236             wrbuf_putc(term_dict, ')');
1237             break;
1238         case 104:        /* process ?n * # term */
1239             wrbuf_putc(term_dict, '(');
1240             if (!term_104(zm, &termp, term_dict, space_split, display_term))
1241             {
1242                 *term_sub = 0;
1243                 return ZEBRA_OK;
1244             }
1245             wrbuf_putc(term_dict, ')');
1246             break;
1247         case 105:        /* process * ! in term and right truncate */
1248             wrbuf_putc(term_dict, '(');
1249             if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1250             {
1251                 *term_sub = 0;
1252                 return ZEBRA_OK;
1253             }
1254             wrbuf_putc(term_dict, ')');
1255             break;
1256         case 106:        /* process * ! in term */
1257             wrbuf_putc(term_dict, '(');
1258             if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1259             {
1260                 *term_sub = 0;
1261                 return ZEBRA_OK;
1262             }
1263             wrbuf_putc(term_dict, ')');
1264             break;
1265         default:
1266             zebra_setError_zint(zh,
1267                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1268                                 truncation_value);
1269             return ZEBRA_FAIL;
1270         }
1271     }
1272     if (1)
1273     {
1274         char buf[1000];
1275         const char *input = wrbuf_cstr(term_dict) + prefix_len;
1276         esc_str(buf, sizeof(buf), input, strlen(input));
1277     }
1278     {
1279         WRBUF pr_wr = wrbuf_alloc();
1280
1281         wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1282         yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1283         wrbuf_destroy(pr_wr);
1284     }
1285     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1286                          grep_info, &max_pos,
1287                          ord_len /* number of "exact" chars */,
1288                          grep_handle);
1289     if (r == 1)
1290         zebra_set_partial_result(zh);
1291     else if (r)
1292         yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1293     *term_sub = termp;
1294     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1295     return ZEBRA_OK;
1296 }
1297
1298
1299
1300 static void grep_info_delete(struct grep_info *grep_info)
1301 {
1302 #ifdef TERM_COUNT
1303     xfree(grep_info->term_no);
1304 #endif
1305     xfree(grep_info->isam_p_buf);
1306 }
1307
1308 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1309                                    Z_AttributesPlusTerm *zapt,
1310                                    struct grep_info *grep_info,
1311                                    const char *index_type)
1312 {
1313 #ifdef TERM_COUNT
1314     grep_info->term_no = 0;
1315 #endif
1316     grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1317     grep_info->isam_p_size = 0;
1318     grep_info->isam_p_buf = NULL;
1319     grep_info->zh = zh;
1320     grep_info->index_type = index_type;
1321     grep_info->termset = 0;
1322     if (zapt)
1323     {
1324         AttrType truncmax;
1325         int truncmax_value;
1326
1327         attr_init_APT(&truncmax, zapt, 13);
1328         truncmax_value = attr_find(&truncmax, NULL);
1329         if (truncmax_value != -1)
1330             grep_info->trunc_max = truncmax_value;
1331     }
1332     if (zapt)
1333     {
1334         AttrType termset;
1335         int termset_value_numeric;
1336         const char *termset_value_string;
1337
1338         attr_init_APT(&termset, zapt, 8);
1339         termset_value_numeric =
1340             attr_find_ex(&termset, NULL, &termset_value_string);
1341         if (termset_value_numeric != -1)
1342         {
1343 #if TERMSET_DISABLE
1344             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1345             return ZEBRA_FAIL;
1346 #else
1347             char resname[32];
1348             const char *termset_name = 0;
1349             if (termset_value_numeric != -2)
1350             {
1351
1352                 sprintf(resname, "%d", termset_value_numeric);
1353                 termset_name = resname;
1354             }
1355             else
1356                 termset_name = termset_value_string;
1357             yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1358             grep_info->termset = resultSetAdd(zh, termset_name, 1);
1359             if (!grep_info->termset)
1360             {
1361                 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1362                 return ZEBRA_FAIL;
1363             }
1364 #endif
1365         }
1366     }
1367     return ZEBRA_OK;
1368 }
1369
1370 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1371                                      Z_AttributesPlusTerm *zapt,
1372                                      const char *termz,
1373                                      const Odr_oid *attributeSet,
1374                                      zint hits_limit,
1375                                      NMEM stream,
1376                                      const char *index_type, int complete_flag,
1377                                      const char *rank_type,
1378                                      const char *xpath_use,
1379                                      NMEM rset_nmem,
1380                                      RSET **result_sets, int *num_result_sets,
1381                                      struct rset_key_control *kc,
1382                                      zebra_map_t zm)
1383 {
1384     struct grep_info grep_info;
1385     const char *termp = termz;
1386     int alloc_sets = 0;
1387
1388     *num_result_sets = 0;
1389     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1390         return ZEBRA_FAIL;
1391     while (1)
1392     {
1393         ZEBRA_RES res;
1394
1395         if (alloc_sets == *num_result_sets)
1396         {
1397             int add = 10;
1398             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1399                                               sizeof(*rnew));
1400             if (alloc_sets)
1401                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1402             alloc_sets = alloc_sets + add;
1403             *result_sets = rnew;
1404         }
1405         res = search_term(zh, zapt, &termp, attributeSet, hits_limit,
1406                           stream, &grep_info,
1407                           index_type, complete_flag,
1408                           rank_type,
1409                           xpath_use, rset_nmem,
1410                           &(*result_sets)[*num_result_sets],
1411                           kc, zm,
1412                           *num_result_sets);
1413         if (res != ZEBRA_OK)
1414         {
1415             int i;
1416             for (i = 0; i < *num_result_sets; i++)
1417                 rset_delete((*result_sets)[i]);
1418             grep_info_delete(&grep_info);
1419             return res;
1420         }
1421         if ((*result_sets)[*num_result_sets] == 0)
1422             break;
1423         (*num_result_sets)++;
1424
1425         if (!*termp)
1426             break;
1427     }
1428     grep_info_delete(&grep_info);
1429     return ZEBRA_OK;
1430 }
1431
1432 /**
1433    \brief Create result set(s) for list of terms
1434    \param zh Zebra Handle
1435    \param zapt Attributes Plust Term (RPN leaf)
1436    \param termz term as used in query but converted to UTF-8
1437    \param attributeSet default attribute set
1438    \param stream memory for result
1439    \param index_type register type ("w", "p",..)
1440    \param complete_flag whether it's phrases or not
1441    \param rank_type term flags for ranking
1442    \param xpath_use use attribute for X-Path (-1 for no X-path)
1443    \param rset_nmem memory for result sets
1444    \param result_sets output result set for each term in list (output)
1445    \param num_result_sets number of output result sets
1446    \param kc rset key control to be used for created result sets
1447 */
1448 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1449                                    Z_AttributesPlusTerm *zapt,
1450                                    const char *termz,
1451                                    const Odr_oid *attributeSet,
1452                                    zint hits_limit,
1453                                    NMEM stream,
1454                                    const char *index_type, int complete_flag,
1455                                    const char *rank_type,
1456                                    const char *xpath_use,
1457                                    NMEM rset_nmem,
1458                                    RSET **result_sets, int *num_result_sets,
1459                                    struct rset_key_control *kc)
1460 {
1461     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1462     return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit,
1463                                stream, index_type, complete_flag,
1464                                rank_type, xpath_use,
1465                                rset_nmem, result_sets, num_result_sets,
1466                                kc, zm);
1467 }
1468
1469
1470 /** \brief limit a search by position - returns result set
1471  */
1472 static ZEBRA_RES search_position(ZebraHandle zh,
1473                                  Z_AttributesPlusTerm *zapt,
1474                                  const Odr_oid *attributeSet,
1475                                  const char *index_type,
1476                                  NMEM rset_nmem,
1477                                  RSET *rset,
1478                                  struct rset_key_control *kc)
1479 {
1480     int position_value;
1481     AttrType position;
1482     int ord = -1;
1483     char ord_buf[32];
1484     char term_dict[100];
1485     int ord_len;
1486     char *val;
1487     ISAM_P isam_p;
1488     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1489
1490     attr_init_APT(&position, zapt, 3);
1491     position_value = attr_find(&position, NULL);
1492     switch(position_value)
1493     {
1494     case 3:
1495     case -1:
1496         return ZEBRA_OK;
1497     case 1:
1498     case 2:
1499         break;
1500     default:
1501         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1502                             position_value);
1503         return ZEBRA_FAIL;
1504     }
1505
1506
1507     if (!zebra_maps_is_first_in_field(zm))
1508     {
1509         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1510                             position_value);
1511         return ZEBRA_FAIL;
1512     }
1513
1514     if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1515                           attributeSet, &ord) != ZEBRA_OK)
1516     {
1517         return ZEBRA_FAIL;
1518     }
1519     ord_len = key_SU_encode(ord, ord_buf);
1520     memcpy(term_dict, ord_buf, ord_len);
1521     strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1522     val = dict_lookup(zh->reg->dict, term_dict);
1523     if (val)
1524     {
1525         assert(*val == sizeof(ISAM_P));
1526         memcpy(&isam_p, val+1, sizeof(isam_p));
1527
1528         *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope,
1529                                        isam_p, 0);
1530     }
1531     return ZEBRA_OK;
1532 }
1533
1534 /** \brief returns result set for phrase search
1535  */
1536 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1537                                        Z_AttributesPlusTerm *zapt,
1538                                        const char *termz_org,
1539                                        const Odr_oid *attributeSet,
1540                                        zint hits_limit,
1541                                        NMEM stream,
1542                                        const char *index_type,
1543                                        int complete_flag,
1544                                        const char *rank_type,
1545                                        const char *xpath_use,
1546                                        NMEM rset_nmem,
1547                                        RSET *rset,
1548                                        struct rset_key_control *kc)
1549 {
1550     RSET *result_sets = 0;
1551     int num_result_sets = 0;
1552     ZEBRA_RES res =
1553         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1554                           stream, index_type, complete_flag,
1555                           rank_type, xpath_use,
1556                           rset_nmem,
1557                           &result_sets, &num_result_sets, kc);
1558
1559     if (res != ZEBRA_OK)
1560         return res;
1561
1562     if (num_result_sets > 0)
1563     {
1564         RSET first_set = 0;
1565         res = search_position(zh, zapt, attributeSet,
1566                               index_type,
1567                               rset_nmem, &first_set,
1568                               kc);
1569         if (res != ZEBRA_OK)
1570         {
1571             int i;
1572             for (i = 0; i < num_result_sets; i++)
1573                 rset_delete(result_sets[i]);
1574             return res;
1575         }
1576         if (first_set)
1577         {
1578             RSET *nsets = nmem_malloc(stream,
1579                                       sizeof(RSET) * (num_result_sets+1));
1580             nsets[0] = first_set;
1581             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1582             result_sets = nsets;
1583             num_result_sets++;
1584         }
1585     }
1586     if (num_result_sets == 0)
1587         *rset = rset_create_null(rset_nmem, kc, 0);
1588     else if (num_result_sets == 1)
1589         *rset = result_sets[0];
1590     else
1591         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1592                                  num_result_sets, result_sets,
1593                                  1 /* ordered */, 0 /* exclusion */,
1594                                  3 /* relation */, 1 /* distance */);
1595     if (!*rset)
1596         return ZEBRA_FAIL;
1597     return ZEBRA_OK;
1598 }
1599
1600 /** \brief returns result set for or-list search
1601  */
1602 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1603                                         Z_AttributesPlusTerm *zapt,
1604                                         const char *termz_org,
1605                                         const Odr_oid *attributeSet,
1606                                         zint hits_limit,
1607                                         NMEM stream,
1608                                         const char *index_type,
1609                                         int complete_flag,
1610                                         const char *rank_type,
1611                                         const char *xpath_use,
1612                                         NMEM rset_nmem,
1613                                         RSET *rset,
1614                                         struct rset_key_control *kc)
1615 {
1616     RSET *result_sets = 0;
1617     int num_result_sets = 0;
1618     int i;
1619     ZEBRA_RES res =
1620         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1621                           stream, index_type, complete_flag,
1622                           rank_type, xpath_use,
1623                           rset_nmem,
1624                           &result_sets, &num_result_sets, kc);
1625     if (res != ZEBRA_OK)
1626         return res;
1627
1628     for (i = 0; i < num_result_sets; i++)
1629     {
1630         RSET first_set = 0;
1631         res = search_position(zh, zapt, attributeSet,
1632                               index_type,
1633                               rset_nmem, &first_set,
1634                               kc);
1635         if (res != ZEBRA_OK)
1636         {
1637             for (i = 0; i < num_result_sets; i++)
1638                 rset_delete(result_sets[i]);
1639             return res;
1640         }
1641
1642         if (first_set)
1643         {
1644             RSET tmp_set[2];
1645
1646             tmp_set[0] = first_set;
1647             tmp_set[1] = result_sets[i];
1648
1649             result_sets[i] = rset_create_prox(
1650                 rset_nmem, kc, kc->scope,
1651                 2, tmp_set,
1652                 1 /* ordered */, 0 /* exclusion */,
1653                 3 /* relation */, 1 /* distance */);
1654         }
1655     }
1656     if (num_result_sets == 0)
1657         *rset = rset_create_null(rset_nmem, kc, 0);
1658     else if (num_result_sets == 1)
1659         *rset = result_sets[0];
1660     else
1661         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1662                                num_result_sets, result_sets);
1663     if (!*rset)
1664         return ZEBRA_FAIL;
1665     return ZEBRA_OK;
1666 }
1667
1668 /** \brief returns result set for and-list search
1669  */
1670 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1671                                          Z_AttributesPlusTerm *zapt,
1672                                          const char *termz_org,
1673                                          const Odr_oid *attributeSet,
1674                                          zint hits_limit,
1675                                          NMEM stream,
1676                                          const char *index_type,
1677                                          int complete_flag,
1678                                          const char *rank_type,
1679                                          const char *xpath_use,
1680                                          NMEM rset_nmem,
1681                                          RSET *rset,
1682                                          struct rset_key_control *kc)
1683 {
1684     RSET *result_sets = 0;
1685     int num_result_sets = 0;
1686     int i;
1687     ZEBRA_RES res =
1688         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1689                           stream, index_type, complete_flag,
1690                           rank_type, xpath_use,
1691                           rset_nmem,
1692                           &result_sets, &num_result_sets,
1693                           kc);
1694     if (res != ZEBRA_OK)
1695         return res;
1696     for (i = 0; i < num_result_sets; i++)
1697     {
1698         RSET first_set = 0;
1699         res = search_position(zh, zapt, attributeSet,
1700                               index_type,
1701                               rset_nmem, &first_set,
1702                               kc);
1703         if (res != ZEBRA_OK)
1704         {
1705             for (i = 0; i < num_result_sets; i++)
1706                 rset_delete(result_sets[i]);
1707             return res;
1708         }
1709
1710         if (first_set)
1711         {
1712             RSET tmp_set[2];
1713
1714             tmp_set[0] = first_set;
1715             tmp_set[1] = result_sets[i];
1716
1717             result_sets[i] = rset_create_prox(
1718                 rset_nmem, kc, kc->scope,
1719                 2, tmp_set,
1720                 1 /* ordered */, 0 /* exclusion */,
1721                 3 /* relation */, 1 /* distance */);
1722         }
1723     }
1724
1725
1726     if (num_result_sets == 0)
1727         *rset = rset_create_null(rset_nmem, kc, 0);
1728     else if (num_result_sets == 1)
1729         *rset = result_sets[0];
1730     else
1731         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1732                                 num_result_sets, result_sets);
1733     if (!*rset)
1734         return ZEBRA_FAIL;
1735     return ZEBRA_OK;
1736 }
1737
1738 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1739                             const char **term_sub,
1740                             WRBUF term_dict,
1741                             const Odr_oid *attributeSet,
1742                             struct grep_info *grep_info,
1743                             int *max_pos,
1744                             zebra_map_t zm,
1745                             WRBUF display_term,
1746                             int *error_code)
1747 {
1748     AttrType relation;
1749     int relation_value;
1750     int term_value;
1751     int r;
1752     WRBUF term_num = wrbuf_alloc();
1753
1754     *error_code = 0;
1755     attr_init_APT(&relation, zapt, 2);
1756     relation_value = attr_find(&relation, NULL);
1757
1758     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1759
1760     switch (relation_value)
1761     {
1762     case 1:
1763         yaz_log(log_level_rpn, "Relation <");
1764         if (!term_100(zm, term_sub, term_num, 1, display_term))
1765         {
1766             wrbuf_destroy(term_num);
1767             return 0;
1768         }
1769         term_value = atoi(wrbuf_cstr(term_num));
1770         gen_regular_rel(term_dict, term_value-1, 1);
1771         break;
1772     case 2:
1773         yaz_log(log_level_rpn, "Relation <=");
1774         if (!term_100(zm, term_sub, term_num, 1, display_term))
1775         {
1776             wrbuf_destroy(term_num);
1777             return 0;
1778         }
1779         term_value = atoi(wrbuf_cstr(term_num));
1780         gen_regular_rel(term_dict, term_value, 1);
1781         break;
1782     case 4:
1783         yaz_log(log_level_rpn, "Relation >=");
1784         if (!term_100(zm, term_sub, term_num, 1, display_term))
1785         {
1786             wrbuf_destroy(term_num);
1787             return 0;
1788         }
1789         term_value = atoi(wrbuf_cstr(term_num));
1790         gen_regular_rel(term_dict, term_value, 0);
1791         break;
1792     case 5:
1793         yaz_log(log_level_rpn, "Relation >");
1794         if (!term_100(zm, term_sub, term_num, 1, display_term))
1795         {
1796             wrbuf_destroy(term_num);
1797             return 0;
1798         }
1799         term_value = atoi(wrbuf_cstr(term_num));
1800         gen_regular_rel(term_dict, term_value+1, 0);
1801         break;
1802     case -1:
1803     case 3:
1804         yaz_log(log_level_rpn, "Relation =");
1805         if (!term_100(zm, term_sub, term_num, 1, display_term))
1806         {
1807             wrbuf_destroy(term_num);
1808             return 0;
1809         }
1810         term_value = atoi(wrbuf_cstr(term_num));
1811         wrbuf_printf(term_dict, "(0*%d)", term_value);
1812         break;
1813     case 103:
1814         /* term_tmp untouched.. */
1815         while (**term_sub != '\0')
1816             (*term_sub)++;
1817         break;
1818     default:
1819         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1820         wrbuf_destroy(term_num);
1821         return 0;
1822     }
1823     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1824                          0, grep_info, max_pos, 0, grep_handle);
1825
1826     if (r == 1)
1827         zebra_set_partial_result(zh);
1828     else if (r)
1829         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1830     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1831     wrbuf_destroy(term_num);
1832     return 1;
1833 }
1834
1835 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1836                               const char **term_sub,
1837                               WRBUF term_dict,
1838                               const Odr_oid *attributeSet, NMEM stream,
1839                               struct grep_info *grep_info,
1840                               const char *index_type, int complete_flag,
1841                               WRBUF display_term,
1842                               const char *xpath_use,
1843                               struct ord_list **ol)
1844 {
1845     const char *termp;
1846     struct rpn_char_map_info rcmi;
1847     int max_pos;
1848     int relation_error = 0;
1849     int ord, ord_len, i;
1850     char ord_buf[32];
1851     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1852
1853     *ol = ord_list_create(stream);
1854
1855     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1856
1857     termp = *term_sub;
1858
1859     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1860                           attributeSet, &ord) != ZEBRA_OK)
1861     {
1862         return ZEBRA_FAIL;
1863     }
1864
1865     wrbuf_rewind(term_dict);
1866
1867     *ol = ord_list_append(stream, *ol, ord);
1868
1869     ord_len = key_SU_encode(ord, ord_buf);
1870
1871     wrbuf_putc(term_dict, '(');
1872     for (i = 0; i < ord_len; i++)
1873     {
1874         wrbuf_putc(term_dict, 1);
1875         wrbuf_putc(term_dict, ord_buf[i]);
1876     }
1877     wrbuf_putc(term_dict, ')');
1878
1879     if (!numeric_relation(zh, zapt, &termp, term_dict,
1880                           attributeSet, grep_info, &max_pos, zm,
1881                           display_term, &relation_error))
1882     {
1883         if (relation_error)
1884         {
1885             zebra_setError(zh, relation_error, 0);
1886             return ZEBRA_FAIL;
1887         }
1888         *term_sub = 0;
1889         return ZEBRA_OK;
1890     }
1891     *term_sub = termp;
1892     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1893     return ZEBRA_OK;
1894 }
1895
1896
1897 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1898                                         Z_AttributesPlusTerm *zapt,
1899                                         const char *termz,
1900                                         const Odr_oid *attributeSet,
1901                                         zint hits_limit,
1902                                         NMEM stream,
1903                                         const char *index_type,
1904                                         int complete_flag,
1905                                         const char *rank_type,
1906                                         const char *xpath_use,
1907                                         NMEM rset_nmem,
1908                                         RSET *rset,
1909                                         struct rset_key_control *kc)
1910 {
1911     const char *termp = termz;
1912     RSET *result_sets = 0;
1913     int num_result_sets = 0;
1914     ZEBRA_RES res;
1915     struct grep_info grep_info;
1916     int alloc_sets = 0;
1917     zint hits_limit_value = hits_limit;
1918     const char *term_ref_id_str = 0;
1919
1920     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1921                           stream);
1922
1923     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1924     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1925         return ZEBRA_FAIL;
1926     while (1)
1927     {
1928         struct ord_list *ol;
1929         WRBUF term_dict = wrbuf_alloc();
1930         WRBUF display_term = wrbuf_alloc();
1931         if (alloc_sets == num_result_sets)
1932         {
1933             int add = 10;
1934             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1935                                               sizeof(*rnew));
1936             if (alloc_sets)
1937                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1938             alloc_sets = alloc_sets + add;
1939             result_sets = rnew;
1940         }
1941         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1942         grep_info.isam_p_indx = 0;
1943         res = numeric_term(zh, zapt, &termp, term_dict,
1944                            attributeSet, stream, &grep_info,
1945                            index_type, complete_flag,
1946                            display_term, xpath_use, &ol);
1947         wrbuf_destroy(term_dict);
1948         if (res == ZEBRA_FAIL || termp == 0)
1949         {
1950             wrbuf_destroy(display_term);
1951             break;
1952         }
1953         yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
1954         result_sets[num_result_sets] =
1955             rset_trunc(zh, grep_info.isam_p_buf,
1956                        grep_info.isam_p_indx, wrbuf_buf(display_term),
1957                        wrbuf_len(display_term), rank_type,
1958                        0 /* preserve position */,
1959                        zapt->term->which, rset_nmem,
1960                        kc, kc->scope, ol, index_type,
1961                        hits_limit_value,
1962                        term_ref_id_str);
1963         wrbuf_destroy(display_term);
1964         if (!result_sets[num_result_sets])
1965             break;
1966         num_result_sets++;
1967         if (!*termp)
1968             break;
1969     }
1970     grep_info_delete(&grep_info);
1971
1972     if (res != ZEBRA_OK)
1973         return res;
1974     if (num_result_sets == 0)
1975         *rset = rset_create_null(rset_nmem, kc, 0);
1976     else if (num_result_sets == 1)
1977         *rset = result_sets[0];
1978     else
1979         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1980                                 num_result_sets, result_sets);
1981     if (!*rset)
1982         return ZEBRA_FAIL;
1983     return ZEBRA_OK;
1984 }
1985
1986 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1987                                       Z_AttributesPlusTerm *zapt,
1988                                       const char *termz,
1989                                       const Odr_oid *attributeSet,
1990                                       NMEM stream,
1991                                       const char *rank_type, NMEM rset_nmem,
1992                                       RSET *rset,
1993                                       struct rset_key_control *kc)
1994 {
1995     Record rec;
1996     zint sysno = atozint(termz);
1997
1998     if (sysno <= 0)
1999         sysno = 0;
2000     rec = rec_get(zh->reg->records, sysno);
2001     if (!rec)
2002         sysno = 0;
2003
2004     rec_free(&rec);
2005
2006     if (sysno <= 0)
2007     {
2008         *rset = rset_create_null(rset_nmem, kc, 0);
2009     }
2010     else
2011     {
2012         RSFD rsfd;
2013         struct it_key key;
2014         *rset = rset_create_temp(rset_nmem, kc, kc->scope,
2015                                  res_get(zh->res, "setTmpDir"), 0);
2016         rsfd = rset_open(*rset, RSETF_WRITE);
2017
2018         key.mem[0] = sysno;
2019         key.mem[1] = 1;
2020         key.len = 2;
2021         rset_write(rsfd, &key);
2022         rset_close(rsfd);
2023     }
2024     return ZEBRA_OK;
2025 }
2026
2027 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2028                                const Odr_oid *attributeSet, NMEM stream,
2029                                Z_SortKeySpecList *sort_sequence,
2030                                const char *rank_type,
2031                                NMEM rset_nmem,
2032                                RSET *rset,
2033                                struct rset_key_control *kc)
2034 {
2035     int i;
2036     int sort_relation_value;
2037     AttrType sort_relation_type;
2038     Z_SortKeySpec *sks;
2039     Z_SortKey *sk;
2040     char termz[20];
2041
2042     attr_init_APT(&sort_relation_type, zapt, 7);
2043     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
2044
2045     if (!sort_sequence->specs)
2046     {
2047         sort_sequence->num_specs = 10;
2048         sort_sequence->specs = (Z_SortKeySpec **)
2049             nmem_malloc(stream, sort_sequence->num_specs *
2050                         sizeof(*sort_sequence->specs));
2051         for (i = 0; i < sort_sequence->num_specs; i++)
2052             sort_sequence->specs[i] = 0;
2053     }
2054     if (zapt->term->which != Z_Term_general)
2055         i = 0;
2056     else
2057         i = atoi_n((char *) zapt->term->u.general->buf,
2058                    zapt->term->u.general->len);
2059     if (i >= sort_sequence->num_specs)
2060         i = 0;
2061     sprintf(termz, "%d", i);
2062
2063     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
2064     sks->sortElement = (Z_SortElement *)
2065         nmem_malloc(stream, sizeof(*sks->sortElement));
2066     sks->sortElement->which = Z_SortElement_generic;
2067     sk = sks->sortElement->u.generic = (Z_SortKey *)
2068         nmem_malloc(stream, sizeof(*sk));
2069     sk->which = Z_SortKey_sortAttributes;
2070     sk->u.sortAttributes = (Z_SortAttributes *)
2071         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
2072
2073     sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
2074     sk->u.sortAttributes->list = zapt->attributes;
2075
2076     sks->sortRelation = (Odr_int *)
2077         nmem_malloc(stream, sizeof(*sks->sortRelation));
2078     if (sort_relation_value == 1)
2079         *sks->sortRelation = Z_SortKeySpec_ascending;
2080     else if (sort_relation_value == 2)
2081         *sks->sortRelation = Z_SortKeySpec_descending;
2082     else
2083         *sks->sortRelation = Z_SortKeySpec_ascending;
2084
2085     sks->caseSensitivity = (Odr_int *)
2086         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2087     *sks->caseSensitivity = 0;
2088
2089     sks->which = Z_SortKeySpec_null;
2090     sks->u.null = odr_nullval ();
2091     sort_sequence->specs[i] = sks;
2092     *rset = rset_create_null(rset_nmem, kc, 0);
2093     return ZEBRA_OK;
2094 }
2095
2096
2097 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2098                            const Odr_oid *attributeSet,
2099                            struct xpath_location_step *xpath, int max,
2100                            NMEM mem)
2101 {
2102     const Odr_oid *curAttributeSet = attributeSet;
2103     AttrType use;
2104     const char *use_string = 0;
2105
2106     attr_init_APT(&use, zapt, 1);
2107     attr_find_ex(&use, &curAttributeSet, &use_string);
2108
2109     if (!use_string || *use_string != '/')
2110         return -1;
2111
2112     return zebra_parse_xpath_str(use_string, xpath, max, mem);
2113 }
2114
2115
2116
2117 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2118                         const char *index_type, const char *term,
2119                         const char *xpath_use,
2120                         NMEM rset_nmem,
2121                         struct rset_key_control *kc)
2122 {
2123     struct grep_info grep_info;
2124     int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2125                                            zinfo_index_category_index,
2126                                            index_type, xpath_use);
2127     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2128         return rset_create_null(rset_nmem, kc, 0);
2129
2130     if (ord < 0)
2131         return rset_create_null(rset_nmem, kc, 0);
2132     else
2133     {
2134         int i, max_pos;
2135         char ord_buf[32];
2136         RSET rset;
2137         WRBUF term_dict = wrbuf_alloc();
2138         int ord_len = key_SU_encode(ord, ord_buf);
2139         int term_type = Z_Term_characterString;
2140         const char *flags = "void";
2141
2142         wrbuf_putc(term_dict, '(');
2143         for (i = 0; i < ord_len; i++)
2144         {
2145             wrbuf_putc(term_dict, 1);
2146             wrbuf_putc(term_dict, ord_buf[i]);
2147         }
2148         wrbuf_putc(term_dict, ')');
2149         wrbuf_puts(term_dict, term);
2150
2151         grep_info.isam_p_indx = 0;
2152         dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2153                          &grep_info, &max_pos, 0, grep_handle);
2154         yaz_log(YLOG_DEBUG, "%s %d positions", term,
2155                 grep_info.isam_p_indx);
2156         rset = rset_trunc(zh, grep_info.isam_p_buf,
2157                           grep_info.isam_p_indx, term, strlen(term),
2158                           flags, 1, term_type, rset_nmem,
2159                           kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2160                           0 /* term_ref_id_str */);
2161         grep_info_delete(&grep_info);
2162         wrbuf_destroy(term_dict);
2163         return rset;
2164     }
2165 }
2166
2167 static
2168 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2169                            NMEM stream, const char *rank_type, RSET rset,
2170                            int xpath_len, struct xpath_location_step *xpath,
2171                            NMEM rset_nmem,
2172                            RSET *rset_out,
2173                            struct rset_key_control *kc)
2174 {
2175     int i;
2176     int always_matches = rset ? 0 : 1;
2177
2178     if (xpath_len < 0)
2179     {
2180         *rset_out = rset;
2181         return ZEBRA_OK;
2182     }
2183
2184     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2185     for (i = 0; i < xpath_len; i++)
2186     {
2187         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2188
2189     }
2190
2191     /*
2192     //a    ->    a/.*
2193     //a/b  ->    b/a/.*
2194     /a     ->    a/
2195     /a/b   ->    b/a/
2196
2197     /      ->    none
2198
2199     a[@attr = value]/b[@other = othervalue]
2200
2201     /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2202     /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2203     /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2204     /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2205     /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2206     /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2207
2208     */
2209
2210     dict_grep_cmap(zh->reg->dict, 0, 0);
2211
2212     {
2213         int level = xpath_len;
2214         int first_path = 1;
2215
2216         while (--level >= 0)
2217         {
2218             WRBUF xpath_rev = wrbuf_alloc();
2219             int i;
2220             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2221
2222             for (i = level; i >= 1; --i)
2223             {
2224                 const char *cp = xpath[i].part;
2225                 if (*cp)
2226                 {
2227                     for (; *cp; cp++)
2228                     {
2229                         if (*cp == '*')
2230                             wrbuf_puts(xpath_rev, "[^/]*");
2231                         else if (*cp == ' ')
2232                             wrbuf_puts(xpath_rev, "\001 ");
2233                         else
2234                             wrbuf_putc(xpath_rev, *cp);
2235
2236                         /* wrbuf_putc does not null-terminate , but
2237                            wrbuf_puts below ensures it does.. so xpath_rev
2238                            is OK iff length is > 0 */
2239                     }
2240                     wrbuf_puts(xpath_rev, "/");
2241                 }
2242                 else if (i == 1)  /* // case */
2243                     wrbuf_puts(xpath_rev, ".*");
2244             }
2245             if (xpath[level].predicate &&
2246                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2247                 xpath[level].predicate->u.relation.name[0])
2248             {
2249                 WRBUF wbuf = wrbuf_alloc();
2250                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2251                 if (xpath[level].predicate->u.relation.value)
2252                 {
2253                     const char *cp = xpath[level].predicate->u.relation.value;
2254                     wrbuf_putc(wbuf, '=');
2255
2256                     while (*cp)
2257                     {
2258                         if (strchr(REGEX_CHARS, *cp))
2259                             wrbuf_putc(wbuf, '\\');
2260                         wrbuf_putc(wbuf, *cp);
2261                         cp++;
2262                     }
2263                 }
2264                 rset_attr = xpath_trunc(
2265                     zh, stream, "0", wrbuf_cstr(wbuf),
2266                     ZEBRA_XPATH_ATTR_NAME,
2267                     rset_nmem, kc);
2268                 wrbuf_destroy(wbuf);
2269             }
2270             else
2271             {
2272                 if (!first_path)
2273                 {
2274                     wrbuf_destroy(xpath_rev);
2275                     continue;
2276                 }
2277             }
2278             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2279                     wrbuf_cstr(xpath_rev));
2280             if (wrbuf_len(xpath_rev))
2281             {
2282                 rset_start_tag = xpath_trunc(zh, stream, "0",
2283                                              wrbuf_cstr(xpath_rev),
2284                                              ZEBRA_XPATH_ELM_BEGIN,
2285                                              rset_nmem, kc);
2286                 if (always_matches)
2287                     rset = rset_start_tag;
2288                 else
2289                 {
2290                     rset_end_tag = xpath_trunc(zh, stream, "0",
2291                                                wrbuf_cstr(xpath_rev),
2292                                                ZEBRA_XPATH_ELM_END,
2293                                                rset_nmem, kc);
2294
2295                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2296                                                rset_start_tag, rset,
2297                                                rset_end_tag, rset_attr);
2298                 }
2299             }
2300             wrbuf_destroy(xpath_rev);
2301             first_path = 0;
2302         }
2303     }
2304     *rset_out = rset;
2305     return ZEBRA_OK;
2306 }
2307
/* Size of the xpath_location_step array that rpn_search_database() keeps
   on its stack and passes, together with this limit, to rpn_check_xpath() */
#define MAX_XPATH_STEPS 10
2309
/* Forward declaration: rpn_search_APT() below calls this once per database
   name, while the definition follows after it. */
static ZEBRA_RES rpn_search_database(ZebraHandle zh,
                                     Z_AttributesPlusTerm *zapt,
                                     const Odr_oid *attributeSet,
                                     zint hits_limit, NMEM stream,
                                     Z_SortKeySpecList *sort_sequence,
                                     NMEM rset_nmem,
                                     RSET *rset,
                                     struct rset_key_control *kc);
2318
2319 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2320                                 const Odr_oid *attributeSet,
2321                                 zint hits_limit, NMEM stream,
2322                                 Z_SortKeySpecList *sort_sequence,
2323                                 int num_bases, const char **basenames,
2324                                 NMEM rset_nmem,
2325                                 RSET *rset,
2326                                 struct rset_key_control *kc)
2327 {
2328     RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2329     ZEBRA_RES res = ZEBRA_OK;
2330     int i;
2331     for (i = 0; i < num_bases; i++)
2332     {
2333
2334         if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2335         {
2336             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2337                            basenames[i]);
2338             res = ZEBRA_FAIL;
2339             break;
2340         }
2341         res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream,
2342                                   sort_sequence,
2343                                   rset_nmem, rsets+i, kc);
2344         if (res != ZEBRA_OK)
2345             break;
2346     }
2347     if (res != ZEBRA_OK)
2348     {   /* must clean up the already created sets */
2349         while (--i >= 0)
2350             rset_delete(rsets[i]);
2351         *rset = 0;
2352     }
2353     else
2354     {
2355         if (num_bases == 1)
2356             *rset = rsets[0];
2357         else if (num_bases == 0)
2358             *rset = rset_create_null(rset_nmem, kc, 0);
2359         else
2360             *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
2361                                    num_bases, rsets);
2362     }
2363     return res;
2364 }
2365
2366 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2367                                      Z_AttributesPlusTerm *zapt,
2368                                      const Odr_oid *attributeSet,
2369                                      zint hits_limit, NMEM stream,
2370                                      Z_SortKeySpecList *sort_sequence,
2371                                      NMEM rset_nmem,
2372                                      RSET *rset,
2373                                      struct rset_key_control *kc)
2374 {
2375     ZEBRA_RES res = ZEBRA_OK;
2376     const char *index_type;
2377     char *search_type = NULL;
2378     char rank_type[128];
2379     int complete_flag;
2380     int sort_flag;
2381     char termz[IT_MAX_WORD+1];
2382     int xpath_len;
2383     const char *xpath_use = 0;
2384     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2385
2386     if (!log_level_set)
2387     {
2388         log_level_rpn = yaz_log_module_level("rpn");
2389         log_level_set = 1;
2390     }
2391     zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2392                     rank_type, &complete_flag, &sort_flag);
2393
2394     yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2395     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2396     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2397     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2398
2399     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2400         return ZEBRA_FAIL;
2401
2402     if (sort_flag)
2403         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2404                              rank_type, rset_nmem, rset, kc);
2405     /* consider if an X-Path query is used */
2406     xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2407                                 xpath, MAX_XPATH_STEPS, stream);
2408     if (xpath_len >= 0)
2409     {
2410         if (xpath[xpath_len-1].part[0] == '@')
2411             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2412         else
2413             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */
2414
2415         if (1)
2416         {
2417             AttrType relation;
2418             int relation_value;
2419
2420             attr_init_APT(&relation, zapt, 2);
2421             relation_value = attr_find(&relation, NULL);
2422
2423             if (relation_value == 103) /* alwaysmatches */
2424             {
2425                 *rset = 0; /* signal no "term" set */
2426                 return rpn_search_xpath(zh, stream, rank_type, *rset,
2427                                         xpath_len, xpath, rset_nmem, rset, kc);
2428             }
2429         }
2430     }
2431
2432     /* search using one of the various search type strategies
2433        termz is our UTF-8 search term
2434        attributeSet is top-level default attribute set
2435        stream is ODR for search
2436        reg_id is the register type
2437        complete_flag is 1 for complete subfield, 0 for incomplete
2438        xpath_use is use-attribute to be used for X-Path search, 0 for none
2439     */
2440     if (!strcmp(search_type, "phrase"))
2441     {
2442         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit,
2443                                     stream,
2444                                     index_type, complete_flag, rank_type,
2445                                     xpath_use,
2446                                     rset_nmem,
2447                                     rset, kc);
2448     }
2449     else if (!strcmp(search_type, "and-list"))
2450     {
2451         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit,
2452                                       stream,
2453                                       index_type, complete_flag, rank_type,
2454                                       xpath_use,
2455                                       rset_nmem,
2456                                       rset, kc);
2457     }
2458     else if (!strcmp(search_type, "or-list"))
2459     {
2460         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit,
2461                                      stream,
2462                                      index_type, complete_flag, rank_type,
2463                                      xpath_use,
2464                                      rset_nmem,
2465                                      rset, kc);
2466     }
2467     else if (!strcmp(search_type, "local"))
2468     {
2469         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2470                                    rank_type, rset_nmem, rset, kc);
2471     }
2472     else if (!strcmp(search_type, "numeric"))
2473     {
2474         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, hits_limit,
2475                                      stream,
2476                                      index_type, complete_flag, rank_type,
2477                                      xpath_use,
2478                                      rset_nmem,
2479                                      rset, kc);
2480     }
2481     else
2482     {
2483         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2484         res = ZEBRA_FAIL;
2485     }
2486     if (res != ZEBRA_OK)
2487         return res;
2488     if (!*rset)
2489         return ZEBRA_FAIL;
2490     return rpn_search_xpath(zh, stream, rank_type, *rset,
2491                             xpath_len, xpath, rset_nmem, rset, kc);
2492 }
2493
/* Forward declaration: rpn_search_top() calls this before its definition,
   which appears further down in the file. */
static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
                                      const Odr_oid *attributeSet,
                                      zint hits_limit,
                                      NMEM stream, NMEM rset_nmem,
                                      Z_SortKeySpecList *sort_sequence,
                                      int num_bases, const char **basenames,
                                      RSET **result_sets, int *num_result_sets,
                                      Z_Operator *parent_op,
                                      struct rset_key_control *kc);
2503
2504 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2505                                    zint *approx_limit)
2506 {
2507     ZEBRA_RES res = ZEBRA_OK;
2508     if (zs->which == Z_RPNStructure_complex)
2509     {
2510         if (res == ZEBRA_OK)
2511             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2512                                            approx_limit);
2513         if (res == ZEBRA_OK)
2514             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2515                                            approx_limit);
2516     }
2517     else if (zs->which == Z_RPNStructure_simple)
2518     {
2519         if (zs->u.simple->which == Z_Operand_APT)
2520         {
2521             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2522             AttrType global_hits_limit_attr;
2523             int l;
2524
2525             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2526
2527             l = attr_find(&global_hits_limit_attr, NULL);
2528             if (l != -1)
2529                 *approx_limit = l;
2530         }
2531     }
2532     return res;
2533 }
2534
2535 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2536                          const Odr_oid *attributeSet,
2537                          zint hits_limit,
2538                          NMEM stream, NMEM rset_nmem,
2539                          Z_SortKeySpecList *sort_sequence,
2540                          int num_bases, const char **basenames,
2541                          RSET *result_set)
2542 {
2543     RSET *result_sets = 0;
2544     int num_result_sets = 0;
2545     ZEBRA_RES res;
2546     struct rset_key_control *kc = zebra_key_control_create(zh);
2547
2548     res = rpn_search_structure(zh, zs, attributeSet, hits_limit,
2549                                stream, rset_nmem,
2550                                sort_sequence,
2551                                num_bases, basenames,
2552                                &result_sets, &num_result_sets,
2553                                0 /* no parent op */,
2554                                kc);
2555     if (res != ZEBRA_OK)
2556     {
2557         int i;
2558         for (i = 0; i < num_result_sets; i++)
2559             rset_delete(result_sets[i]);
2560         *result_set = 0;
2561     }
2562     else
2563     {
2564         assert(num_result_sets == 1);
2565         assert(result_sets);
2566         assert(*result_sets);
2567         *result_set = *result_sets;
2568     }
2569     (*kc->dec)(kc);
2570     return res;
2571 }
2572
2573 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2574                                const Odr_oid *attributeSet, zint hits_limit,
2575                                NMEM stream, NMEM rset_nmem,
2576                                Z_SortKeySpecList *sort_sequence,
2577                                int num_bases, const char **basenames,
2578                                RSET **result_sets, int *num_result_sets,
2579                                Z_Operator *parent_op,
2580                                struct rset_key_control *kc)
2581 {
2582     *num_result_sets = 0;
2583     if (zs->which == Z_RPNStructure_complex)
2584     {
2585         ZEBRA_RES res;
2586         Z_Operator *zop = zs->u.complex->roperator;
2587         RSET *result_sets_l = 0;
2588         int num_result_sets_l = 0;
2589         RSET *result_sets_r = 0;
2590         int num_result_sets_r = 0;
2591
2592         res = rpn_search_structure(zh, zs->u.complex->s1,
2593                                    attributeSet, hits_limit, stream, rset_nmem,
2594                                    sort_sequence,
2595                                    num_bases, basenames,
2596                                    &result_sets_l, &num_result_sets_l,
2597                                    zop, kc);
2598         if (res != ZEBRA_OK)
2599         {
2600             int i;
2601             for (i = 0; i < num_result_sets_l; i++)
2602                 rset_delete(result_sets_l[i]);
2603             return res;
2604         }
2605         res = rpn_search_structure(zh, zs->u.complex->s2,
2606                                    attributeSet, hits_limit, stream, rset_nmem,
2607                                    sort_sequence,
2608                                    num_bases, basenames,
2609                                    &result_sets_r, &num_result_sets_r,
2610                                    zop, kc);
2611         if (res != ZEBRA_OK)
2612         {
2613             int i;
2614             for (i = 0; i < num_result_sets_l; i++)
2615                 rset_delete(result_sets_l[i]);
2616             for (i = 0; i < num_result_sets_r; i++)
2617                 rset_delete(result_sets_r[i]);
2618             return res;
2619         }
2620
2621         /* make a new list of result for all children */
2622         *num_result_sets = num_result_sets_l + num_result_sets_r;
2623         *result_sets = nmem_malloc(stream, *num_result_sets *
2624                                    sizeof(**result_sets));
2625         memcpy(*result_sets, result_sets_l,
2626                num_result_sets_l * sizeof(**result_sets));
2627         memcpy(*result_sets + num_result_sets_l, result_sets_r,
2628                num_result_sets_r * sizeof(**result_sets));
2629
2630         if (!parent_op || parent_op->which != zop->which
2631             || (zop->which != Z_Operator_and &&
2632                 zop->which != Z_Operator_or))
2633         {
2634             /* parent node different from this one (or non-present) */
2635             /* we must combine result sets now */
2636             RSET rset;
2637             switch (zop->which)
2638             {
2639             case Z_Operator_and:
2640                 rset = rset_create_and(rset_nmem, kc,
2641                                        kc->scope,
2642                                        *num_result_sets, *result_sets);
2643                 break;
2644             case Z_Operator_or:
2645                 rset = rset_create_or(rset_nmem, kc,
2646                                       kc->scope, 0, /* termid */
2647                                       *num_result_sets, *result_sets);
2648                 break;
2649             case Z_Operator_and_not:
2650                 rset = rset_create_not(rset_nmem, kc,
2651                                        kc->scope,
2652                                        (*result_sets)[0],
2653                                        (*result_sets)[1]);
2654                 break;
2655             case Z_Operator_prox:
2656                 if (zop->u.prox->which != Z_ProximityOperator_known)
2657                 {
2658                     zebra_setError(zh,
2659                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2660                                    0);
2661                     return ZEBRA_FAIL;
2662                 }
2663                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2664                 {
2665                     zebra_setError_zint(zh,
2666                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2667                                         *zop->u.prox->u.known);
2668                     return ZEBRA_FAIL;
2669                 }
2670                 else
2671                 {
2672                     rset = rset_create_prox(rset_nmem, kc,
2673                                             kc->scope,
2674                                             *num_result_sets, *result_sets,
2675                                             *zop->u.prox->ordered,
2676                                             (!zop->u.prox->exclusion ?
2677                                              0 : *zop->u.prox->exclusion),
2678                                             *zop->u.prox->relationType,
2679                                             *zop->u.prox->distance );
2680                 }
2681                 break;
2682             default:
2683                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2684                 return ZEBRA_FAIL;
2685             }
2686             *num_result_sets = 1;
2687             *result_sets = nmem_malloc(stream, *num_result_sets *
2688                                        sizeof(**result_sets));
2689             (*result_sets)[0] = rset;
2690         }
2691     }
2692     else if (zs->which == Z_RPNStructure_simple)
2693     {
2694         RSET rset;
2695         ZEBRA_RES res;
2696
2697         if (zs->u.simple->which == Z_Operand_APT)
2698         {
2699             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2700             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2701                                  attributeSet, hits_limit,
2702                                  stream, sort_sequence,
2703                                  num_bases, basenames, rset_nmem, &rset,
2704                                  kc);
2705             if (res != ZEBRA_OK)
2706                 return res;
2707         }
2708         else if (zs->u.simple->which == Z_Operand_resultSetId)
2709         {
2710             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2711             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2712             if (!rset)
2713             {
2714                 zebra_setError(zh,
2715                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2716                                zs->u.simple->u.resultSetId);
2717                 return ZEBRA_FAIL;
2718             }
2719             rset_dup(rset);
2720         }
2721         else
2722         {
2723             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2724             return ZEBRA_FAIL;
2725         }
2726         *num_result_sets = 1;
2727         *result_sets = nmem_malloc(stream, *num_result_sets *
2728                                    sizeof(**result_sets));
2729         (*result_sets)[0] = rset;
2730     }
2731     else
2732     {
2733         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2734         return ZEBRA_FAIL;
2735     }
2736     return ZEBRA_OK;
2737 }
2738
2739
2740
2741 /*
2742  * Local variables:
2743  * c-basic-offset: 4
2744  * c-file-style: "Stroustrup"
2745  * indent-tabs-mode: nil
2746  * End:
2747  * vim: shiftwidth=4 tabstop=8 expandtab
2748  */
2749