f5aa6698741ff10c706cd199bf421da6f64f0d82
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* This file is part of the Zebra server.
2    Copyright (C) 2004-2013 Index Data
3
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 */
19
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
/* Cached YAZ log level for the "rpn" module.  log_level_set records
   whether log_level_rpn has already been looked up (done lazily in
   add_isam_p). */
static int log_level_set = 0, log_level_rpn = 0;

#define TERMSET_DISABLE 1
44
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
46 {
47     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48     const char **out = zebra_maps_input(p->zm, from, len, 0);
49 #if 0
50     if (out && *out)
51     {
52         const char *outp = *out;
53         yaz_log(YLOG_LOG, "---");
54         while (*outp)
55         {
56             yaz_log(YLOG_LOG, "%02X", *outp);
57             outp++;
58         }
59     }
60 #endif
61     return out;
62 }
63
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65                           struct rpn_char_map_info *map_info)
66 {
67     map_info->zm = zm;
68     if (zebra_maps_is_icu(zm))
69         dict_grep_cmap(reg->dict, 0, 0);
70     else
71         dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
72 }
73
74 #define TERM_COUNT
75
76 struct grep_info {
77 #ifdef TERM_COUNT
78     int *term_no;
79 #endif
80     ISAM_P *isam_p_buf;
81     int isam_p_size;
82     int isam_p_indx;
83     int trunc_max;
84     ZebraHandle zh;
85     const char *index_type;
86     ZebraSet termset;
87 };
88
89 static int add_isam_p(const char *name, const char *info,
90                       struct grep_info *p)
91 {
92     if (!log_level_set)
93     {
94         log_level_rpn = yaz_log_module_level("rpn");
95         log_level_set = 1;
96     }
97     /* we may have to stop this madness.. NOTE: -1 so that if
98        truncmax == trunxlimit we do *not* generate result sets */
99     if (p->isam_p_indx >= p->trunc_max - 1)
100         return 1;
101
102     if (p->isam_p_indx == p->isam_p_size)
103     {
104         ISAM_P *new_isam_p_buf;
105 #ifdef TERM_COUNT
106         int *new_term_no;
107 #endif
108         p->isam_p_size = 2*p->isam_p_size + 100;
109         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
110                                             p->isam_p_size);
111         if (p->isam_p_buf)
112         {
113             memcpy(new_isam_p_buf, p->isam_p_buf,
114                    p->isam_p_indx * sizeof(*p->isam_p_buf));
115             xfree(p->isam_p_buf);
116         }
117         p->isam_p_buf = new_isam_p_buf;
118
119 #ifdef TERM_COUNT
120         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
121         if (p->term_no)
122         {
123             memcpy(new_term_no, p->isam_p_buf,
124                    p->isam_p_indx * sizeof(*p->term_no));
125             xfree(p->term_no);
126         }
127         p->term_no = new_term_no;
128 #endif
129     }
130     assert(*info == sizeof(*p->isam_p_buf));
131     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
132
133     if (p->termset)
134     {
135         const char *db;
136         char term_tmp[IT_MAX_WORD];
137         int ord = 0;
138         const char *index_name;
139         int len = key_SU_decode(&ord, (const unsigned char *) name);
140
141         zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143         zebraExplain_lookup_ord(p->zh->reg->zei,
144                                 ord, 0 /* index_type */, &db, &index_name);
145         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
146
147         resultSetAddTerm(p->zh, p->termset, name[len], db,
148                          index_name, term_tmp);
149     }
150     (p->isam_p_indx)++;
151     return 0;
152 }
153
/* Grep callback with an untyped client pointer: p is the
   struct grep_info to which the hit is forwarded via add_isam_p(). */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *gi = (struct grep_info *) p;

    return add_isam_p(name, info, gi);
}
158
159 static int term_pre(zebra_map_t zm, const char **src,
160                     const char *ct1, int first)
161 {
162     const char *s1, *s0 = *src;
163     const char **map;
164
165     /* skip white space */
166     while (*s0)
167     {
168         if (ct1 && strchr(ct1, *s0))
169             break;
170         s1 = s0;
171         map = zebra_maps_input(zm, &s1, strlen(s1), first);
172         if (**map != *CHR_SPACE)
173             break;
174         s0 = s1;
175     }
176     *src = s0;
177     return *s0;
178 }
179
180
/* esc_str - render bytes as a "HH:c  " dump for diagnostics.
 *  out_buf/out_size: destination buffer; out_size must exceed 20
 *  in_buf/in_size:   bytes to render
 * Each byte becomes two hex digits, a colon, the byte itself (or '?'
 * when outside 32..126) and two spaces.  When fewer than 20 bytes of
 * out_buf remain, ".." is appended and the rest of the input is dropped.
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;   /* current length of the string in out_buf */
    int idx = 0;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    while (idx < in_size)
    {
        int byte = in_buf[idx++] & 0xff;
        int shown = (byte >= 32 && byte <= 126) ? byte : '?';

        used += (size_t) sprintf(out_buf + used, "%02X:%c  ", byte, shown);
        if (used > out_size-20)
        {
            strcat(out_buf, "..");
            break;
        }
    }
}
206
207 #define REGEX_CHARS " ^[]()|.*+?!\"$\\"
208
209 static void add_non_space(const char *start, const char *end,
210                           WRBUF term_dict,
211                           WRBUF display_term,
212                           const char **map, int q_map_match)
213 {
214     size_t sz = end - start;
215
216     wrbuf_write(display_term, start, sz);
217     if (!q_map_match)
218     {
219         while (start < end)
220         {
221             if (strchr(REGEX_CHARS, *start))
222                 wrbuf_putc(term_dict, '\\');
223             wrbuf_putc(term_dict, *start);
224             start++;
225         }
226     }
227     else
228     {
229         char tmpbuf[80];
230         esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
231
232         wrbuf_puts(term_dict, map[0]);
233     }
234 }
235
236
237 static int term_102_icu(zebra_map_t zm,
238                         const char **src, WRBUF term_dict, int space_split,
239                         WRBUF display_term)
240 {
241     int no_terms = 0;
242     const char *s0 = *src, *s1;
243     while (*s0 == ' ')
244         s0++;
245     s1 = s0;
246     for (;;)
247     {
248         if (*s1 == ' ' && space_split)
249             break;
250         else if (*s1 && !strchr(REGEX_CHARS "-", *s1))
251             s1++;
252         else
253         {
254             /* EOF or regex reserved char */
255             if (s0 != s1)
256             {
257                 const char *res_buf = 0;
258                 size_t res_len = 0;
259                 const char *display_buf;
260                 size_t display_len;
261
262                 zebra_map_tokenize_start(zm, s0, s1 - s0);
263
264                 if (zebra_map_tokenize_next(zm, &res_buf, &res_len,
265                                             &display_buf, &display_len))
266                 {
267                     size_t i = res_len;
268                     while (i > 0 && res_buf[--i] != '\x01')
269                         ;
270                     while (i > 0 && res_buf[--i] != '\x01')
271                         ;
272                     res_len = i; /* reduce res_len */
273                     for (i = 0; i < res_len; i++)
274                     {
275                         if (strchr(REGEX_CHARS "\\", res_buf[i]))
276                             wrbuf_putc(term_dict, '\\');
277                         if (res_buf[i] < 32)
278                             wrbuf_putc(term_dict, '\x01');
279
280                         wrbuf_putc(term_dict, res_buf[i]);
281                     }
282                     wrbuf_write(display_term, display_buf, display_len);
283
284                     no_terms++;
285                 }
286             }
287             if (*s1 == '\0')
288                 break;
289
290             wrbuf_putc(term_dict, *s1);
291             wrbuf_putc(display_term, *s1);
292
293             s1++;
294             s0 = s1;
295         }
296     }
297     if (no_terms)
298         wrbuf_puts(term_dict, "\x01\x01.*");
299     *src = s1;
300     return no_terms;
301 }
302
303 static int term_100_icu(zebra_map_t zm,
304                         const char **src, WRBUF term_dict, int space_split,
305                         WRBUF display_term,
306                         int mode)
307 {
308     size_t i;
309     const char *res_buf = 0;
310     size_t res_len = 0;
311     const char *display_buf;
312     size_t display_len;
313     const char *s0 = *src, *s1;
314
315     while (*s0 == ' ')
316         s0++;
317
318     if (*s0 == '\0')
319         return 0;
320
321     if (space_split)
322     {
323         s1 = s0;
324         while (*s1 && *s1 != ' ')
325             s1++;
326     }
327     else
328         s1 = s0 + strlen(s0);
329
330     *src = s1;
331
332     zebra_map_tokenize_start(zm, s0, s1 - s0);
333
334     if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
335                                  &display_buf, &display_len))
336     {
337         return 0;
338     }
339     wrbuf_write(display_term, display_buf, display_len);
340     if (mode)
341     {
342         /* ICU sort keys seem to be of the form
343            basechars \x01 accents \x01 length
344            For now we'll just right truncate from basechars . This
345            may give false hits due to accents not being used.
346         */
347         i = res_len;
348         while (i > 0 && res_buf[--i] != '\x01')
349             ;
350         while (i > 0 && res_buf[--i] != '\x01')
351             ;
352         if (i == 0)
353         {  /* did not find base chars at all. Throw error */
354             return -1;
355         }
356         res_len = i; /* reduce res_len */
357     }
358     if (mode & 2)
359         wrbuf_puts(term_dict, ".*");
360     for (i = 0; i < res_len; i++)
361     {
362         if (strchr(REGEX_CHARS "\\", res_buf[i]))
363             wrbuf_putc(term_dict, '\\');
364         if (res_buf[i] < 32)
365             wrbuf_putc(term_dict, '\x01');
366
367         wrbuf_putc(term_dict, res_buf[i]);
368     }
369     if (mode & 1)
370         wrbuf_puts(term_dict, ".*");
371     else if (mode)
372         wrbuf_puts(term_dict, "\x01\x01.*");
373     return 1;
374 }
375
376 /* term_100: handle term, where trunc = none(no operators at all) */
377 static int term_100(zebra_map_t zm,
378                     const char **src, WRBUF term_dict, int space_split,
379                     WRBUF display_term)
380 {
381     const char *s0;
382     const char **map;
383     int i = 0;
384
385     const char *space_start = 0;
386     const char *space_end = 0;
387
388     if (!term_pre(zm, src, 0, !space_split))
389         return 0;
390     s0 = *src;
391     while (*s0)
392     {
393         const char *s1 = s0;
394         int q_map_match = 0;
395         map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
396         if (space_split)
397         {
398             if (**map == *CHR_SPACE)
399                 break;
400         }
401         else  /* complete subfield only. */
402         {
403             if (**map == *CHR_SPACE)
404             {   /* save space mapping for later  .. */
405                 space_start = s1;
406                 space_end = s0;
407                 continue;
408             }
409             else if (space_start)
410             {   /* reload last space */
411                 while (space_start < space_end)
412                 {
413                     if (strchr(REGEX_CHARS, *space_start))
414                         wrbuf_putc(term_dict, '\\');
415                     wrbuf_putc(display_term, *space_start);
416                     wrbuf_putc(term_dict, *space_start);
417                     space_start++;
418
419                 }
420                 /* and reset */
421                 space_start = space_end = 0;
422             }
423         }
424         i++;
425
426         add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
427     }
428     *src = s0;
429     return i;
430 }
431
432 /* term_101: handle term, where trunc = Process # */
433 static int term_101(zebra_map_t zm,
434                     const char **src, WRBUF term_dict, int space_split,
435                     WRBUF display_term)
436 {
437     const char *s0;
438     const char **map;
439     int i = 0;
440
441     if (!term_pre(zm, src, "#", !space_split))
442         return 0;
443     s0 = *src;
444     while (*s0)
445     {
446         if (*s0 == '#')
447         {
448             i++;
449             wrbuf_puts(term_dict, ".*");
450             wrbuf_putc(display_term, *s0);
451             s0++;
452         }
453         else
454         {
455             const char *s1 = s0;
456             int q_map_match = 0;
457             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
458             if (space_split && **map == *CHR_SPACE)
459                 break;
460
461             i++;
462             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
463         }
464     }
465     *src = s0;
466     return i;
467 }
468
469 /* term_103: handle term, where trunc = re-2 (regular expressions) */
470 static int term_103(zebra_map_t zm, const char **src,
471                     WRBUF term_dict, int *errors, int space_split,
472                     WRBUF display_term)
473 {
474     int i = 0;
475     const char *s0;
476     const char **map;
477
478     if (!term_pre(zm, src, "^\\()[].*+?|", !space_split))
479         return 0;
480     s0 = *src;
481     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
482         isdigit(((const unsigned char *)s0)[1]))
483     {
484         *errors = s0[1] - '0';
485         s0 += 3;
486         if (*errors > 3)
487             *errors = 3;
488     }
489     while (*s0)
490     {
491         if (strchr("^\\()[].*+?|-", *s0))
492         {
493             wrbuf_putc(display_term, *s0);
494             wrbuf_putc(term_dict, *s0);
495             s0++;
496             i++;
497         }
498         else
499         {
500             const char *s1 = s0;
501             int q_map_match = 0;
502             map = zebra_maps_search(zm, &s0, strlen(s0),  &q_map_match);
503             if (space_split && **map == *CHR_SPACE)
504                 break;
505
506             i++;
507             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
508         }
509     }
510     *src = s0;
511
512     return i;
513 }
514
515 /* term_103: handle term, where trunc = re-1 (regular expressions) */
516 static int term_102(zebra_map_t zm, const char **src,
517                     WRBUF term_dict, int space_split, WRBUF display_term)
518 {
519     return term_103(zm, src, term_dict, NULL, space_split, display_term);
520 }
521
522
523 /* term_104: handle term, process ?n * # */
524 static int term_104(zebra_map_t zm, const char **src,
525                     WRBUF term_dict, int space_split, WRBUF display_term)
526 {
527     const char *s0;
528     const char **map;
529     int i = 0;
530
531     if (!term_pre(zm, src, "?*#", !space_split))
532         return 0;
533     s0 = *src;
534     while (*s0)
535     {
536         if (*s0 == '?')
537         {
538             i++;
539             wrbuf_putc(display_term, *s0);
540             s0++;
541             if (*s0 >= '0' && *s0 <= '9')
542             {
543                 int limit = 0;
544                 while (*s0 >= '0' && *s0 <= '9')
545                 {
546                     limit = limit * 10 + (*s0 - '0');
547                     wrbuf_putc(display_term, *s0);
548                     s0++;
549                 }
550                 if (limit > 20)
551                     limit = 20;
552                 while (--limit >= 0)
553                 {
554                     wrbuf_puts(term_dict, ".?");
555                 }
556             }
557             else
558             {
559                 wrbuf_puts(term_dict, ".*");
560             }
561         }
562         else if (*s0 == '*')
563         {
564             i++;
565             wrbuf_puts(term_dict, ".*");
566             wrbuf_putc(display_term, *s0);
567             s0++;
568         }
569         else if (*s0 == '#')
570         {
571             i++;
572             wrbuf_puts(term_dict, ".");
573             wrbuf_putc(display_term, *s0);
574             s0++;
575         }
576         else
577         {
578             const char *s1 = s0;
579             int q_map_match = 0;
580             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
581             if (space_split && **map == *CHR_SPACE)
582                 break;
583
584             i++;
585             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
586         }
587     }
588     *src = s0;
589     return i;
590 }
591
592 /* term_105/106: handle term, process * ! and possibly right_truncate */
593 static int term_105(zebra_map_t zm, const char **src,
594                     WRBUF term_dict, int space_split,
595                     WRBUF display_term, int right_truncate)
596 {
597     const char *s0;
598     const char **map;
599     int i = 0;
600
601     if (!term_pre(zm, src, "\\*!", !space_split))
602         return 0;
603     s0 = *src;
604     while (*s0)
605     {
606         if (*s0 == '*')
607         {
608             i++;
609             wrbuf_puts(term_dict, ".*");
610             wrbuf_putc(display_term, *s0);
611             s0++;
612         }
613         else if (*s0 == '!')
614         {
615             i++;
616             wrbuf_putc(term_dict, '.');
617             wrbuf_putc(display_term, *s0);
618             s0++;
619         }
620         else if (*s0 == '\\')
621         {
622             i++;
623             wrbuf_puts(term_dict, "\\\\");
624             wrbuf_putc(display_term, *s0);
625             s0++;
626         }
627         else
628         {
629             const char *s1 = s0;
630             int q_map_match = 0;
631             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
632             if (space_split && **map == *CHR_SPACE)
633                 break;
634
635             i++;
636             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
637         }
638     }
639     if (right_truncate)
640         wrbuf_puts(term_dict, ".*");
641     *src = s0;
642     return i;
643 }
644
645
646 /* gen_regular_rel - generate regular expression from relation
647  *  val:     border value (inclusive)
648  *  islt:    1 if <=; 0 if >=.
649  */
650 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
651 {
652     char dst_buf[20*5*20]; /* assuming enough for expansion */
653     char *dst = dst_buf;
654     int dst_p;
655     int w, d, i;
656     int pos = 0;
657     char numstr[20];
658
659     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
660     if (val >= 0)
661     {
662         if (islt)
663             strcpy(dst, "(-[0-9]+|(");
664         else
665             strcpy(dst, "((");
666     }
667     else
668     {
669         if (!islt)
670         {
671             strcpy(dst, "([0-9]+|-(");
672             islt = 1;
673         }
674         else
675         {
676             strcpy(dst, "(-(");
677             islt = 0;
678         }
679         val = -val;
680     }
681     dst_p = strlen(dst);
682     sprintf(numstr, "%d", val);
683     for (w = strlen(numstr); --w >= 0; pos++)
684     {
685         d = numstr[w];
686         if (pos > 0)
687         {
688             if (islt)
689             {
690                 if (d == '0')
691                     continue;
692                 d--;
693             }
694             else
695             {
696                 if (d == '9')
697                     continue;
698                 d++;
699             }
700         }
701
702         strcpy(dst + dst_p, numstr);
703         dst_p = strlen(dst) - pos - 1;
704
705         if (islt)
706         {
707             if (d != '0')
708             {
709                 dst[dst_p++] = '[';
710                 dst[dst_p++] = '0';
711                 dst[dst_p++] = '-';
712                 dst[dst_p++] = d;
713                 dst[dst_p++] = ']';
714             }
715             else
716                 dst[dst_p++] = d;
717         }
718         else
719         {
720             if (d != '9')
721             {
722                 dst[dst_p++] = '[';
723                 dst[dst_p++] = d;
724                 dst[dst_p++] = '-';
725                 dst[dst_p++] = '9';
726                 dst[dst_p++] = ']';
727             }
728             else
729                 dst[dst_p++] = d;
730         }
731         for (i = 0; i<pos; i++)
732         {
733             dst[dst_p++] = '[';
734             dst[dst_p++] = '0';
735             dst[dst_p++] = '-';
736             dst[dst_p++] = '9';
737             dst[dst_p++] = ']';
738         }
739         dst[dst_p++] = '|';
740     }
741     dst[dst_p] = '\0';
742     if (islt)
743     {
744         /* match everything less than 10^(pos-1) */
745         strcat(dst, "0*");
746         for (i = 1; i<pos; i++)
747             strcat(dst, "[0-9]?");
748     }
749     else
750     {
751         /* match everything greater than 10^pos */
752         for (i = 0; i <= pos; i++)
753             strcat(dst, "[0-9]");
754         strcat(dst, "[0-9]*");
755     }
756     strcat(dst, "))");
757     wrbuf_puts(term_dict, dst);
758 }
759
760 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
761 {
762     const char *src = wrbuf_cstr(wsrc);
763     if (src[*indx] == '\\')
764     {
765         wrbuf_putc(term_p, src[*indx]);
766         (*indx)++;
767     }
768     wrbuf_putc(term_p, src[*indx]);
769     (*indx)++;
770 }
771
772 /*
773  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
774  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
775  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
776  *              ([^-a].*|a[^-b].*|ab[c-].*)
777  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
778  *              ([^a-].*|a[^b-].*|ab[^c-].*)
779  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
780  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
781  */
782 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
783                            const char **term_sub, WRBUF term_dict,
784                            const Odr_oid *attributeSet,
785                            zebra_map_t zm, int space_split,
786                            WRBUF display_term,
787                            int *error_code)
788 {
789     AttrType relation;
790     int relation_value;
791     int i;
792     WRBUF term_component = wrbuf_alloc();
793
794     attr_init_APT(&relation, zapt, 2);
795     relation_value = attr_find(&relation, NULL);
796
797     *error_code = 0;
798     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
799     switch (relation_value)
800     {
801     case 1:
802         if (!term_100(zm, term_sub, term_component, space_split, display_term))
803         {
804             wrbuf_destroy(term_component);
805             return 0;
806         }
807         yaz_log(log_level_rpn, "Relation <");
808
809         wrbuf_putc(term_dict, '(');
810         for (i = 0; i < wrbuf_len(term_component); )
811         {
812             int j = 0;
813
814             if (i)
815                 wrbuf_putc(term_dict, '|');
816             while (j < i)
817                 string_rel_add_char(term_dict, term_component, &j);
818
819             wrbuf_putc(term_dict, '[');
820
821             wrbuf_putc(term_dict, '^');
822
823             wrbuf_putc(term_dict, 1);
824             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
825
826             string_rel_add_char(term_dict, term_component, &i);
827             wrbuf_putc(term_dict, '-');
828
829             wrbuf_putc(term_dict, ']');
830             wrbuf_putc(term_dict, '.');
831             wrbuf_putc(term_dict, '*');
832         }
833         wrbuf_putc(term_dict, ')');
834         break;
835     case 2:
836         if (!term_100(zm, term_sub, term_component, space_split, display_term))
837         {
838             wrbuf_destroy(term_component);
839             return 0;
840         }
841         yaz_log(log_level_rpn, "Relation <=");
842
843         wrbuf_putc(term_dict, '(');
844         for (i = 0; i < wrbuf_len(term_component); )
845         {
846             int j = 0;
847
848             while (j < i)
849                 string_rel_add_char(term_dict, term_component, &j);
850             wrbuf_putc(term_dict, '[');
851
852             wrbuf_putc(term_dict, '^');
853
854             wrbuf_putc(term_dict, 1);
855             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
856
857             string_rel_add_char(term_dict, term_component, &i);
858             wrbuf_putc(term_dict, '-');
859
860             wrbuf_putc(term_dict, ']');
861             wrbuf_putc(term_dict, '.');
862             wrbuf_putc(term_dict, '*');
863
864             wrbuf_putc(term_dict, '|');
865         }
866         for (i = 0; i < wrbuf_len(term_component); )
867             string_rel_add_char(term_dict, term_component, &i);
868         wrbuf_putc(term_dict, ')');
869         break;
870     case 5:
871         if (!term_100(zm, term_sub, term_component, space_split, display_term))
872         {
873             wrbuf_destroy(term_component);
874             return 0;
875         }
876         yaz_log(log_level_rpn, "Relation >");
877
878         wrbuf_putc(term_dict, '(');
879         for (i = 0; i < wrbuf_len(term_component); )
880         {
881             int j = 0;
882
883             while (j < i)
884                 string_rel_add_char(term_dict, term_component, &j);
885             wrbuf_putc(term_dict, '[');
886
887             wrbuf_putc(term_dict, '^');
888             wrbuf_putc(term_dict, '-');
889             string_rel_add_char(term_dict, term_component, &i);
890
891             wrbuf_putc(term_dict, ']');
892             wrbuf_putc(term_dict, '.');
893             wrbuf_putc(term_dict, '*');
894
895             wrbuf_putc(term_dict, '|');
896         }
897         for (i = 0; i < wrbuf_len(term_component); )
898             string_rel_add_char(term_dict, term_component, &i);
899         wrbuf_putc(term_dict, '.');
900         wrbuf_putc(term_dict, '+');
901         wrbuf_putc(term_dict, ')');
902         break;
903     case 4:
904         if (!term_100(zm, term_sub, term_component, space_split, display_term))
905         {
906             wrbuf_destroy(term_component);
907             return 0;
908         }
909         yaz_log(log_level_rpn, "Relation >=");
910
911         wrbuf_putc(term_dict, '(');
912         for (i = 0; i < wrbuf_len(term_component); )
913         {
914             int j = 0;
915
916             if (i)
917                 wrbuf_putc(term_dict, '|');
918             while (j < i)
919                 string_rel_add_char(term_dict, term_component, &j);
920             wrbuf_putc(term_dict, '[');
921
922             if (i < wrbuf_len(term_component)-1)
923             {
924                 wrbuf_putc(term_dict, '^');
925                 wrbuf_putc(term_dict, '-');
926                 string_rel_add_char(term_dict, term_component, &i);
927             }
928             else
929             {
930                 string_rel_add_char(term_dict, term_component, &i);
931                 wrbuf_putc(term_dict, '-');
932             }
933             wrbuf_putc(term_dict, ']');
934             wrbuf_putc(term_dict, '.');
935             wrbuf_putc(term_dict, '*');
936         }
937         wrbuf_putc(term_dict, ')');
938         break;
939     case 3:
940     case 102:
941     case -1:
942         if (!**term_sub)
943             return 1;
944         yaz_log(log_level_rpn, "Relation =");
945         if (!term_100(zm, term_sub, term_component, space_split, display_term))
946         {
947             wrbuf_destroy(term_component);
948             return 0;
949         }
950         wrbuf_puts(term_dict, "(");
951         wrbuf_puts(term_dict, wrbuf_cstr(term_component));
952         wrbuf_puts(term_dict, ")");
953         break;
954     case 103:
955         yaz_log(log_level_rpn, "Relation always matches");
956         /* skip to end of term (we don't care what it is) */
957         while (**term_sub != '\0')
958             (*term_sub)++;
959         break;
960     default:
961         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
962         wrbuf_destroy(term_component);
963         return 0;
964     }
965     wrbuf_destroy(term_component);
966     return 1;
967 }
968
969 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
970                              const char **term_sub,
971                              WRBUF term_dict,
972                              const Odr_oid *attributeSet, NMEM stream,
973                              struct grep_info *grep_info,
974                              const char *index_type, int complete_flag,
975                              WRBUF display_term,
976                              const char *xpath_use,
977                              struct ord_list **ol,
978                              zebra_map_t zm);
979
980 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
981                                 Z_AttributesPlusTerm *zapt,
982                                 zint *hits_limit_value,
983                                 const char **term_ref_id_str,
984                                 NMEM nmem)
985 {
986     AttrType term_ref_id_attr;
987     AttrType hits_limit_attr;
988     int term_ref_id_int;
989     zint hits_limit_from_attr;
990
991     attr_init_APT(&hits_limit_attr, zapt, 11);
992     hits_limit_from_attr  = attr_find(&hits_limit_attr, NULL);
993
994     attr_init_APT(&term_ref_id_attr, zapt, 10);
995     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
996     if (term_ref_id_int >= 0)
997     {
998         char *res = nmem_malloc(nmem, 20);
999         sprintf(res, "%d", term_ref_id_int);
1000         *term_ref_id_str = res;
1001     }
1002     if (hits_limit_from_attr != -1)
1003         *hits_limit_value = hits_limit_from_attr;
1004
1005     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
1006             *term_ref_id_str ? *term_ref_id_str : "none",
1007             *hits_limit_value);
1008     return ZEBRA_OK;
1009 }
1010
1011 /** \brief search for term (which may be truncated)
1012  */
1013 static ZEBRA_RES search_term(ZebraHandle zh,
1014                              Z_AttributesPlusTerm *zapt,
1015                              const char **term_sub,
1016                              const Odr_oid *attributeSet,
1017                              zint hits_limit, NMEM stream,
1018                              struct grep_info *grep_info,
1019                              const char *index_type, int complete_flag,
1020                              const char *rank_type,
1021                              const char *xpath_use,
1022                              NMEM rset_nmem,
1023                              RSET *rset,
1024                              struct rset_key_control *kc,
1025                              zebra_map_t zm)
1026 {
1027     ZEBRA_RES res;
1028     struct ord_list *ol;
1029     zint hits_limit_value = hits_limit;
1030     const char *term_ref_id_str = 0;
1031     WRBUF term_dict = wrbuf_alloc();
1032     WRBUF display_term = wrbuf_alloc();
1033     *rset = 0;
1034     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1035                           stream);
1036     grep_info->isam_p_indx = 0;
1037     res = string_term(zh, zapt, term_sub, term_dict,
1038                       attributeSet, stream, grep_info,
1039                       index_type, complete_flag,
1040                       display_term, xpath_use, &ol, zm);
1041     wrbuf_destroy(term_dict);
1042     if (res == ZEBRA_OK && *term_sub)
1043     {
1044         yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
1045         *rset = rset_trunc(zh, grep_info->isam_p_buf,
1046                            grep_info->isam_p_indx, wrbuf_buf(display_term),
1047                            wrbuf_len(display_term), rank_type,
1048                            1 /* preserve pos */,
1049                            zapt->term->which, rset_nmem,
1050                            kc, kc->scope, ol, index_type, hits_limit_value,
1051                            term_ref_id_str);
1052         if (!*rset)
1053             res = ZEBRA_FAIL;
1054     }
1055     wrbuf_destroy(display_term);
1056     return res;
1057 }
1058
1059 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1060                              const char **term_sub,
1061                              WRBUF term_dict,
1062                              const Odr_oid *attributeSet, NMEM stream,
1063                              struct grep_info *grep_info,
1064                              const char *index_type, int complete_flag,
1065                              WRBUF display_term,
1066                              const char *xpath_use,
1067                              struct ord_list **ol,
1068                              zebra_map_t zm)
1069 {
1070     int r;
1071     AttrType truncation;
1072     int truncation_value;
1073     const char *termp;
1074     struct rpn_char_map_info rcmi;
1075
1076     int space_split = complete_flag ? 0 : 1;
1077     int ord = -1;
1078     int regex_range = 0;
1079     int max_pos, prefix_len = 0;
1080     int relation_error;
1081     char ord_buf[32];
1082     int ord_len, i;
1083
1084     *ol = ord_list_create(stream);
1085
1086     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1087     attr_init_APT(&truncation, zapt, 5);
1088     truncation_value = attr_find(&truncation, NULL);
1089     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1090
1091     termp = *term_sub; /* start of term for each database */
1092
1093     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1094                           attributeSet, &ord) != ZEBRA_OK)
1095     {
1096         *term_sub = 0;
1097         return ZEBRA_FAIL;
1098     }
1099
1100     wrbuf_rewind(term_dict); /* new dictionary regexp term */
1101
1102     *ol = ord_list_append(stream, *ol, ord);
1103     ord_len = key_SU_encode(ord, ord_buf);
1104
1105     wrbuf_putc(term_dict, '(');
1106
1107     for (i = 0; i<ord_len; i++)
1108     {
1109         wrbuf_putc(term_dict, 1);  /* our internal regexp escape char */
1110         wrbuf_putc(term_dict, ord_buf[i]);
1111     }
1112     wrbuf_putc(term_dict, ')');
1113
1114     prefix_len = wrbuf_len(term_dict);
1115
1116     if (zebra_maps_is_icu(zm))
1117     {
1118         int relation_value;
1119         AttrType relation;
1120
1121         attr_init_APT(&relation, zapt, 2);
1122         relation_value = attr_find(&relation, NULL);
1123         if (relation_value == 103) /* always matches */
1124             termp += strlen(termp); /* move to end of term */
1125         else if (relation_value == 3 || relation_value == 102 || relation_value == -1)
1126         {
1127             /* ICU case */
1128             switch (truncation_value)
1129             {
1130             case -1:         /* not specified */
1131             case 100:        /* do not truncate */
1132                 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0))
1133                 {
1134                     *term_sub = 0;
1135                     return ZEBRA_OK;
1136                 }
1137                 break;
1138             case 102:
1139                 if (!term_102_icu(zm, &termp, term_dict, space_split, display_term))
1140                 {
1141                     *term_sub = 0;
1142                     return ZEBRA_OK;
1143                 }
1144                 break;
1145             case 1:          /* right truncation */
1146                 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1))
1147                 {
1148                     *term_sub = 0;
1149                     return ZEBRA_OK;
1150                 }
1151                 break;
1152             case 2:
1153                 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 2))
1154                 {
1155                     *term_sub = 0;
1156                     return ZEBRA_OK;
1157                 }
1158                 break;
1159             case 3:
1160                 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 3))
1161                 {
1162                     *term_sub = 0;
1163                     return ZEBRA_OK;
1164                 }
1165                 break;
1166             default:
1167                 zebra_setError_zint(zh,
1168                                     YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1169                                     truncation_value);
1170                 return ZEBRA_FAIL;
1171             }
1172         }
1173         else
1174         {
1175             zebra_setError_zint(zh,
1176                                 YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE,
1177                                 relation_value);
1178             return ZEBRA_FAIL;
1179         }
1180     }
1181     else
1182     {
1183         /* non-ICU case. using string.chr and friends */
1184         switch (truncation_value)
1185         {
1186         case -1:         /* not specified */
1187         case 100:        /* do not truncate */
1188             if (!string_relation(zh, zapt, &termp, term_dict,
1189                                  attributeSet,
1190                                  zm, space_split, display_term,
1191                                  &relation_error))
1192             {
1193                 if (relation_error)
1194                 {
1195                     zebra_setError(zh, relation_error, 0);
1196                     return ZEBRA_FAIL;
1197                 }
1198                 *term_sub = 0;
1199                 return ZEBRA_OK;
1200             }
1201             break;
1202         case 1:          /* right truncation */
1203             wrbuf_putc(term_dict, '(');
1204             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1205             {
1206                 *term_sub = 0;
1207                 return ZEBRA_OK;
1208             }
1209             wrbuf_puts(term_dict, ".*)");
1210             break;
1211         case 2:          /* left truncation */
1212             wrbuf_puts(term_dict, "(.*");
1213             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1214             {
1215                 *term_sub = 0;
1216                 return ZEBRA_OK;
1217             }
1218             wrbuf_putc(term_dict, ')');
1219             break;
1220         case 3:          /* left&right truncation */
1221             wrbuf_puts(term_dict, "(.*");
1222             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1223             {
1224                 *term_sub = 0;
1225                 return ZEBRA_OK;
1226             }
1227             wrbuf_puts(term_dict, ".*)");
1228             break;
1229         case 101:        /* process # in term */
1230             wrbuf_putc(term_dict, '(');
1231             if (!term_101(zm, &termp, term_dict, space_split, display_term))
1232             {
1233                 *term_sub = 0;
1234                 return ZEBRA_OK;
1235             }
1236             wrbuf_puts(term_dict, ")");
1237             break;
1238         case 102:        /* Regexp-1 */
1239             wrbuf_putc(term_dict, '(');
1240             if (!term_102(zm, &termp, term_dict, space_split, display_term))
1241             {
1242                 *term_sub = 0;
1243                 return ZEBRA_OK;
1244             }
1245             wrbuf_putc(term_dict, ')');
1246             break;
1247         case 103:       /* Regexp-2 */
1248             regex_range = 1;
1249             wrbuf_putc(term_dict, '(');
1250             if (!term_103(zm, &termp, term_dict, &regex_range,
1251                           space_split, display_term))
1252             {
1253                 *term_sub = 0;
1254                 return ZEBRA_OK;
1255             }
1256             wrbuf_putc(term_dict, ')');
1257             break;
1258         case 104:        /* process ?n * # term */
1259             wrbuf_putc(term_dict, '(');
1260             if (!term_104(zm, &termp, term_dict, space_split, display_term))
1261             {
1262                 *term_sub = 0;
1263                 return ZEBRA_OK;
1264             }
1265             wrbuf_putc(term_dict, ')');
1266             break;
1267         case 105:        /* process * ! in term and right truncate */
1268             wrbuf_putc(term_dict, '(');
1269             if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1270             {
1271                 *term_sub = 0;
1272                 return ZEBRA_OK;
1273             }
1274             wrbuf_putc(term_dict, ')');
1275             break;
1276         case 106:        /* process * ! in term */
1277             wrbuf_putc(term_dict, '(');
1278             if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1279             {
1280                 *term_sub = 0;
1281                 return ZEBRA_OK;
1282             }
1283             wrbuf_putc(term_dict, ')');
1284             break;
1285         default:
1286             zebra_setError_zint(zh,
1287                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1288                                 truncation_value);
1289             return ZEBRA_FAIL;
1290         }
1291     }
1292     if (1)
1293     {
1294         char buf[1000];
1295         const char *input = wrbuf_cstr(term_dict) + prefix_len;
1296         esc_str(buf, sizeof(buf), input, strlen(input));
1297     }
1298     {
1299         WRBUF pr_wr = wrbuf_alloc();
1300
1301         wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1302         yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1303         wrbuf_destroy(pr_wr);
1304     }
1305     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1306                          grep_info, &max_pos,
1307                          ord_len /* number of "exact" chars */,
1308                          grep_handle);
1309     if (r == 1)
1310         zebra_set_partial_result(zh);
1311     else if (r)
1312         yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1313     *term_sub = termp;
1314     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1315     return ZEBRA_OK;
1316 }
1317
1318
1319
1320 static void grep_info_delete(struct grep_info *grep_info)
1321 {
1322 #ifdef TERM_COUNT
1323     xfree(grep_info->term_no);
1324 #endif
1325     xfree(grep_info->isam_p_buf);
1326 }
1327
1328 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1329                                    Z_AttributesPlusTerm *zapt,
1330                                    struct grep_info *grep_info,
1331                                    const char *index_type)
1332 {
1333 #ifdef TERM_COUNT
1334     grep_info->term_no = 0;
1335 #endif
1336     grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1337     grep_info->isam_p_size = 0;
1338     grep_info->isam_p_buf = NULL;
1339     grep_info->zh = zh;
1340     grep_info->index_type = index_type;
1341     grep_info->termset = 0;
1342     if (zapt)
1343     {
1344         AttrType truncmax;
1345         int truncmax_value;
1346
1347         attr_init_APT(&truncmax, zapt, 13);
1348         truncmax_value = attr_find(&truncmax, NULL);
1349         if (truncmax_value != -1)
1350             grep_info->trunc_max = truncmax_value;
1351     }
1352     if (zapt)
1353     {
1354         AttrType termset;
1355         int termset_value_numeric;
1356         const char *termset_value_string;
1357
1358         attr_init_APT(&termset, zapt, 8);
1359         termset_value_numeric =
1360             attr_find_ex(&termset, NULL, &termset_value_string);
1361         if (termset_value_numeric != -1)
1362         {
1363 #if TERMSET_DISABLE
1364             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1365             return ZEBRA_FAIL;
1366 #else
1367             char resname[32];
1368             const char *termset_name = 0;
1369             if (termset_value_numeric != -2)
1370             {
1371
1372                 sprintf(resname, "%d", termset_value_numeric);
1373                 termset_name = resname;
1374             }
1375             else
1376                 termset_name = termset_value_string;
1377             yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1378             grep_info->termset = resultSetAdd(zh, termset_name, 1);
1379             if (!grep_info->termset)
1380             {
1381                 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1382                 return ZEBRA_FAIL;
1383             }
1384 #endif
1385         }
1386     }
1387     return ZEBRA_OK;
1388 }
1389
1390 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1391                                      Z_AttributesPlusTerm *zapt,
1392                                      const char *termz,
1393                                      const Odr_oid *attributeSet,
1394                                      zint hits_limit,
1395                                      NMEM stream,
1396                                      const char *index_type, int complete_flag,
1397                                      const char *rank_type,
1398                                      const char *xpath_use,
1399                                      NMEM rset_nmem,
1400                                      RSET **result_sets, int *num_result_sets,
1401                                      struct rset_key_control *kc,
1402                                      zebra_map_t zm)
1403 {
1404     struct grep_info grep_info;
1405     const char *termp = termz;
1406     int alloc_sets = 0;
1407
1408     *num_result_sets = 0;
1409     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1410         return ZEBRA_FAIL;
1411     while (1)
1412     {
1413         ZEBRA_RES res;
1414
1415         if (alloc_sets == *num_result_sets)
1416         {
1417             int add = 10;
1418             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1419                                               sizeof(*rnew));
1420             if (alloc_sets)
1421                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1422             alloc_sets = alloc_sets + add;
1423             *result_sets = rnew;
1424         }
1425         res = search_term(zh, zapt, &termp, attributeSet, hits_limit,
1426                           stream, &grep_info,
1427                           index_type, complete_flag,
1428                           rank_type,
1429                           xpath_use, rset_nmem,
1430                           &(*result_sets)[*num_result_sets],
1431                           kc, zm);
1432         if (res != ZEBRA_OK)
1433         {
1434             int i;
1435             for (i = 0; i < *num_result_sets; i++)
1436                 rset_delete((*result_sets)[i]);
1437             grep_info_delete(&grep_info);
1438             return res;
1439         }
1440         if ((*result_sets)[*num_result_sets] == 0)
1441             break;
1442         (*num_result_sets)++;
1443
1444         if (!*termp)
1445             break;
1446     }
1447     grep_info_delete(&grep_info);
1448     return ZEBRA_OK;
1449 }
1450
1451 /**
1452    \brief Create result set(s) for list of terms
1453    \param zh Zebra Handle
1454    \param zapt Attributes Plust Term (RPN leaf)
1455    \param termz term as used in query but converted to UTF-8
1456    \param attributeSet default attribute set
1457    \param stream memory for result
1458    \param index_type register type ("w", "p",..)
1459    \param complete_flag whether it's phrases or not
1460    \param rank_type term flags for ranking
1461    \param xpath_use use attribute for X-Path (-1 for no X-path)
1462    \param rset_nmem memory for result sets
1463    \param result_sets output result set for each term in list (output)
1464    \param num_result_sets number of output result sets
1465    \param kc rset key control to be used for created result sets
1466 */
1467 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1468                                    Z_AttributesPlusTerm *zapt,
1469                                    const char *termz,
1470                                    const Odr_oid *attributeSet,
1471                                    zint hits_limit,
1472                                    NMEM stream,
1473                                    const char *index_type, int complete_flag,
1474                                    const char *rank_type,
1475                                    const char *xpath_use,
1476                                    NMEM rset_nmem,
1477                                    RSET **result_sets, int *num_result_sets,
1478                                    struct rset_key_control *kc)
1479 {
1480     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1481     return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit,
1482                                stream, index_type, complete_flag,
1483                                rank_type, xpath_use,
1484                                rset_nmem, result_sets, num_result_sets,
1485                                kc, zm);
1486 }
1487
1488
1489 /** \brief limit a search by position - returns result set
1490  */
1491 static ZEBRA_RES search_position(ZebraHandle zh,
1492                                  Z_AttributesPlusTerm *zapt,
1493                                  const Odr_oid *attributeSet,
1494                                  const char *index_type,
1495                                  NMEM rset_nmem,
1496                                  RSET *rset,
1497                                  struct rset_key_control *kc)
1498 {
1499     int position_value;
1500     AttrType position;
1501     int ord = -1;
1502     char ord_buf[32];
1503     char term_dict[100];
1504     int ord_len;
1505     char *val;
1506     ISAM_P isam_p;
1507     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1508
1509     attr_init_APT(&position, zapt, 3);
1510     position_value = attr_find(&position, NULL);
1511     switch(position_value)
1512     {
1513     case 3:
1514     case -1:
1515         return ZEBRA_OK;
1516     case 1:
1517     case 2:
1518         break;
1519     default:
1520         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1521                             position_value);
1522         return ZEBRA_FAIL;
1523     }
1524
1525
1526     if (!zebra_maps_is_first_in_field(zm))
1527     {
1528         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1529                             position_value);
1530         return ZEBRA_FAIL;
1531     }
1532
1533     if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1534                           attributeSet, &ord) != ZEBRA_OK)
1535     {
1536         return ZEBRA_FAIL;
1537     }
1538     ord_len = key_SU_encode(ord, ord_buf);
1539     memcpy(term_dict, ord_buf, ord_len);
1540     strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1541     val = dict_lookup(zh->reg->dict, term_dict);
1542     if (val)
1543     {
1544         assert(*val == sizeof(ISAM_P));
1545         memcpy(&isam_p, val+1, sizeof(isam_p));
1546
1547         *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope,
1548                                        isam_p, 0);
1549     }
1550     return ZEBRA_OK;
1551 }
1552
1553 /** \brief returns result set for phrase search
1554  */
1555 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1556                                        Z_AttributesPlusTerm *zapt,
1557                                        const char *termz_org,
1558                                        const Odr_oid *attributeSet,
1559                                        zint hits_limit,
1560                                        NMEM stream,
1561                                        const char *index_type,
1562                                        int complete_flag,
1563                                        const char *rank_type,
1564                                        const char *xpath_use,
1565                                        NMEM rset_nmem,
1566                                        RSET *rset,
1567                                        struct rset_key_control *kc)
1568 {
1569     RSET *result_sets = 0;
1570     int num_result_sets = 0;
1571     ZEBRA_RES res =
1572         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1573                           stream, index_type, complete_flag,
1574                           rank_type, xpath_use,
1575                           rset_nmem,
1576                           &result_sets, &num_result_sets, kc);
1577
1578     if (res != ZEBRA_OK)
1579         return res;
1580
1581     if (num_result_sets > 0)
1582     {
1583         RSET first_set = 0;
1584         res = search_position(zh, zapt, attributeSet,
1585                               index_type,
1586                               rset_nmem, &first_set,
1587                               kc);
1588         if (res != ZEBRA_OK)
1589         {
1590             int i;
1591             for (i = 0; i<num_result_sets; i++)
1592                 rset_delete(result_sets[i]);
1593             return res;
1594         }
1595         if (first_set)
1596         {
1597             RSET *nsets = nmem_malloc(stream,
1598                                       sizeof(RSET) * (num_result_sets+1));
1599             nsets[0] = first_set;
1600             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1601             result_sets = nsets;
1602             num_result_sets++;
1603         }
1604     }
1605     if (num_result_sets == 0)
1606         *rset = rset_create_null(rset_nmem, kc, 0);
1607     else if (num_result_sets == 1)
1608         *rset = result_sets[0];
1609     else
1610         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1611                                  num_result_sets, result_sets,
1612                                  1 /* ordered */, 0 /* exclusion */,
1613                                  3 /* relation */, 1 /* distance */);
1614     if (!*rset)
1615         return ZEBRA_FAIL;
1616     return ZEBRA_OK;
1617 }
1618
1619 /** \brief returns result set for or-list search
1620  */
1621 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1622                                         Z_AttributesPlusTerm *zapt,
1623                                         const char *termz_org,
1624                                         const Odr_oid *attributeSet,
1625                                         zint hits_limit,
1626                                         NMEM stream,
1627                                         const char *index_type,
1628                                         int complete_flag,
1629                                         const char *rank_type,
1630                                         const char *xpath_use,
1631                                         NMEM rset_nmem,
1632                                         RSET *rset,
1633                                         struct rset_key_control *kc)
1634 {
1635     RSET *result_sets = 0;
1636     int num_result_sets = 0;
1637     int i;
1638     ZEBRA_RES res =
1639         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1640                           stream, index_type, complete_flag,
1641                           rank_type, xpath_use,
1642                           rset_nmem,
1643                           &result_sets, &num_result_sets, kc);
1644     if (res != ZEBRA_OK)
1645         return res;
1646
1647     for (i = 0; i<num_result_sets; i++)
1648     {
1649         RSET first_set = 0;
1650         res = search_position(zh, zapt, attributeSet,
1651                               index_type,
1652                               rset_nmem, &first_set,
1653                               kc);
1654         if (res != ZEBRA_OK)
1655         {
1656             for (i = 0; i<num_result_sets; i++)
1657                 rset_delete(result_sets[i]);
1658             return res;
1659         }
1660
1661         if (first_set)
1662         {
1663             RSET tmp_set[2];
1664
1665             tmp_set[0] = first_set;
1666             tmp_set[1] = result_sets[i];
1667
1668             result_sets[i] = rset_create_prox(
1669                 rset_nmem, kc, kc->scope,
1670                 2, tmp_set,
1671                 1 /* ordered */, 0 /* exclusion */,
1672                 3 /* relation */, 1 /* distance */);
1673         }
1674     }
1675     if (num_result_sets == 0)
1676         *rset = rset_create_null(rset_nmem, kc, 0);
1677     else if (num_result_sets == 1)
1678         *rset = result_sets[0];
1679     else
1680         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1681                                num_result_sets, result_sets);
1682     if (!*rset)
1683         return ZEBRA_FAIL;
1684     return ZEBRA_OK;
1685 }
1686
1687 /** \brief returns result set for and-list search
1688  */
1689 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1690                                          Z_AttributesPlusTerm *zapt,
1691                                          const char *termz_org,
1692                                          const Odr_oid *attributeSet,
1693                                          zint hits_limit,
1694                                          NMEM stream,
1695                                          const char *index_type,
1696                                          int complete_flag,
1697                                          const char *rank_type,
1698                                          const char *xpath_use,
1699                                          NMEM rset_nmem,
1700                                          RSET *rset,
1701                                          struct rset_key_control *kc)
1702 {
1703     RSET *result_sets = 0;
1704     int num_result_sets = 0;
1705     int i;
1706     ZEBRA_RES res =
1707         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1708                           stream, index_type, complete_flag,
1709                           rank_type, xpath_use,
1710                           rset_nmem,
1711                           &result_sets, &num_result_sets,
1712                           kc);
1713     if (res != ZEBRA_OK)
1714         return res;
1715     for (i = 0; i<num_result_sets; i++)
1716     {
1717         RSET first_set = 0;
1718         res = search_position(zh, zapt, attributeSet,
1719                               index_type,
1720                               rset_nmem, &first_set,
1721                               kc);
1722         if (res != ZEBRA_OK)
1723         {
1724             for (i = 0; i<num_result_sets; i++)
1725                 rset_delete(result_sets[i]);
1726             return res;
1727         }
1728
1729         if (first_set)
1730         {
1731             RSET tmp_set[2];
1732
1733             tmp_set[0] = first_set;
1734             tmp_set[1] = result_sets[i];
1735
1736             result_sets[i] = rset_create_prox(
1737                 rset_nmem, kc, kc->scope,
1738                 2, tmp_set,
1739                 1 /* ordered */, 0 /* exclusion */,
1740                 3 /* relation */, 1 /* distance */);
1741         }
1742     }
1743
1744
1745     if (num_result_sets == 0)
1746         *rset = rset_create_null(rset_nmem, kc, 0);
1747     else if (num_result_sets == 1)
1748         *rset = result_sets[0];
1749     else
1750         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1751                                 num_result_sets, result_sets);
1752     if (!*rset)
1753         return ZEBRA_FAIL;
1754     return ZEBRA_OK;
1755 }
1756
1757 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1758                             const char **term_sub,
1759                             WRBUF term_dict,
1760                             const Odr_oid *attributeSet,
1761                             struct grep_info *grep_info,
1762                             int *max_pos,
1763                             zebra_map_t zm,
1764                             WRBUF display_term,
1765                             int *error_code)
1766 {
1767     AttrType relation;
1768     int relation_value;
1769     int term_value;
1770     int r;
1771     WRBUF term_num = wrbuf_alloc();
1772
1773     *error_code = 0;
1774     attr_init_APT(&relation, zapt, 2);
1775     relation_value = attr_find(&relation, NULL);
1776
1777     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1778
1779     switch (relation_value)
1780     {
1781     case 1:
1782         yaz_log(log_level_rpn, "Relation <");
1783         if (!term_100(zm, term_sub, term_num, 1, display_term))
1784         {
1785             wrbuf_destroy(term_num);
1786             return 0;
1787         }
1788         term_value = atoi(wrbuf_cstr(term_num));
1789         gen_regular_rel(term_dict, term_value-1, 1);
1790         break;
1791     case 2:
1792         yaz_log(log_level_rpn, "Relation <=");
1793         if (!term_100(zm, term_sub, term_num, 1, display_term))
1794         {
1795             wrbuf_destroy(term_num);
1796             return 0;
1797         }
1798         term_value = atoi(wrbuf_cstr(term_num));
1799         gen_regular_rel(term_dict, term_value, 1);
1800         break;
1801     case 4:
1802         yaz_log(log_level_rpn, "Relation >=");
1803         if (!term_100(zm, term_sub, term_num, 1, display_term))
1804         {
1805             wrbuf_destroy(term_num);
1806             return 0;
1807         }
1808         term_value = atoi(wrbuf_cstr(term_num));
1809         gen_regular_rel(term_dict, term_value, 0);
1810         break;
1811     case 5:
1812         yaz_log(log_level_rpn, "Relation >");
1813         if (!term_100(zm, term_sub, term_num, 1, display_term))
1814         {
1815             wrbuf_destroy(term_num);
1816             return 0;
1817         }
1818         term_value = atoi(wrbuf_cstr(term_num));
1819         gen_regular_rel(term_dict, term_value+1, 0);
1820         break;
1821     case -1:
1822     case 3:
1823         yaz_log(log_level_rpn, "Relation =");
1824         if (!term_100(zm, term_sub, term_num, 1, display_term))
1825         {
1826             wrbuf_destroy(term_num);
1827             return 0;
1828         }
1829         term_value = atoi(wrbuf_cstr(term_num));
1830         wrbuf_printf(term_dict, "(0*%d)", term_value);
1831         break;
1832     case 103:
1833         /* term_tmp untouched.. */
1834         while (**term_sub != '\0')
1835             (*term_sub)++;
1836         break;
1837     default:
1838         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1839         wrbuf_destroy(term_num);
1840         return 0;
1841     }
1842     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1843                          0, grep_info, max_pos, 0, grep_handle);
1844
1845     if (r == 1)
1846         zebra_set_partial_result(zh);
1847     else if (r)
1848         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1849     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1850     wrbuf_destroy(term_num);
1851     return 1;
1852 }
1853
1854 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1855                               const char **term_sub,
1856                               WRBUF term_dict,
1857                               const Odr_oid *attributeSet, NMEM stream,
1858                               struct grep_info *grep_info,
1859                               const char *index_type, int complete_flag,
1860                               WRBUF display_term,
1861                               const char *xpath_use,
1862                               struct ord_list **ol)
1863 {
1864     const char *termp;
1865     struct rpn_char_map_info rcmi;
1866     int max_pos;
1867     int relation_error = 0;
1868     int ord, ord_len, i;
1869     char ord_buf[32];
1870     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1871
1872     *ol = ord_list_create(stream);
1873
1874     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1875
1876     termp = *term_sub;
1877
1878     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1879                           attributeSet, &ord) != ZEBRA_OK)
1880     {
1881         return ZEBRA_FAIL;
1882     }
1883
1884     wrbuf_rewind(term_dict);
1885
1886     *ol = ord_list_append(stream, *ol, ord);
1887
1888     ord_len = key_SU_encode(ord, ord_buf);
1889
1890     wrbuf_putc(term_dict, '(');
1891     for (i = 0; i < ord_len; i++)
1892     {
1893         wrbuf_putc(term_dict, 1);
1894         wrbuf_putc(term_dict, ord_buf[i]);
1895     }
1896     wrbuf_putc(term_dict, ')');
1897
1898     if (!numeric_relation(zh, zapt, &termp, term_dict,
1899                           attributeSet, grep_info, &max_pos, zm,
1900                           display_term, &relation_error))
1901     {
1902         if (relation_error)
1903         {
1904             zebra_setError(zh, relation_error, 0);
1905             return ZEBRA_FAIL;
1906         }
1907         *term_sub = 0;
1908         return ZEBRA_OK;
1909     }
1910     *term_sub = termp;
1911     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1912     return ZEBRA_OK;
1913 }
1914
1915
1916 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1917                                         Z_AttributesPlusTerm *zapt,
1918                                         const char *termz,
1919                                         const Odr_oid *attributeSet,
1920                                         zint hits_limit,
1921                                         NMEM stream,
1922                                         const char *index_type,
1923                                         int complete_flag,
1924                                         const char *rank_type,
1925                                         const char *xpath_use,
1926                                         NMEM rset_nmem,
1927                                         RSET *rset,
1928                                         struct rset_key_control *kc)
1929 {
1930     const char *termp = termz;
1931     RSET *result_sets = 0;
1932     int num_result_sets = 0;
1933     ZEBRA_RES res;
1934     struct grep_info grep_info;
1935     int alloc_sets = 0;
1936     zint hits_limit_value = hits_limit;
1937     const char *term_ref_id_str = 0;
1938
1939     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1940                           stream);
1941
1942     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1943     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1944         return ZEBRA_FAIL;
1945     while (1)
1946     {
1947         struct ord_list *ol;
1948         WRBUF term_dict = wrbuf_alloc();
1949         WRBUF display_term = wrbuf_alloc();
1950         if (alloc_sets == num_result_sets)
1951         {
1952             int add = 10;
1953             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1954                                               sizeof(*rnew));
1955             if (alloc_sets)
1956                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1957             alloc_sets = alloc_sets + add;
1958             result_sets = rnew;
1959         }
1960         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1961         grep_info.isam_p_indx = 0;
1962         res = numeric_term(zh, zapt, &termp, term_dict,
1963                            attributeSet, stream, &grep_info,
1964                            index_type, complete_flag,
1965                            display_term, xpath_use, &ol);
1966         wrbuf_destroy(term_dict);
1967         if (res == ZEBRA_FAIL || termp == 0)
1968         {
1969             wrbuf_destroy(display_term);
1970             break;
1971         }
1972         yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
1973         result_sets[num_result_sets] =
1974             rset_trunc(zh, grep_info.isam_p_buf,
1975                        grep_info.isam_p_indx, wrbuf_buf(display_term),
1976                        wrbuf_len(display_term), rank_type,
1977                        0 /* preserve position */,
1978                        zapt->term->which, rset_nmem,
1979                        kc, kc->scope, ol, index_type,
1980                        hits_limit_value,
1981                        term_ref_id_str);
1982         wrbuf_destroy(display_term);
1983         if (!result_sets[num_result_sets])
1984             break;
1985         num_result_sets++;
1986         if (!*termp)
1987             break;
1988     }
1989     grep_info_delete(&grep_info);
1990
1991     if (res != ZEBRA_OK)
1992         return res;
1993     if (num_result_sets == 0)
1994         *rset = rset_create_null(rset_nmem, kc, 0);
1995     else if (num_result_sets == 1)
1996         *rset = result_sets[0];
1997     else
1998         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1999                                 num_result_sets, result_sets);
2000     if (!*rset)
2001         return ZEBRA_FAIL;
2002     return ZEBRA_OK;
2003 }
2004
2005 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
2006                                       Z_AttributesPlusTerm *zapt,
2007                                       const char *termz,
2008                                       const Odr_oid *attributeSet,
2009                                       NMEM stream,
2010                                       const char *rank_type, NMEM rset_nmem,
2011                                       RSET *rset,
2012                                       struct rset_key_control *kc)
2013 {
2014     Record rec;
2015     zint sysno = atozint(termz);
2016
2017     if (sysno <= 0)
2018         sysno = 0;
2019     rec = rec_get(zh->reg->records, sysno);
2020     if (!rec)
2021         sysno = 0;
2022
2023     rec_free(&rec);
2024
2025     if (sysno <= 0)
2026     {
2027         *rset = rset_create_null(rset_nmem, kc, 0);
2028     }
2029     else
2030     {
2031         RSFD rsfd;
2032         struct it_key key;
2033         *rset = rset_create_temp(rset_nmem, kc, kc->scope,
2034                                  res_get(zh->res, "setTmpDir"), 0);
2035         rsfd = rset_open(*rset, RSETF_WRITE);
2036
2037         key.mem[0] = sysno;
2038         key.mem[1] = 1;
2039         key.len = 2;
2040         rset_write(rsfd, &key);
2041         rset_close(rsfd);
2042     }
2043     return ZEBRA_OK;
2044 }
2045
2046 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2047                                const Odr_oid *attributeSet, NMEM stream,
2048                                Z_SortKeySpecList *sort_sequence,
2049                                const char *rank_type,
2050                                NMEM rset_nmem,
2051                                RSET *rset,
2052                                struct rset_key_control *kc)
2053 {
2054     int i;
2055     int sort_relation_value;
2056     AttrType sort_relation_type;
2057     Z_SortKeySpec *sks;
2058     Z_SortKey *sk;
2059     char termz[20];
2060
2061     attr_init_APT(&sort_relation_type, zapt, 7);
2062     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
2063
2064     if (!sort_sequence->specs)
2065     {
2066         sort_sequence->num_specs = 10;
2067         sort_sequence->specs = (Z_SortKeySpec **)
2068             nmem_malloc(stream, sort_sequence->num_specs *
2069                         sizeof(*sort_sequence->specs));
2070         for (i = 0; i<sort_sequence->num_specs; i++)
2071             sort_sequence->specs[i] = 0;
2072     }
2073     if (zapt->term->which != Z_Term_general)
2074         i = 0;
2075     else
2076         i = atoi_n((char *) zapt->term->u.general->buf,
2077                    zapt->term->u.general->len);
2078     if (i >= sort_sequence->num_specs)
2079         i = 0;
2080     sprintf(termz, "%d", i);
2081
2082     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
2083     sks->sortElement = (Z_SortElement *)
2084         nmem_malloc(stream, sizeof(*sks->sortElement));
2085     sks->sortElement->which = Z_SortElement_generic;
2086     sk = sks->sortElement->u.generic = (Z_SortKey *)
2087         nmem_malloc(stream, sizeof(*sk));
2088     sk->which = Z_SortKey_sortAttributes;
2089     sk->u.sortAttributes = (Z_SortAttributes *)
2090         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
2091
2092     sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
2093     sk->u.sortAttributes->list = zapt->attributes;
2094
2095     sks->sortRelation = (Odr_int *)
2096         nmem_malloc(stream, sizeof(*sks->sortRelation));
2097     if (sort_relation_value == 1)
2098         *sks->sortRelation = Z_SortKeySpec_ascending;
2099     else if (sort_relation_value == 2)
2100         *sks->sortRelation = Z_SortKeySpec_descending;
2101     else
2102         *sks->sortRelation = Z_SortKeySpec_ascending;
2103
2104     sks->caseSensitivity = (Odr_int *)
2105         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2106     *sks->caseSensitivity = 0;
2107
2108     sks->which = Z_SortKeySpec_null;
2109     sks->u.null = odr_nullval ();
2110     sort_sequence->specs[i] = sks;
2111     *rset = rset_create_null(rset_nmem, kc, 0);
2112     return ZEBRA_OK;
2113 }
2114
2115
2116 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2117                            const Odr_oid *attributeSet,
2118                            struct xpath_location_step *xpath, int max,
2119                            NMEM mem)
2120 {
2121     const Odr_oid *curAttributeSet = attributeSet;
2122     AttrType use;
2123     const char *use_string = 0;
2124
2125     attr_init_APT(&use, zapt, 1);
2126     attr_find_ex(&use, &curAttributeSet, &use_string);
2127
2128     if (!use_string || *use_string != '/')
2129         return -1;
2130
2131     return zebra_parse_xpath_str(use_string, xpath, max, mem);
2132 }
2133
2134
2135
2136 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2137                         const char *index_type, const char *term,
2138                         const char *xpath_use,
2139                         NMEM rset_nmem,
2140                         struct rset_key_control *kc)
2141 {
2142     struct grep_info grep_info;
2143     int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2144                                            zinfo_index_category_index,
2145                                            index_type, xpath_use);
2146     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2147         return rset_create_null(rset_nmem, kc, 0);
2148
2149     if (ord < 0)
2150         return rset_create_null(rset_nmem, kc, 0);
2151     else
2152     {
2153         int i, max_pos;
2154         char ord_buf[32];
2155         RSET rset;
2156         WRBUF term_dict = wrbuf_alloc();
2157         int ord_len = key_SU_encode(ord, ord_buf);
2158         int term_type = Z_Term_characterString;
2159         const char *flags = "void";
2160
2161         wrbuf_putc(term_dict, '(');
2162         for (i = 0; i<ord_len; i++)
2163         {
2164             wrbuf_putc(term_dict, 1);
2165             wrbuf_putc(term_dict, ord_buf[i]);
2166         }
2167         wrbuf_putc(term_dict, ')');
2168         wrbuf_puts(term_dict, term);
2169
2170         grep_info.isam_p_indx = 0;
2171         dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2172                          &grep_info, &max_pos, 0, grep_handle);
2173         yaz_log(YLOG_DEBUG, "%s %d positions", term,
2174                 grep_info.isam_p_indx);
2175         rset = rset_trunc(zh, grep_info.isam_p_buf,
2176                           grep_info.isam_p_indx, term, strlen(term),
2177                           flags, 1, term_type, rset_nmem,
2178                           kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2179                           0 /* term_ref_id_str */);
2180         grep_info_delete(&grep_info);
2181         wrbuf_destroy(term_dict);
2182         return rset;
2183     }
2184 }
2185
2186 static
2187 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2188                            NMEM stream, const char *rank_type, RSET rset,
2189                            int xpath_len, struct xpath_location_step *xpath,
2190                            NMEM rset_nmem,
2191                            RSET *rset_out,
2192                            struct rset_key_control *kc)
2193 {
2194     int i;
2195     int always_matches = rset ? 0 : 1;
2196
2197     if (xpath_len < 0)
2198     {
2199         *rset_out = rset;
2200         return ZEBRA_OK;
2201     }
2202
2203     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2204     for (i = 0; i<xpath_len; i++)
2205     {
2206         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2207
2208     }
2209
2210     /*
2211     //a    ->    a/.*
2212     //a/b  ->    b/a/.*
2213     /a     ->    a/
2214     /a/b   ->    b/a/
2215
2216     /      ->    none
2217
2218     a[@attr = value]/b[@other = othervalue]
2219
2220     /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2221     /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2222     /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2223     /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2224     /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2225     /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2226
2227     */
2228
2229     dict_grep_cmap(zh->reg->dict, 0, 0);
2230
2231     {
2232         int level = xpath_len;
2233         int first_path = 1;
2234
2235         while (--level >= 0)
2236         {
2237             WRBUF xpath_rev = wrbuf_alloc();
2238             int i;
2239             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2240
2241             for (i = level; i >= 1; --i)
2242             {
2243                 const char *cp = xpath[i].part;
2244                 if (*cp)
2245                 {
2246                     for (; *cp; cp++)
2247                     {
2248                         if (*cp == '*')
2249                             wrbuf_puts(xpath_rev, "[^/]*");
2250                         else if (*cp == ' ')
2251                             wrbuf_puts(xpath_rev, "\001 ");
2252                         else
2253                             wrbuf_putc(xpath_rev, *cp);
2254
2255                         /* wrbuf_putc does not null-terminate , but
2256                            wrbuf_puts below ensures it does.. so xpath_rev
2257                            is OK iff length is > 0 */
2258                     }
2259                     wrbuf_puts(xpath_rev, "/");
2260                 }
2261                 else if (i == 1)  /* // case */
2262                     wrbuf_puts(xpath_rev, ".*");
2263             }
2264             if (xpath[level].predicate &&
2265                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2266                 xpath[level].predicate->u.relation.name[0])
2267             {
2268                 WRBUF wbuf = wrbuf_alloc();
2269                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2270                 if (xpath[level].predicate->u.relation.value)
2271                 {
2272                     const char *cp = xpath[level].predicate->u.relation.value;
2273                     wrbuf_putc(wbuf, '=');
2274
2275                     while (*cp)
2276                     {
2277                         if (strchr(REGEX_CHARS, *cp))
2278                             wrbuf_putc(wbuf, '\\');
2279                         wrbuf_putc(wbuf, *cp);
2280                         cp++;
2281                     }
2282                 }
2283                 rset_attr = xpath_trunc(
2284                     zh, stream, "0", wrbuf_cstr(wbuf),
2285                     ZEBRA_XPATH_ATTR_NAME,
2286                     rset_nmem, kc);
2287                 wrbuf_destroy(wbuf);
2288             }
2289             else
2290             {
2291                 if (!first_path)
2292                 {
2293                     wrbuf_destroy(xpath_rev);
2294                     continue;
2295                 }
2296             }
2297             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2298                     wrbuf_cstr(xpath_rev));
2299             if (wrbuf_len(xpath_rev))
2300             {
2301                 rset_start_tag = xpath_trunc(zh, stream, "0",
2302                                              wrbuf_cstr(xpath_rev),
2303                                              ZEBRA_XPATH_ELM_BEGIN,
2304                                              rset_nmem, kc);
2305                 if (always_matches)
2306                     rset = rset_start_tag;
2307                 else
2308                 {
2309                     rset_end_tag = xpath_trunc(zh, stream, "0",
2310                                                wrbuf_cstr(xpath_rev),
2311                                                ZEBRA_XPATH_ELM_END,
2312                                                rset_nmem, kc);
2313
2314                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2315                                                rset_start_tag, rset,
2316                                                rset_end_tag, rset_attr);
2317                 }
2318             }
2319             wrbuf_destroy(xpath_rev);
2320             first_path = 0;
2321         }
2322     }
2323     *rset_out = rset;
2324     return ZEBRA_OK;
2325 }
2326
2327 #define MAX_XPATH_STEPS 10
2328
2329 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2330                                      Z_AttributesPlusTerm *zapt,
2331                                      const Odr_oid *attributeSet,
2332                                      zint hits_limit, NMEM stream,
2333                                      Z_SortKeySpecList *sort_sequence,
2334                                      NMEM rset_nmem,
2335                                      RSET *rset,
2336                                      struct rset_key_control *kc);
2337
2338 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2339                                 const Odr_oid *attributeSet,
2340                                 zint hits_limit, NMEM stream,
2341                                 Z_SortKeySpecList *sort_sequence,
2342                                 int num_bases, const char **basenames,
2343                                 NMEM rset_nmem,
2344                                 RSET *rset,
2345                                 struct rset_key_control *kc)
2346 {
2347     RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2348     ZEBRA_RES res = ZEBRA_OK;
2349     int i;
2350     for (i = 0; i < num_bases; i++)
2351     {
2352
2353         if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2354         {
2355             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2356                            basenames[i]);
2357             res = ZEBRA_FAIL;
2358             break;
2359         }
2360         res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream,
2361                                   sort_sequence,
2362                                   rset_nmem, rsets+i, kc);
2363         if (res != ZEBRA_OK)
2364             break;
2365     }
2366     if (res != ZEBRA_OK)
2367     {   /* must clean up the already created sets */
2368         while (--i >= 0)
2369             rset_delete(rsets[i]);
2370         *rset = 0;
2371     }
2372     else
2373     {
2374         if (num_bases == 1)
2375             *rset = rsets[0];
2376         else if (num_bases == 0)
2377             *rset = rset_create_null(rset_nmem, kc, 0);
2378         else
2379             *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
2380                                    num_bases, rsets);
2381     }
2382     return res;
2383 }
2384
2385 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2386                                      Z_AttributesPlusTerm *zapt,
2387                                      const Odr_oid *attributeSet,
2388                                      zint hits_limit, NMEM stream,
2389                                      Z_SortKeySpecList *sort_sequence,
2390                                      NMEM rset_nmem,
2391                                      RSET *rset,
2392                                      struct rset_key_control *kc)
2393 {
2394     ZEBRA_RES res = ZEBRA_OK;
2395     const char *index_type;
2396     char *search_type = NULL;
2397     char rank_type[128];
2398     int complete_flag;
2399     int sort_flag;
2400     char termz[IT_MAX_WORD+1];
2401     int xpath_len;
2402     const char *xpath_use = 0;
2403     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2404
2405     if (!log_level_set)
2406     {
2407         log_level_rpn = yaz_log_module_level("rpn");
2408         log_level_set = 1;
2409     }
2410     zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2411                     rank_type, &complete_flag, &sort_flag);
2412
2413     yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2414     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2415     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2416     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2417
2418     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2419         return ZEBRA_FAIL;
2420
2421     if (sort_flag)
2422         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2423                              rank_type, rset_nmem, rset, kc);
2424     /* consider if an X-Path query is used */
2425     xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2426                                 xpath, MAX_XPATH_STEPS, stream);
2427     if (xpath_len >= 0)
2428     {
2429         if (xpath[xpath_len-1].part[0] == '@')
2430             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2431         else
2432             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */
2433
2434         if (1)
2435         {
2436             AttrType relation;
2437             int relation_value;
2438
2439             attr_init_APT(&relation, zapt, 2);
2440             relation_value = attr_find(&relation, NULL);
2441
2442             if (relation_value == 103) /* alwaysmatches */
2443             {
2444                 *rset = 0; /* signal no "term" set */
2445                 return rpn_search_xpath(zh, stream, rank_type, *rset,
2446                                         xpath_len, xpath, rset_nmem, rset, kc);
2447             }
2448         }
2449     }
2450
2451     /* search using one of the various search type strategies
2452        termz is our UTF-8 search term
2453        attributeSet is top-level default attribute set
2454        stream is ODR for search
2455        reg_id is the register type
2456        complete_flag is 1 for complete subfield, 0 for incomplete
2457        xpath_use is use-attribute to be used for X-Path search, 0 for none
2458     */
2459     if (!strcmp(search_type, "phrase"))
2460     {
2461         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit,
2462                                     stream,
2463                                     index_type, complete_flag, rank_type,
2464                                     xpath_use,
2465                                     rset_nmem,
2466                                     rset, kc);
2467     }
2468     else if (!strcmp(search_type, "and-list"))
2469     {
2470         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit,
2471                                       stream,
2472                                       index_type, complete_flag, rank_type,
2473                                       xpath_use,
2474                                       rset_nmem,
2475                                       rset, kc);
2476     }
2477     else if (!strcmp(search_type, "or-list"))
2478     {
2479         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit,
2480                                      stream,
2481                                      index_type, complete_flag, rank_type,
2482                                      xpath_use,
2483                                      rset_nmem,
2484                                      rset, kc);
2485     }
2486     else if (!strcmp(search_type, "local"))
2487     {
2488         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2489                                    rank_type, rset_nmem, rset, kc);
2490     }
2491     else if (!strcmp(search_type, "numeric"))
2492     {
2493         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, hits_limit,
2494                                      stream,
2495                                      index_type, complete_flag, rank_type,
2496                                      xpath_use,
2497                                      rset_nmem,
2498                                      rset, kc);
2499     }
2500     else
2501     {
2502         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2503         res = ZEBRA_FAIL;
2504     }
2505     if (res != ZEBRA_OK)
2506         return res;
2507     if (!*rset)
2508         return ZEBRA_FAIL;
2509     return rpn_search_xpath(zh, stream, rank_type, *rset,
2510                             xpath_len, xpath, rset_nmem, rset, kc);
2511 }
2512
2513 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2514                                       const Odr_oid *attributeSet,
2515                                       zint hits_limit,
2516                                       NMEM stream, NMEM rset_nmem,
2517                                       Z_SortKeySpecList *sort_sequence,
2518                                       int num_bases, const char **basenames,
2519                                       RSET **result_sets, int *num_result_sets,
2520                                       Z_Operator *parent_op,
2521                                       struct rset_key_control *kc);
2522
2523 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2524                                    zint *approx_limit)
2525 {
2526     ZEBRA_RES res = ZEBRA_OK;
2527     if (zs->which == Z_RPNStructure_complex)
2528     {
2529         if (res == ZEBRA_OK)
2530             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2531                                            approx_limit);
2532         if (res == ZEBRA_OK)
2533             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2534                                            approx_limit);
2535     }
2536     else if (zs->which == Z_RPNStructure_simple)
2537     {
2538         if (zs->u.simple->which == Z_Operand_APT)
2539         {
2540             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2541             AttrType global_hits_limit_attr;
2542             int l;
2543
2544             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2545
2546             l = attr_find(&global_hits_limit_attr, NULL);
2547             if (l != -1)
2548                 *approx_limit = l;
2549         }
2550     }
2551     return res;
2552 }
2553
2554 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2555                          const Odr_oid *attributeSet,
2556                          zint hits_limit,
2557                          NMEM stream, NMEM rset_nmem,
2558                          Z_SortKeySpecList *sort_sequence,
2559                          int num_bases, const char **basenames,
2560                          RSET *result_set)
2561 {
2562     RSET *result_sets = 0;
2563     int num_result_sets = 0;
2564     ZEBRA_RES res;
2565     struct rset_key_control *kc = zebra_key_control_create(zh);
2566
2567     res = rpn_search_structure(zh, zs, attributeSet, hits_limit,
2568                                stream, rset_nmem,
2569                                sort_sequence,
2570                                num_bases, basenames,
2571                                &result_sets, &num_result_sets,
2572                                0 /* no parent op */,
2573                                kc);
2574     if (res != ZEBRA_OK)
2575     {
2576         int i;
2577         for (i = 0; i<num_result_sets; i++)
2578             rset_delete(result_sets[i]);
2579         *result_set = 0;
2580     }
2581     else
2582     {
2583         assert(num_result_sets == 1);
2584         assert(result_sets);
2585         assert(*result_sets);
2586         *result_set = *result_sets;
2587     }
2588     (*kc->dec)(kc);
2589     return res;
2590 }
2591
2592 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2593                                const Odr_oid *attributeSet, zint hits_limit,
2594                                NMEM stream, NMEM rset_nmem,
2595                                Z_SortKeySpecList *sort_sequence,
2596                                int num_bases, const char **basenames,
2597                                RSET **result_sets, int *num_result_sets,
2598                                Z_Operator *parent_op,
2599                                struct rset_key_control *kc)
2600 {
2601     *num_result_sets = 0;
2602     if (zs->which == Z_RPNStructure_complex)
2603     {
2604         ZEBRA_RES res;
2605         Z_Operator *zop = zs->u.complex->roperator;
2606         RSET *result_sets_l = 0;
2607         int num_result_sets_l = 0;
2608         RSET *result_sets_r = 0;
2609         int num_result_sets_r = 0;
2610
2611         res = rpn_search_structure(zh, zs->u.complex->s1,
2612                                    attributeSet, hits_limit, stream, rset_nmem,
2613                                    sort_sequence,
2614                                    num_bases, basenames,
2615                                    &result_sets_l, &num_result_sets_l,
2616                                    zop, kc);
2617         if (res != ZEBRA_OK)
2618         {
2619             int i;
2620             for (i = 0; i<num_result_sets_l; i++)
2621                 rset_delete(result_sets_l[i]);
2622             return res;
2623         }
2624         res = rpn_search_structure(zh, zs->u.complex->s2,
2625                                    attributeSet, hits_limit, stream, rset_nmem,
2626                                    sort_sequence,
2627                                    num_bases, basenames,
2628                                    &result_sets_r, &num_result_sets_r,
2629                                    zop, kc);
2630         if (res != ZEBRA_OK)
2631         {
2632             int i;
2633             for (i = 0; i<num_result_sets_l; i++)
2634                 rset_delete(result_sets_l[i]);
2635             for (i = 0; i<num_result_sets_r; i++)
2636                 rset_delete(result_sets_r[i]);
2637             return res;
2638         }
2639
2640         /* make a new list of result for all children */
2641         *num_result_sets = num_result_sets_l + num_result_sets_r;
2642         *result_sets = nmem_malloc(stream, *num_result_sets *
2643                                    sizeof(**result_sets));
2644         memcpy(*result_sets, result_sets_l,
2645                num_result_sets_l * sizeof(**result_sets));
2646         memcpy(*result_sets + num_result_sets_l, result_sets_r,
2647                num_result_sets_r * sizeof(**result_sets));
2648
2649         if (!parent_op || parent_op->which != zop->which
2650             || (zop->which != Z_Operator_and &&
2651                 zop->which != Z_Operator_or))
2652         {
2653             /* parent node different from this one (or non-present) */
2654             /* we must combine result sets now */
2655             RSET rset;
2656             switch (zop->which)
2657             {
2658             case Z_Operator_and:
2659                 rset = rset_create_and(rset_nmem, kc,
2660                                        kc->scope,
2661                                        *num_result_sets, *result_sets);
2662                 break;
2663             case Z_Operator_or:
2664                 rset = rset_create_or(rset_nmem, kc,
2665                                       kc->scope, 0, /* termid */
2666                                       *num_result_sets, *result_sets);
2667                 break;
2668             case Z_Operator_and_not:
2669                 rset = rset_create_not(rset_nmem, kc,
2670                                        kc->scope,
2671                                        (*result_sets)[0],
2672                                        (*result_sets)[1]);
2673                 break;
2674             case Z_Operator_prox:
2675                 if (zop->u.prox->which != Z_ProximityOperator_known)
2676                 {
2677                     zebra_setError(zh,
2678                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2679                                    0);
2680                     return ZEBRA_FAIL;
2681                 }
2682                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2683                 {
2684                     zebra_setError_zint(zh,
2685                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2686                                         *zop->u.prox->u.known);
2687                     return ZEBRA_FAIL;
2688                 }
2689                 else
2690                 {
2691                     rset = rset_create_prox(rset_nmem, kc,
2692                                             kc->scope,
2693                                             *num_result_sets, *result_sets,
2694                                             *zop->u.prox->ordered,
2695                                             (!zop->u.prox->exclusion ?
2696                                              0 : *zop->u.prox->exclusion),
2697                                             *zop->u.prox->relationType,
2698                                             *zop->u.prox->distance );
2699                 }
2700                 break;
2701             default:
2702                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2703                 return ZEBRA_FAIL;
2704             }
2705             *num_result_sets = 1;
2706             *result_sets = nmem_malloc(stream, *num_result_sets *
2707                                        sizeof(**result_sets));
2708             (*result_sets)[0] = rset;
2709         }
2710     }
2711     else if (zs->which == Z_RPNStructure_simple)
2712     {
2713         RSET rset;
2714         ZEBRA_RES res;
2715
2716         if (zs->u.simple->which == Z_Operand_APT)
2717         {
2718             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2719             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2720                                  attributeSet, hits_limit,
2721                                  stream, sort_sequence,
2722                                  num_bases, basenames, rset_nmem, &rset,
2723                                  kc);
2724             if (res != ZEBRA_OK)
2725                 return res;
2726         }
2727         else if (zs->u.simple->which == Z_Operand_resultSetId)
2728         {
2729             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2730             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2731             if (!rset)
2732             {
2733                 zebra_setError(zh,
2734                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2735                                zs->u.simple->u.resultSetId);
2736                 return ZEBRA_FAIL;
2737             }
2738             rset_dup(rset);
2739         }
2740         else
2741         {
2742             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2743             return ZEBRA_FAIL;
2744         }
2745         *num_result_sets = 1;
2746         *result_sets = nmem_malloc(stream, *num_result_sets *
2747                                    sizeof(**result_sets));
2748         (*result_sets)[0] = rset;
2749     }
2750     else
2751     {
2752         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2753         return ZEBRA_FAIL;
2754     }
2755     return ZEBRA_OK;
2756 }
2757
2758
2759
2760 /*
2761  * Local variables:
2762  * c-basic-offset: 4
2763  * c-file-style: "Stroustrup"
2764  * indent-tabs-mode: nil
2765  * End:
2766  * vim: shiftwidth=4 tabstop=8 expandtab
2767  */
2768