7733a5d9e651b306ded58c3ca15adf9539b2503a
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* This file is part of the Zebra server.
2    Copyright (C) 1994-2011 Index Data
3
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 */
19
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
/* Lazy logging setup: log_level_set becomes 1 once log_level_rpn has been
   fetched with yaz_log_module_level("rpn") (done in add_isam_p). */
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
42
/* NOTE(review): not referenced in the part of the file visible here;
   presumably disables term-set handling further down -- confirm. */
43 #define TERMSET_DISABLE 1
44
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
46 {
47     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48     const char **out = zebra_maps_input(p->zm, from, len, 0);
49 #if 0
50     if (out && *out)
51     {
52         const char *outp = *out;
53         yaz_log(YLOG_LOG, "---");
54         while (*outp)
55         {
56             yaz_log(YLOG_LOG, "%02X", *outp);
57             outp++;
58         }
59     }
60 #endif
61     return out;
62 }
63
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65                           struct rpn_char_map_info *map_info)
66 {
67     map_info->zm = zm;
68     if (zebra_maps_is_icu(zm))
69         dict_grep_cmap(reg->dict, 0, 0);
70     else
71         dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
72 }
73
74 #define TERM_COUNT /* when defined, grep_info also carries term_no */
75
/* Accumulator for dictionary grep hits; entries are appended by
   add_isam_p(). */
76 struct grep_info {
77 #ifdef TERM_COUNT
78     int *term_no;            /* reallocated to the same capacity as isam_p_buf */
79 #endif
80     ISAM_P *isam_p_buf;      /* collected ISAM positions, one per hit */
81     int isam_p_size;         /* capacity of isam_p_buf, in entries */
82     int isam_p_indx;         /* number of entries currently stored */
83     int trunc_max;           /* add_isam_p refuses entries once isam_p_indx >= trunc_max - 1 */
84     ZebraHandle zh;          /* handle used for term untranslation and explain lookups */
85     const char *index_type;  /* passed to zebra_term_untrans() */
86     ZebraSet termset;        /* if non-null, every hit is also reported via resultSetAddTerm() */
87 };
88
89 static int add_isam_p(const char *name, const char *info,
90                       struct grep_info *p)
91 {
92     if (!log_level_set)
93     {
94         log_level_rpn = yaz_log_module_level("rpn");
95         log_level_set = 1;
96     }
97     /* we may have to stop this madness.. NOTE: -1 so that if
98        truncmax == trunxlimit we do *not* generate result sets */
99     if (p->isam_p_indx >= p->trunc_max - 1)
100         return 1;
101
102     if (p->isam_p_indx == p->isam_p_size)
103     {
104         ISAM_P *new_isam_p_buf;
105 #ifdef TERM_COUNT
106         int *new_term_no;
107 #endif
108         p->isam_p_size = 2*p->isam_p_size + 100;
109         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
110                                             p->isam_p_size);
111         if (p->isam_p_buf)
112         {
113             memcpy(new_isam_p_buf, p->isam_p_buf,
114                    p->isam_p_indx * sizeof(*p->isam_p_buf));
115             xfree(p->isam_p_buf);
116         }
117         p->isam_p_buf = new_isam_p_buf;
118
119 #ifdef TERM_COUNT
120         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
121         if (p->term_no)
122         {
123             memcpy(new_term_no, p->isam_p_buf,
124                    p->isam_p_indx * sizeof(*p->term_no));
125             xfree(p->term_no);
126         }
127         p->term_no = new_term_no;
128 #endif
129     }
130     assert(*info == sizeof(*p->isam_p_buf));
131     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
132
133     if (p->termset)
134     {
135         const char *db;
136         char term_tmp[IT_MAX_WORD];
137         int ord = 0;
138         const char *index_name;
139         int len = key_SU_decode(&ord, (const unsigned char *) name);
140
141         zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143         zebraExplain_lookup_ord(p->zh->reg->zei,
144                                 ord, 0 /* index_type */, &db, &index_name);
145         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
146
147         resultSetAddTerm(p->zh, p->termset, name[len], db,
148                          index_name, term_tmp);
149     }
150     (p->isam_p_indx)++;
151     return 0;
152 }
153
154 static int grep_handle(char *name, const char *info, void *p)
155 {
156     return add_isam_p(name, info, (struct grep_info *) p);
157 }
158
159 static int term_pre(zebra_map_t zm, const char **src,
160                     const char *ct1, int first)
161 {
162     const char *s1, *s0 = *src;
163     const char **map;
164
165     /* skip white space */
166     while (*s0)
167     {
168         if (ct1 && strchr(ct1, *s0))
169             break;
170         s1 = s0;
171         map = zebra_maps_input(zm, &s1, strlen(s1), first);
172         if (**map != *CHR_SPACE)
173             break;
174         s0 = s1;
175     }
176     *src = s0;
177     return *s0;
178 }
179
180
/* esc_str: render a byte buffer as a printable dump.
 *  out_buf, out_size:  destination and its size (must exceed 20)
 *  in_buf, in_size:    bytes to dump
 * Each byte becomes "XX:c  " (hex value, then the character itself or
 * '?' when it is outside 32..126). Once the output grows past
 * out_size-20 characters, ".." is appended and the dump stops.
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0; /* characters written so far, excluding the NUL */
    int k = 0;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    while (k < in_size)
    {
        int c = in_buf[k++] & 0xff;
        int pc = (c >= 32 && c <= 126) ? c : '?';

        used += sprintf(out_buf + used, "%02X:%c  ", c, pc);
        if (used > out_size-20)
        {
            strcpy(out_buf + used, "..");
            return;
        }
    }
}
206
207 #define REGEX_CHARS " ^[]()|.*+?!\"$\\"
208
209 static void add_non_space(const char *start, const char *end,
210                           WRBUF term_dict,
211                           WRBUF display_term,
212                           const char **map, int q_map_match)
213 {
214     size_t sz = end - start;
215
216     wrbuf_write(display_term, start, sz);
217     if (!q_map_match)
218     {
219         while (start < end)
220         {
221             if (strchr(REGEX_CHARS, *start))
222                 wrbuf_putc(term_dict, '\\');
223             wrbuf_putc(term_dict, *start);
224             start++;
225         }
226     }
227     else
228     {
229         char tmpbuf[80];
230         esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
231
232         wrbuf_puts(term_dict, map[0]);
233     }
234 }
235
236
237 static int term_102_icu(zebra_map_t zm,
238                         const char **src, WRBUF term_dict, int space_split,
239                         WRBUF display_term)
240 {
241     int no_terms = 0;
242     const char *s0 = *src, *s1;
243     while (*s0 == ' ')
244         s0++;
245     s1 = s0;
246     for (;;)
247     {
248         if (*s1 == ' ' && space_split)
249             break;
250         else if (*s1 && !strchr(REGEX_CHARS "-", *s1))
251             s1++;
252         else
253         {
254             /* EOF or regex reserved char */
255             if (s0 != s1)
256             {
257                 const char *res_buf = 0;
258                 size_t res_len = 0;
259                 const char *display_buf;
260                 size_t display_len;
261
262                 zebra_map_tokenize_start(zm, s0, s1 - s0);
263
264                 if (zebra_map_tokenize_next(zm, &res_buf, &res_len,
265                                             &display_buf, &display_len))
266                 {
267                     size_t i = res_len;
268                     while (--i >= 0 && res_buf[i] != '\x01')
269                         ;
270                     if (i > 0)
271                     {
272                         while (--i >= 0 && res_buf[i] != '\x01')
273                             ;
274                     }
275                     res_len = i; /* reduce res_len */
276                     for (i = 0; i < res_len; i++)
277                     {
278                         if (strchr(REGEX_CHARS "\\", res_buf[i]))
279                             wrbuf_putc(term_dict, '\\');
280                         if (res_buf[i] < 32)
281                             wrbuf_putc(term_dict, '\x01');
282
283                         wrbuf_putc(term_dict, res_buf[i]);
284                     }
285                     wrbuf_write(display_term, display_buf, display_len);
286
287                     no_terms++;
288                 }
289             }
290             if (*s1 == '\0')
291                 break;
292
293             wrbuf_putc(term_dict, *s1);
294             wrbuf_putc(display_term, *s1);
295
296             s1++;
297             s0 = s1;
298         }
299     }
300     if (no_terms)
301         wrbuf_puts(term_dict, "\x01\x01.*");
302     *src = s1;
303     return no_terms;
304 }
305
306 static int term_100_icu(zebra_map_t zm,
307                         const char **src, WRBUF term_dict, int space_split,
308                         WRBUF display_term,
309                         int mode)
310 {
311     size_t i;
312     const char *res_buf = 0;
313     size_t res_len = 0;
314     const char *display_buf;
315     size_t display_len;
316     const char *s0 = *src, *s1;
317
318     while (*s0 == ' ')
319         s0++;
320
321     if (*s0 == '\0')
322         return 0;
323
324     if (space_split)
325     {
326         s1 = s0;
327         while (*s1 && *s1 != ' ')
328             s1++;
329     }
330     else
331         s1 = s0 + strlen(s0);
332
333     *src = s1;
334
335     zebra_map_tokenize_start(zm, s0, s1 - s0);
336
337     if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
338                                  &display_buf, &display_len))
339     {
340         return 0;
341     }
342     wrbuf_write(display_term, display_buf, display_len);
343     if (mode)
344     {
345         /* ICU sort keys seem to be of the form
346            basechars \x01 accents \x01 length
347            For now we'll just right truncate from basechars . This
348            may give false hits due to accents not being used.
349         */
350         i = res_len;
351         while (--i >= 0 && res_buf[i] != '\x01')
352             ;
353         if (i > 0)
354         {
355             while (--i >= 0 && res_buf[i] != '\x01')
356                 ;
357         }
358         if (i == 0)
359         {  /* did not find base chars at all. Throw error */
360             return -1;
361         }
362         res_len = i; /* reduce res_len */
363     }
364     if (mode & 2)
365         wrbuf_puts(term_dict, ".*");
366     for (i = 0; i < res_len; i++)
367     {
368         if (strchr(REGEX_CHARS "\\", res_buf[i]))
369             wrbuf_putc(term_dict, '\\');
370         if (res_buf[i] < 32)
371             wrbuf_putc(term_dict, '\x01');
372
373         wrbuf_putc(term_dict, res_buf[i]);
374     }
375     if (mode & 1)
376         wrbuf_puts(term_dict, ".*");
377     else if (mode)
378         wrbuf_puts(term_dict, "\x01\x01.*");
379     return 1;
380 }
381
382 /* term_100: handle term, where trunc = none(no operators at all) */
383 static int term_100(zebra_map_t zm,
384                     const char **src, WRBUF term_dict, int space_split,
385                     WRBUF display_term)
386 {
387     const char *s0;
388     const char **map;
389     int i = 0;
390
391     const char *space_start = 0;
392     const char *space_end = 0;
393
394     if (!term_pre(zm, src, 0, !space_split))
395         return 0;
396     s0 = *src;
397     while (*s0)
398     {
399         const char *s1 = s0;
400         int q_map_match = 0;
401         map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
402         if (space_split)
403         {
404             if (**map == *CHR_SPACE)
405                 break;
406         }
407         else  /* complete subfield only. */
408         {
409             if (**map == *CHR_SPACE)
410             {   /* save space mapping for later  .. */
411                 space_start = s1;
412                 space_end = s0;
413                 continue;
414             }
415             else if (space_start)
416             {   /* reload last space */
417                 while (space_start < space_end)
418                 {
419                     if (strchr(REGEX_CHARS, *space_start))
420                         wrbuf_putc(term_dict, '\\');
421                     wrbuf_putc(display_term, *space_start);
422                     wrbuf_putc(term_dict, *space_start);
423                     space_start++;
424
425                 }
426                 /* and reset */
427                 space_start = space_end = 0;
428             }
429         }
430         i++;
431
432         add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
433     }
434     *src = s0;
435     return i;
436 }
437
438 /* term_101: handle term, where trunc = Process # */
439 static int term_101(zebra_map_t zm,
440                     const char **src, WRBUF term_dict, int space_split,
441                     WRBUF display_term)
442 {
443     const char *s0;
444     const char **map;
445     int i = 0;
446
447     if (!term_pre(zm, src, "#", !space_split))
448         return 0;
449     s0 = *src;
450     while (*s0)
451     {
452         if (*s0 == '#')
453         {
454             i++;
455             wrbuf_puts(term_dict, ".*");
456             wrbuf_putc(display_term, *s0);
457             s0++;
458         }
459         else
460         {
461             const char *s1 = s0;
462             int q_map_match = 0;
463             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
464             if (space_split && **map == *CHR_SPACE)
465                 break;
466
467             i++;
468             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
469         }
470     }
471     *src = s0;
472     return i;
473 }
474
475 /* term_103: handle term, where trunc = re-2 (regular expressions) */
476 static int term_103(zebra_map_t zm, const char **src,
477                     WRBUF term_dict, int *errors, int space_split,
478                     WRBUF display_term)
479 {
480     int i = 0;
481     const char *s0;
482     const char **map;
483
484     if (!term_pre(zm, src, "^\\()[].*+?|", !space_split))
485         return 0;
486     s0 = *src;
487     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
488         isdigit(((const unsigned char *)s0)[1]))
489     {
490         *errors = s0[1] - '0';
491         s0 += 3;
492         if (*errors > 3)
493             *errors = 3;
494     }
495     while (*s0)
496     {
497         if (strchr("^\\()[].*+?|-", *s0))
498         {
499             wrbuf_putc(display_term, *s0);
500             wrbuf_putc(term_dict, *s0);
501             s0++;
502             i++;
503         }
504         else
505         {
506             const char *s1 = s0;
507             int q_map_match = 0;
508             map = zebra_maps_search(zm, &s0, strlen(s0),  &q_map_match);
509             if (space_split && **map == *CHR_SPACE)
510                 break;
511
512             i++;
513             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
514         }
515     }
516     *src = s0;
517
518     return i;
519 }
520
521 /* term_103: handle term, where trunc = re-1 (regular expressions) */
522 static int term_102(zebra_map_t zm, const char **src,
523                     WRBUF term_dict, int space_split, WRBUF display_term)
524 {
525     return term_103(zm, src, term_dict, NULL, space_split, display_term);
526 }
527
528
529 /* term_104: handle term, process ?n * # */
530 static int term_104(zebra_map_t zm, const char **src,
531                     WRBUF term_dict, int space_split, WRBUF display_term)
532 {
533     const char *s0;
534     const char **map;
535     int i = 0;
536
537     if (!term_pre(zm, src, "?*#", !space_split))
538         return 0;
539     s0 = *src;
540     while (*s0)
541     {
542         if (*s0 == '?')
543         {
544             i++;
545             wrbuf_putc(display_term, *s0);
546             s0++;
547             if (*s0 >= '0' && *s0 <= '9')
548             {
549                 int limit = 0;
550                 while (*s0 >= '0' && *s0 <= '9')
551                 {
552                     limit = limit * 10 + (*s0 - '0');
553                     wrbuf_putc(display_term, *s0);
554                     s0++;
555                 }
556                 if (limit > 20)
557                     limit = 20;
558                 while (--limit >= 0)
559                 {
560                     wrbuf_puts(term_dict, ".?");
561                 }
562             }
563             else
564             {
565                 wrbuf_puts(term_dict, ".*");
566             }
567         }
568         else if (*s0 == '*')
569         {
570             i++;
571             wrbuf_puts(term_dict, ".*");
572             wrbuf_putc(display_term, *s0);
573             s0++;
574         }
575         else if (*s0 == '#')
576         {
577             i++;
578             wrbuf_puts(term_dict, ".");
579             wrbuf_putc(display_term, *s0);
580             s0++;
581         }
582         else
583         {
584             const char *s1 = s0;
585             int q_map_match = 0;
586             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
587             if (space_split && **map == *CHR_SPACE)
588                 break;
589
590             i++;
591             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
592         }
593     }
594     *src = s0;
595     return i;
596 }
597
598 /* term_105/106: handle term, process * ! and possibly right_truncate */
599 static int term_105(zebra_map_t zm, const char **src,
600                     WRBUF term_dict, int space_split,
601                     WRBUF display_term, int right_truncate)
602 {
603     const char *s0;
604     const char **map;
605     int i = 0;
606
607     if (!term_pre(zm, src, "\\*!", !space_split))
608         return 0;
609     s0 = *src;
610     while (*s0)
611     {
612         if (*s0 == '*')
613         {
614             i++;
615             wrbuf_puts(term_dict, ".*");
616             wrbuf_putc(display_term, *s0);
617             s0++;
618         }
619         else if (*s0 == '!')
620         {
621             i++;
622             wrbuf_putc(term_dict, '.');
623             wrbuf_putc(display_term, *s0);
624             s0++;
625         }
626         else if (*s0 == '\\')
627         {
628             i++;
629             wrbuf_puts(term_dict, "\\\\");
630             wrbuf_putc(display_term, *s0);
631             s0++;
632         }
633         else
634         {
635             const char *s1 = s0;
636             int q_map_match = 0;
637             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
638             if (space_split && **map == *CHR_SPACE)
639                 break;
640
641             i++;
642             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
643         }
644     }
645     if (right_truncate)
646         wrbuf_puts(term_dict, ".*");
647     *src = s0;
648     return i;
649 }
650
651
652 /* gen_regular_rel - generate regular expression from relation
653  *  val:     border value (inclusive)
654  *  islt:    1 if <=; 0 if >=.
655  */
656 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
657 {
658     char dst_buf[20*5*20]; /* assuming enough for expansion */
659     char *dst = dst_buf;
660     int dst_p;
661     int w, d, i;
662     int pos = 0;
663     char numstr[20];
664
665     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
666     if (val >= 0)
667     {
668         if (islt)
669             strcpy(dst, "(-[0-9]+|(");
670         else
671             strcpy(dst, "((");
672     }
673     else
674     {
675         if (!islt)
676         {
677             strcpy(dst, "([0-9]+|-(");
678             islt = 1;
679         }
680         else
681         {
682             strcpy(dst, "(-(");
683             islt = 0;
684         }
685         val = -val;
686     }
687     dst_p = strlen(dst);
688     sprintf(numstr, "%d", val);
689     for (w = strlen(numstr); --w >= 0; pos++)
690     {
691         d = numstr[w];
692         if (pos > 0)
693         {
694             if (islt)
695             {
696                 if (d == '0')
697                     continue;
698                 d--;
699             }
700             else
701             {
702                 if (d == '9')
703                     continue;
704                 d++;
705             }
706         }
707
708         strcpy(dst + dst_p, numstr);
709         dst_p = strlen(dst) - pos - 1;
710
711         if (islt)
712         {
713             if (d != '0')
714             {
715                 dst[dst_p++] = '[';
716                 dst[dst_p++] = '0';
717                 dst[dst_p++] = '-';
718                 dst[dst_p++] = d;
719                 dst[dst_p++] = ']';
720             }
721             else
722                 dst[dst_p++] = d;
723         }
724         else
725         {
726             if (d != '9')
727             {
728                 dst[dst_p++] = '[';
729                 dst[dst_p++] = d;
730                 dst[dst_p++] = '-';
731                 dst[dst_p++] = '9';
732                 dst[dst_p++] = ']';
733             }
734             else
735                 dst[dst_p++] = d;
736         }
737         for (i = 0; i<pos; i++)
738         {
739             dst[dst_p++] = '[';
740             dst[dst_p++] = '0';
741             dst[dst_p++] = '-';
742             dst[dst_p++] = '9';
743             dst[dst_p++] = ']';
744         }
745         dst[dst_p++] = '|';
746     }
747     dst[dst_p] = '\0';
748     if (islt)
749     {
750         /* match everything less than 10^(pos-1) */
751         strcat(dst, "0*");
752         for (i = 1; i<pos; i++)
753             strcat(dst, "[0-9]?");
754     }
755     else
756     {
757         /* match everything greater than 10^pos */
758         for (i = 0; i <= pos; i++)
759             strcat(dst, "[0-9]");
760         strcat(dst, "[0-9]*");
761     }
762     strcat(dst, "))");
763     wrbuf_puts(term_dict, dst);
764 }
765
766 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
767 {
768     const char *src = wrbuf_cstr(wsrc);
769     if (src[*indx] == '\\')
770     {
771         wrbuf_putc(term_p, src[*indx]);
772         (*indx)++;
773     }
774     wrbuf_putc(term_p, src[*indx]);
775     (*indx)++;
776 }
777
778 /*
779  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
780  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
781  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
782  *              ([^-a].*|a[^-b].*|ab[c-].*)
783  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
784  *              ([^a-].*|a[^b-].*|ab[^c-].*)
785  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
786  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
787  */
788 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
789                            const char **term_sub, WRBUF term_dict,
790                            const Odr_oid *attributeSet,
791                            zebra_map_t zm, int space_split,
792                            WRBUF display_term,
793                            int *error_code)
794 {
795     AttrType relation;
796     int relation_value;
797     int i;
798     WRBUF term_component = wrbuf_alloc();
799
800     attr_init_APT(&relation, zapt, 2);
801     relation_value = attr_find(&relation, NULL);
802
803     *error_code = 0;
804     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
805     switch (relation_value)
806     {
807     case 1:
808         if (!term_100(zm, term_sub, term_component, space_split, display_term))
809         {
810             wrbuf_destroy(term_component);
811             return 0;
812         }
813         yaz_log(log_level_rpn, "Relation <");
814
815         wrbuf_putc(term_dict, '(');
816         for (i = 0; i < wrbuf_len(term_component); )
817         {
818             int j = 0;
819
820             if (i)
821                 wrbuf_putc(term_dict, '|');
822             while (j < i)
823                 string_rel_add_char(term_dict, term_component, &j);
824
825             wrbuf_putc(term_dict, '[');
826
827             wrbuf_putc(term_dict, '^');
828
829             wrbuf_putc(term_dict, 1);
830             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
831
832             string_rel_add_char(term_dict, term_component, &i);
833             wrbuf_putc(term_dict, '-');
834
835             wrbuf_putc(term_dict, ']');
836             wrbuf_putc(term_dict, '.');
837             wrbuf_putc(term_dict, '*');
838         }
839         wrbuf_putc(term_dict, ')');
840         break;
841     case 2:
842         if (!term_100(zm, term_sub, term_component, space_split, display_term))
843         {
844             wrbuf_destroy(term_component);
845             return 0;
846         }
847         yaz_log(log_level_rpn, "Relation <=");
848
849         wrbuf_putc(term_dict, '(');
850         for (i = 0; i < wrbuf_len(term_component); )
851         {
852             int j = 0;
853
854             while (j < i)
855                 string_rel_add_char(term_dict, term_component, &j);
856             wrbuf_putc(term_dict, '[');
857
858             wrbuf_putc(term_dict, '^');
859
860             wrbuf_putc(term_dict, 1);
861             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
862
863             string_rel_add_char(term_dict, term_component, &i);
864             wrbuf_putc(term_dict, '-');
865
866             wrbuf_putc(term_dict, ']');
867             wrbuf_putc(term_dict, '.');
868             wrbuf_putc(term_dict, '*');
869
870             wrbuf_putc(term_dict, '|');
871         }
872         for (i = 0; i < wrbuf_len(term_component); )
873             string_rel_add_char(term_dict, term_component, &i);
874         wrbuf_putc(term_dict, ')');
875         break;
876     case 5:
877         if (!term_100(zm, term_sub, term_component, space_split, display_term))
878         {
879             wrbuf_destroy(term_component);
880             return 0;
881         }
882         yaz_log(log_level_rpn, "Relation >");
883
884         wrbuf_putc(term_dict, '(');
885         for (i = 0; i < wrbuf_len(term_component); )
886         {
887             int j = 0;
888
889             while (j < i)
890                 string_rel_add_char(term_dict, term_component, &j);
891             wrbuf_putc(term_dict, '[');
892
893             wrbuf_putc(term_dict, '^');
894             wrbuf_putc(term_dict, '-');
895             string_rel_add_char(term_dict, term_component, &i);
896
897             wrbuf_putc(term_dict, ']');
898             wrbuf_putc(term_dict, '.');
899             wrbuf_putc(term_dict, '*');
900
901             wrbuf_putc(term_dict, '|');
902         }
903         for (i = 0; i < wrbuf_len(term_component); )
904             string_rel_add_char(term_dict, term_component, &i);
905         wrbuf_putc(term_dict, '.');
906         wrbuf_putc(term_dict, '+');
907         wrbuf_putc(term_dict, ')');
908         break;
909     case 4:
910         if (!term_100(zm, term_sub, term_component, space_split, display_term))
911         {
912             wrbuf_destroy(term_component);
913             return 0;
914         }
915         yaz_log(log_level_rpn, "Relation >=");
916
917         wrbuf_putc(term_dict, '(');
918         for (i = 0; i < wrbuf_len(term_component); )
919         {
920             int j = 0;
921
922             if (i)
923                 wrbuf_putc(term_dict, '|');
924             while (j < i)
925                 string_rel_add_char(term_dict, term_component, &j);
926             wrbuf_putc(term_dict, '[');
927
928             if (i < wrbuf_len(term_component)-1)
929             {
930                 wrbuf_putc(term_dict, '^');
931                 wrbuf_putc(term_dict, '-');
932                 string_rel_add_char(term_dict, term_component, &i);
933             }
934             else
935             {
936                 string_rel_add_char(term_dict, term_component, &i);
937                 wrbuf_putc(term_dict, '-');
938             }
939             wrbuf_putc(term_dict, ']');
940             wrbuf_putc(term_dict, '.');
941             wrbuf_putc(term_dict, '*');
942         }
943         wrbuf_putc(term_dict, ')');
944         break;
945     case 3:
946     case 102:
947     case -1:
948         if (!**term_sub)
949             return 1;
950         yaz_log(log_level_rpn, "Relation =");
951         if (!term_100(zm, term_sub, term_component, space_split, display_term))
952         {
953             wrbuf_destroy(term_component);
954             return 0;
955         }
956         wrbuf_puts(term_dict, "(");
957         wrbuf_puts(term_dict, wrbuf_cstr(term_component));
958         wrbuf_puts(term_dict, ")");
959         break;
960     case 103:
961         yaz_log(log_level_rpn, "Relation always matches");
962         /* skip to end of term (we don't care what it is) */
963         while (**term_sub != '\0')
964             (*term_sub)++;
965         break;
966     default:
967         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
968         wrbuf_destroy(term_component);
969         return 0;
970     }
971     wrbuf_destroy(term_component);
972     return 1;
973 }
974
/* Forward declaration: string_term() is defined further down in this
   file but is already called from search_term(). */
975 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
976                              const char **term_sub,
977                              WRBUF term_dict,
978                              const Odr_oid *attributeSet, NMEM stream,
979                              struct grep_info *grep_info,
980                              const char *index_type, int complete_flag,
981                              WRBUF display_term,
982                              const char *xpath_use,
983                              struct ord_list **ol,
984                              zebra_map_t zm);
985
986 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
987                                 Z_AttributesPlusTerm *zapt,
988                                 zint *hits_limit_value,
989                                 const char **term_ref_id_str,
990                                 NMEM nmem)
991 {
992     AttrType term_ref_id_attr;
993     AttrType hits_limit_attr;
994     int term_ref_id_int;
995     zint hits_limit_from_attr;
996
997     attr_init_APT(&hits_limit_attr, zapt, 11);
998     hits_limit_from_attr  = attr_find(&hits_limit_attr, NULL);
999
1000     attr_init_APT(&term_ref_id_attr, zapt, 10);
1001     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
1002     if (term_ref_id_int >= 0)
1003     {
1004         char *res = nmem_malloc(nmem, 20);
1005         sprintf(res, "%d", term_ref_id_int);
1006         *term_ref_id_str = res;
1007     }
1008     if (hits_limit_from_attr != -1)
1009         *hits_limit_value = hits_limit_from_attr;
1010
1011     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
1012             *term_ref_id_str ? *term_ref_id_str : "none",
1013             *hits_limit_value);
1014     return ZEBRA_OK;
1015 }
1016
1017 /** \brief search for term (which may be truncated)
1018  */
1019 static ZEBRA_RES search_term(ZebraHandle zh,
1020                              Z_AttributesPlusTerm *zapt,
1021                              const char **term_sub,
1022                              const Odr_oid *attributeSet,
1023                              zint hits_limit, NMEM stream,
1024                              struct grep_info *grep_info,
1025                              const char *index_type, int complete_flag,
1026                              const char *rank_type,
1027                              const char *xpath_use,
1028                              NMEM rset_nmem,
1029                              RSET *rset,
1030                              struct rset_key_control *kc,
1031                              zebra_map_t zm)
1032 {
1033     ZEBRA_RES res;
1034     struct ord_list *ol;
1035     zint hits_limit_value = hits_limit;
1036     const char *term_ref_id_str = 0;
1037     WRBUF term_dict = wrbuf_alloc();
1038     WRBUF display_term = wrbuf_alloc();
1039     *rset = 0;
1040     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1041                           stream);
1042     grep_info->isam_p_indx = 0;
1043     res = string_term(zh, zapt, term_sub, term_dict,
1044                       attributeSet, stream, grep_info,
1045                       index_type, complete_flag,
1046                       display_term, xpath_use, &ol, zm);
1047     wrbuf_destroy(term_dict);
1048     if (res == ZEBRA_OK && *term_sub)
1049     {
1050         yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
1051         *rset = rset_trunc(zh, grep_info->isam_p_buf,
1052                            grep_info->isam_p_indx, wrbuf_buf(display_term),
1053                            wrbuf_len(display_term), rank_type,
1054                            1 /* preserve pos */,
1055                            zapt->term->which, rset_nmem,
1056                            kc, kc->scope, ol, index_type, hits_limit_value,
1057                            term_ref_id_str);
1058         if (!*rset)
1059             res = ZEBRA_FAIL;
1060     }
1061     wrbuf_destroy(display_term);
1062     return res;
1063 }
1064
1065 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1066                              const char **term_sub,
1067                              WRBUF term_dict,
1068                              const Odr_oid *attributeSet, NMEM stream,
1069                              struct grep_info *grep_info,
1070                              const char *index_type, int complete_flag,
1071                              WRBUF display_term,
1072                              const char *xpath_use,
1073                              struct ord_list **ol,
1074                              zebra_map_t zm)
1075 {
1076     int r;
1077     AttrType truncation;
1078     int truncation_value;
1079     const char *termp;
1080     struct rpn_char_map_info rcmi;
1081
1082     int space_split = complete_flag ? 0 : 1;
1083     int ord = -1;
1084     int regex_range = 0;
1085     int max_pos, prefix_len = 0;
1086     int relation_error;
1087     char ord_buf[32];
1088     int ord_len, i;
1089
1090     *ol = ord_list_create(stream);
1091
1092     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1093     attr_init_APT(&truncation, zapt, 5);
1094     truncation_value = attr_find(&truncation, NULL);
1095     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1096
1097     termp = *term_sub; /* start of term for each database */
1098
1099     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1100                           attributeSet, &ord) != ZEBRA_OK)
1101     {
1102         *term_sub = 0;
1103         return ZEBRA_FAIL;
1104     }
1105
1106     wrbuf_rewind(term_dict); /* new dictionary regexp term */
1107
1108     *ol = ord_list_append(stream, *ol, ord);
1109     ord_len = key_SU_encode(ord, ord_buf);
1110
1111     wrbuf_putc(term_dict, '(');
1112
1113     for (i = 0; i<ord_len; i++)
1114     {
1115         wrbuf_putc(term_dict, 1);  /* our internal regexp escape char */
1116         wrbuf_putc(term_dict, ord_buf[i]);
1117     }
1118     wrbuf_putc(term_dict, ')');
1119
1120     prefix_len = wrbuf_len(term_dict);
1121
1122     if (zebra_maps_is_icu(zm))
1123     {
1124         int relation_value;
1125         AttrType relation;
1126
1127         attr_init_APT(&relation, zapt, 2);
1128         relation_value = attr_find(&relation, NULL);
1129         if (relation_value == 103) /* always matches */
1130             termp += strlen(termp); /* move to end of term */
1131         else if (relation_value == 3 || relation_value == 102 || relation_value == -1)
1132         {
1133             /* ICU case */
1134             switch (truncation_value)
1135             {
1136             case -1:         /* not specified */
1137             case 100:        /* do not truncate */
1138                 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0))
1139                 {
1140                     *term_sub = 0;
1141                     return ZEBRA_OK;
1142                 }
1143                 break;
1144             case 102:
1145                 if (!term_102_icu(zm, &termp, term_dict, space_split, display_term))
1146                 {
1147                     *term_sub = 0;
1148                     return ZEBRA_OK;
1149                 }
1150                 break;
1151             case 1:          /* right truncation */
1152                 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1))
1153                 {
1154                     *term_sub = 0;
1155                     return ZEBRA_OK;
1156                 }
1157                 break;
1158             case 2:
1159                 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 2))
1160                 {
1161                     *term_sub = 0;
1162                     return ZEBRA_OK;
1163                 }
1164                 break;
1165             case 3:
1166                 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 3))
1167                 {
1168                     *term_sub = 0;
1169                     return ZEBRA_OK;
1170                 }
1171                 break;
1172             default:
1173                 zebra_setError_zint(zh,
1174                                     YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1175                                     truncation_value);
1176                 return ZEBRA_FAIL;
1177             }
1178         }
1179         else
1180         {
1181             zebra_setError_zint(zh,
1182                                 YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE,
1183                                 relation_value);
1184             return ZEBRA_FAIL;
1185         }
1186     }
1187     else
1188     {
1189         /* non-ICU case. using string.chr and friends */
1190         switch (truncation_value)
1191         {
1192         case -1:         /* not specified */
1193         case 100:        /* do not truncate */
1194             if (!string_relation(zh, zapt, &termp, term_dict,
1195                                  attributeSet,
1196                                  zm, space_split, display_term,
1197                                  &relation_error))
1198             {
1199                 if (relation_error)
1200                 {
1201                     zebra_setError(zh, relation_error, 0);
1202                     return ZEBRA_FAIL;
1203                 }
1204                 *term_sub = 0;
1205                 return ZEBRA_OK;
1206             }
1207             break;
1208         case 1:          /* right truncation */
1209             wrbuf_putc(term_dict, '(');
1210             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1211             {
1212                 *term_sub = 0;
1213                 return ZEBRA_OK;
1214             }
1215             wrbuf_puts(term_dict, ".*)");
1216             break;
1217         case 2:          /* left truncation */
1218             wrbuf_puts(term_dict, "(.*");
1219             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1220             {
1221                 *term_sub = 0;
1222                 return ZEBRA_OK;
1223             }
1224             wrbuf_putc(term_dict, ')');
1225             break;
1226         case 3:          /* left&right truncation */
1227             wrbuf_puts(term_dict, "(.*");
1228             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1229             {
1230                 *term_sub = 0;
1231                 return ZEBRA_OK;
1232             }
1233             wrbuf_puts(term_dict, ".*)");
1234             break;
1235         case 101:        /* process # in term */
1236             wrbuf_putc(term_dict, '(');
1237             if (!term_101(zm, &termp, term_dict, space_split, display_term))
1238             {
1239                 *term_sub = 0;
1240                 return ZEBRA_OK;
1241             }
1242             wrbuf_puts(term_dict, ")");
1243             break;
1244         case 102:        /* Regexp-1 */
1245             wrbuf_putc(term_dict, '(');
1246             if (!term_102(zm, &termp, term_dict, space_split, display_term))
1247             {
1248                 *term_sub = 0;
1249                 return ZEBRA_OK;
1250             }
1251             wrbuf_putc(term_dict, ')');
1252             break;
1253         case 103:       /* Regexp-2 */
1254             regex_range = 1;
1255             wrbuf_putc(term_dict, '(');
1256             if (!term_103(zm, &termp, term_dict, &regex_range,
1257                           space_split, display_term))
1258             {
1259                 *term_sub = 0;
1260                 return ZEBRA_OK;
1261             }
1262             wrbuf_putc(term_dict, ')');
1263             break;
1264         case 104:        /* process ?n * # term */
1265             wrbuf_putc(term_dict, '(');
1266             if (!term_104(zm, &termp, term_dict, space_split, display_term))
1267             {
1268                 *term_sub = 0;
1269                 return ZEBRA_OK;
1270             }
1271             wrbuf_putc(term_dict, ')');
1272             break;
1273         case 105:        /* process * ! in term and right truncate */
1274             wrbuf_putc(term_dict, '(');
1275             if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1276             {
1277                 *term_sub = 0;
1278                 return ZEBRA_OK;
1279             }
1280             wrbuf_putc(term_dict, ')');
1281             break;
1282         case 106:        /* process * ! in term */
1283             wrbuf_putc(term_dict, '(');
1284             if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1285             {
1286                 *term_sub = 0;
1287                 return ZEBRA_OK;
1288             }
1289             wrbuf_putc(term_dict, ')');
1290             break;
1291         default:
1292             zebra_setError_zint(zh,
1293                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1294                                 truncation_value);
1295             return ZEBRA_FAIL;
1296         }
1297     }
1298     if (1)
1299     {
1300         char buf[1000];
1301         const char *input = wrbuf_cstr(term_dict) + prefix_len;
1302         esc_str(buf, sizeof(buf), input, strlen(input));
1303     }
1304     {
1305         WRBUF pr_wr = wrbuf_alloc();
1306
1307         wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1308         yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1309         wrbuf_destroy(pr_wr);
1310     }
1311     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1312                          grep_info, &max_pos,
1313                          ord_len /* number of "exact" chars */,
1314                          grep_handle);
1315     if (r == 1)
1316         zebra_set_partial_result(zh);
1317     else if (r)
1318         yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1319     *term_sub = termp;
1320     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1321     return ZEBRA_OK;
1322 }
1323
1324
1325
1326 static void grep_info_delete(struct grep_info *grep_info)
1327 {
1328 #ifdef TERM_COUNT
1329     xfree(grep_info->term_no);
1330 #endif
1331     xfree(grep_info->isam_p_buf);
1332 }
1333
1334 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1335                                    Z_AttributesPlusTerm *zapt,
1336                                    struct grep_info *grep_info,
1337                                    const char *index_type)
1338 {
1339 #ifdef TERM_COUNT
1340     grep_info->term_no = 0;
1341 #endif
1342     grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1343     grep_info->isam_p_size = 0;
1344     grep_info->isam_p_buf = NULL;
1345     grep_info->zh = zh;
1346     grep_info->index_type = index_type;
1347     grep_info->termset = 0;
1348     if (zapt)
1349     {
1350         AttrType truncmax;
1351         int truncmax_value;
1352
1353         attr_init_APT(&truncmax, zapt, 13);
1354         truncmax_value = attr_find(&truncmax, NULL);
1355         if (truncmax_value != -1)
1356             grep_info->trunc_max = truncmax_value;
1357     }
1358     if (zapt)
1359     {
1360         AttrType termset;
1361         int termset_value_numeric;
1362         const char *termset_value_string;
1363
1364         attr_init_APT(&termset, zapt, 8);
1365         termset_value_numeric =
1366             attr_find_ex(&termset, NULL, &termset_value_string);
1367         if (termset_value_numeric != -1)
1368         {
1369 #if TERMSET_DISABLE
1370             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1371             return ZEBRA_FAIL;
1372 #else
1373             char resname[32];
1374             const char *termset_name = 0;
1375             if (termset_value_numeric != -2)
1376             {
1377
1378                 sprintf(resname, "%d", termset_value_numeric);
1379                 termset_name = resname;
1380             }
1381             else
1382                 termset_name = termset_value_string;
1383             yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1384             grep_info->termset = resultSetAdd(zh, termset_name, 1);
1385             if (!grep_info->termset)
1386             {
1387                 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1388                 return ZEBRA_FAIL;
1389             }
1390 #endif
1391         }
1392     }
1393     return ZEBRA_OK;
1394 }
1395
1396 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1397                                      Z_AttributesPlusTerm *zapt,
1398                                      const char *termz,
1399                                      const Odr_oid *attributeSet,
1400                                      zint hits_limit,
1401                                      NMEM stream,
1402                                      const char *index_type, int complete_flag,
1403                                      const char *rank_type,
1404                                      const char *xpath_use,
1405                                      NMEM rset_nmem,
1406                                      RSET **result_sets, int *num_result_sets,
1407                                      struct rset_key_control *kc,
1408                                      zebra_map_t zm)
1409 {
1410     struct grep_info grep_info;
1411     const char *termp = termz;
1412     int alloc_sets = 0;
1413
1414     *num_result_sets = 0;
1415     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1416         return ZEBRA_FAIL;
1417     while (1)
1418     {
1419         ZEBRA_RES res;
1420
1421         if (alloc_sets == *num_result_sets)
1422         {
1423             int add = 10;
1424             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1425                                               sizeof(*rnew));
1426             if (alloc_sets)
1427                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1428             alloc_sets = alloc_sets + add;
1429             *result_sets = rnew;
1430         }
1431         res = search_term(zh, zapt, &termp, attributeSet, hits_limit,
1432                           stream, &grep_info,
1433                           index_type, complete_flag,
1434                           rank_type,
1435                           xpath_use, rset_nmem,
1436                           &(*result_sets)[*num_result_sets],
1437                           kc, zm);
1438         if (res != ZEBRA_OK)
1439         {
1440             int i;
1441             for (i = 0; i < *num_result_sets; i++)
1442                 rset_delete((*result_sets)[i]);
1443             grep_info_delete(&grep_info);
1444             return res;
1445         }
1446         if ((*result_sets)[*num_result_sets] == 0)
1447             break;
1448         (*num_result_sets)++;
1449
1450         if (!*termp)
1451             break;
1452     }
1453     grep_info_delete(&grep_info);
1454     return ZEBRA_OK;
1455 }
1456
1457 /**
1458    \brief Create result set(s) for list of terms
1459    \param zh Zebra Handle
1460    \param zapt Attributes Plust Term (RPN leaf)
1461    \param termz term as used in query but converted to UTF-8
1462    \param attributeSet default attribute set
1463    \param stream memory for result
1464    \param index_type register type ("w", "p",..)
1465    \param complete_flag whether it's phrases or not
1466    \param rank_type term flags for ranking
1467    \param xpath_use use attribute for X-Path (-1 for no X-path)
1468    \param rset_nmem memory for result sets
1469    \param result_sets output result set for each term in list (output)
1470    \param num_result_sets number of output result sets
1471    \param kc rset key control to be used for created result sets
1472 */
1473 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1474                                    Z_AttributesPlusTerm *zapt,
1475                                    const char *termz,
1476                                    const Odr_oid *attributeSet,
1477                                    zint hits_limit,
1478                                    NMEM stream,
1479                                    const char *index_type, int complete_flag,
1480                                    const char *rank_type,
1481                                    const char *xpath_use,
1482                                    NMEM rset_nmem,
1483                                    RSET **result_sets, int *num_result_sets,
1484                                    struct rset_key_control *kc)
1485 {
1486     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1487     return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit,
1488                                stream, index_type, complete_flag,
1489                                rank_type, xpath_use,
1490                                rset_nmem, result_sets, num_result_sets,
1491                                kc, zm);
1492 }
1493
1494
1495 /** \brief limit a search by position - returns result set
1496  */
1497 static ZEBRA_RES search_position(ZebraHandle zh,
1498                                  Z_AttributesPlusTerm *zapt,
1499                                  const Odr_oid *attributeSet,
1500                                  const char *index_type,
1501                                  NMEM rset_nmem,
1502                                  RSET *rset,
1503                                  struct rset_key_control *kc)
1504 {
1505     int position_value;
1506     AttrType position;
1507     int ord = -1;
1508     char ord_buf[32];
1509     char term_dict[100];
1510     int ord_len;
1511     char *val;
1512     ISAM_P isam_p;
1513     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1514
1515     attr_init_APT(&position, zapt, 3);
1516     position_value = attr_find(&position, NULL);
1517     switch(position_value)
1518     {
1519     case 3:
1520     case -1:
1521         return ZEBRA_OK;
1522     case 1:
1523     case 2:
1524         break;
1525     default:
1526         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1527                             position_value);
1528         return ZEBRA_FAIL;
1529     }
1530
1531
1532     if (!zebra_maps_is_first_in_field(zm))
1533     {
1534         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1535                             position_value);
1536         return ZEBRA_FAIL;
1537     }
1538
1539     if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1540                           attributeSet, &ord) != ZEBRA_OK)
1541     {
1542         return ZEBRA_FAIL;
1543     }
1544     ord_len = key_SU_encode(ord, ord_buf);
1545     memcpy(term_dict, ord_buf, ord_len);
1546     strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1547     val = dict_lookup(zh->reg->dict, term_dict);
1548     if (val)
1549     {
1550         assert(*val == sizeof(ISAM_P));
1551         memcpy(&isam_p, val+1, sizeof(isam_p));
1552
1553         *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope,
1554                                        isam_p, 0);
1555     }
1556     return ZEBRA_OK;
1557 }
1558
1559 /** \brief returns result set for phrase search
1560  */
1561 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1562                                        Z_AttributesPlusTerm *zapt,
1563                                        const char *termz_org,
1564                                        const Odr_oid *attributeSet,
1565                                        zint hits_limit,
1566                                        NMEM stream,
1567                                        const char *index_type,
1568                                        int complete_flag,
1569                                        const char *rank_type,
1570                                        const char *xpath_use,
1571                                        NMEM rset_nmem,
1572                                        RSET *rset,
1573                                        struct rset_key_control *kc)
1574 {
1575     RSET *result_sets = 0;
1576     int num_result_sets = 0;
1577     ZEBRA_RES res =
1578         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1579                           stream, index_type, complete_flag,
1580                           rank_type, xpath_use,
1581                           rset_nmem,
1582                           &result_sets, &num_result_sets, kc);
1583
1584     if (res != ZEBRA_OK)
1585         return res;
1586
1587     if (num_result_sets > 0)
1588     {
1589         RSET first_set = 0;
1590         res = search_position(zh, zapt, attributeSet,
1591                               index_type,
1592                               rset_nmem, &first_set,
1593                               kc);
1594         if (res != ZEBRA_OK)
1595         {
1596             int i;
1597             for (i = 0; i<num_result_sets; i++)
1598                 rset_delete(result_sets[i]);
1599             return res;
1600         }
1601         if (first_set)
1602         {
1603             RSET *nsets = nmem_malloc(stream,
1604                                       sizeof(RSET) * (num_result_sets+1));
1605             nsets[0] = first_set;
1606             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1607             result_sets = nsets;
1608             num_result_sets++;
1609         }
1610     }
1611     if (num_result_sets == 0)
1612         *rset = rset_create_null(rset_nmem, kc, 0);
1613     else if (num_result_sets == 1)
1614         *rset = result_sets[0];
1615     else
1616         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1617                                  num_result_sets, result_sets,
1618                                  1 /* ordered */, 0 /* exclusion */,
1619                                  3 /* relation */, 1 /* distance */);
1620     if (!*rset)
1621         return ZEBRA_FAIL;
1622     return ZEBRA_OK;
1623 }
1624
1625 /** \brief returns result set for or-list search
1626  */
1627 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1628                                         Z_AttributesPlusTerm *zapt,
1629                                         const char *termz_org,
1630                                         const Odr_oid *attributeSet,
1631                                         zint hits_limit,
1632                                         NMEM stream,
1633                                         const char *index_type,
1634                                         int complete_flag,
1635                                         const char *rank_type,
1636                                         const char *xpath_use,
1637                                         NMEM rset_nmem,
1638                                         RSET *rset,
1639                                         struct rset_key_control *kc)
1640 {
1641     RSET *result_sets = 0;
1642     int num_result_sets = 0;
1643     int i;
1644     ZEBRA_RES res =
1645         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1646                           stream, index_type, complete_flag,
1647                           rank_type, xpath_use,
1648                           rset_nmem,
1649                           &result_sets, &num_result_sets, kc);
1650     if (res != ZEBRA_OK)
1651         return res;
1652
1653     for (i = 0; i<num_result_sets; i++)
1654     {
1655         RSET first_set = 0;
1656         res = search_position(zh, zapt, attributeSet,
1657                               index_type,
1658                               rset_nmem, &first_set,
1659                               kc);
1660         if (res != ZEBRA_OK)
1661         {
1662             for (i = 0; i<num_result_sets; i++)
1663                 rset_delete(result_sets[i]);
1664             return res;
1665         }
1666
1667         if (first_set)
1668         {
1669             RSET tmp_set[2];
1670
1671             tmp_set[0] = first_set;
1672             tmp_set[1] = result_sets[i];
1673
1674             result_sets[i] = rset_create_prox(
1675                 rset_nmem, kc, kc->scope,
1676                 2, tmp_set,
1677                 1 /* ordered */, 0 /* exclusion */,
1678                 3 /* relation */, 1 /* distance */);
1679         }
1680     }
1681     if (num_result_sets == 0)
1682         *rset = rset_create_null(rset_nmem, kc, 0);
1683     else if (num_result_sets == 1)
1684         *rset = result_sets[0];
1685     else
1686         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1687                                num_result_sets, result_sets);
1688     if (!*rset)
1689         return ZEBRA_FAIL;
1690     return ZEBRA_OK;
1691 }
1692
1693 /** \brief returns result set for and-list search
1694  */
1695 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1696                                          Z_AttributesPlusTerm *zapt,
1697                                          const char *termz_org,
1698                                          const Odr_oid *attributeSet,
1699                                          zint hits_limit,
1700                                          NMEM stream,
1701                                          const char *index_type,
1702                                          int complete_flag,
1703                                          const char *rank_type,
1704                                          const char *xpath_use,
1705                                          NMEM rset_nmem,
1706                                          RSET *rset,
1707                                          struct rset_key_control *kc)
1708 {
1709     RSET *result_sets = 0;
1710     int num_result_sets = 0;
1711     int i;
1712     ZEBRA_RES res =
1713         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1714                           stream, index_type, complete_flag,
1715                           rank_type, xpath_use,
1716                           rset_nmem,
1717                           &result_sets, &num_result_sets,
1718                           kc);
1719     if (res != ZEBRA_OK)
1720         return res;
1721     for (i = 0; i<num_result_sets; i++)
1722     {
1723         RSET first_set = 0;
1724         res = search_position(zh, zapt, attributeSet,
1725                               index_type,
1726                               rset_nmem, &first_set,
1727                               kc);
1728         if (res != ZEBRA_OK)
1729         {
1730             for (i = 0; i<num_result_sets; i++)
1731                 rset_delete(result_sets[i]);
1732             return res;
1733         }
1734
1735         if (first_set)
1736         {
1737             RSET tmp_set[2];
1738
1739             tmp_set[0] = first_set;
1740             tmp_set[1] = result_sets[i];
1741
1742             result_sets[i] = rset_create_prox(
1743                 rset_nmem, kc, kc->scope,
1744                 2, tmp_set,
1745                 1 /* ordered */, 0 /* exclusion */,
1746                 3 /* relation */, 1 /* distance */);
1747         }
1748     }
1749
1750
1751     if (num_result_sets == 0)
1752         *rset = rset_create_null(rset_nmem, kc, 0);
1753     else if (num_result_sets == 1)
1754         *rset = result_sets[0];
1755     else
1756         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1757                                 num_result_sets, result_sets);
1758     if (!*rset)
1759         return ZEBRA_FAIL;
1760     return ZEBRA_OK;
1761 }
1762
1763 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1764                             const char **term_sub,
1765                             WRBUF term_dict,
1766                             const Odr_oid *attributeSet,
1767                             struct grep_info *grep_info,
1768                             int *max_pos,
1769                             zebra_map_t zm,
1770                             WRBUF display_term,
1771                             int *error_code)
1772 {
1773     AttrType relation;
1774     int relation_value;
1775     int term_value;
1776     int r;
1777     WRBUF term_num = wrbuf_alloc();
1778
1779     *error_code = 0;
1780     attr_init_APT(&relation, zapt, 2);
1781     relation_value = attr_find(&relation, NULL);
1782
1783     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1784
1785     switch (relation_value)
1786     {
1787     case 1:
1788         yaz_log(log_level_rpn, "Relation <");
1789         if (!term_100(zm, term_sub, term_num, 1, display_term))
1790         {
1791             wrbuf_destroy(term_num);
1792             return 0;
1793         }
1794         term_value = atoi(wrbuf_cstr(term_num));
1795         gen_regular_rel(term_dict, term_value-1, 1);
1796         break;
1797     case 2:
1798         yaz_log(log_level_rpn, "Relation <=");
1799         if (!term_100(zm, term_sub, term_num, 1, display_term))
1800         {
1801             wrbuf_destroy(term_num);
1802             return 0;
1803         }
1804         term_value = atoi(wrbuf_cstr(term_num));
1805         gen_regular_rel(term_dict, term_value, 1);
1806         break;
1807     case 4:
1808         yaz_log(log_level_rpn, "Relation >=");
1809         if (!term_100(zm, term_sub, term_num, 1, display_term))
1810         {
1811             wrbuf_destroy(term_num);
1812             return 0;
1813         }
1814         term_value = atoi(wrbuf_cstr(term_num));
1815         gen_regular_rel(term_dict, term_value, 0);
1816         break;
1817     case 5:
1818         yaz_log(log_level_rpn, "Relation >");
1819         if (!term_100(zm, term_sub, term_num, 1, display_term))
1820         {
1821             wrbuf_destroy(term_num);
1822             return 0;
1823         }
1824         term_value = atoi(wrbuf_cstr(term_num));
1825         gen_regular_rel(term_dict, term_value+1, 0);
1826         break;
1827     case -1:
1828     case 3:
1829         yaz_log(log_level_rpn, "Relation =");
1830         if (!term_100(zm, term_sub, term_num, 1, display_term))
1831         {
1832             wrbuf_destroy(term_num);
1833             return 0;
1834         }
1835         term_value = atoi(wrbuf_cstr(term_num));
1836         wrbuf_printf(term_dict, "(0*%d)", term_value);
1837         break;
1838     case 103:
1839         /* term_tmp untouched.. */
1840         while (**term_sub != '\0')
1841             (*term_sub)++;
1842         break;
1843     default:
1844         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1845         wrbuf_destroy(term_num);
1846         return 0;
1847     }
1848     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1849                          0, grep_info, max_pos, 0, grep_handle);
1850
1851     if (r == 1)
1852         zebra_set_partial_result(zh);
1853     else if (r)
1854         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1855     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1856     wrbuf_destroy(term_num);
1857     return 1;
1858 }
1859
1860 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1861                               const char **term_sub,
1862                               WRBUF term_dict,
1863                               const Odr_oid *attributeSet, NMEM stream,
1864                               struct grep_info *grep_info,
1865                               const char *index_type, int complete_flag,
1866                               WRBUF display_term,
1867                               const char *xpath_use,
1868                               struct ord_list **ol)
1869 {
1870     const char *termp;
1871     struct rpn_char_map_info rcmi;
1872     int max_pos;
1873     int relation_error = 0;
1874     int ord, ord_len, i;
1875     char ord_buf[32];
1876     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1877
1878     *ol = ord_list_create(stream);
1879
1880     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1881
1882     termp = *term_sub;
1883
1884     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1885                           attributeSet, &ord) != ZEBRA_OK)
1886     {
1887         return ZEBRA_FAIL;
1888     }
1889
1890     wrbuf_rewind(term_dict);
1891
1892     *ol = ord_list_append(stream, *ol, ord);
1893
1894     ord_len = key_SU_encode(ord, ord_buf);
1895
1896     wrbuf_putc(term_dict, '(');
1897     for (i = 0; i < ord_len; i++)
1898     {
1899         wrbuf_putc(term_dict, 1);
1900         wrbuf_putc(term_dict, ord_buf[i]);
1901     }
1902     wrbuf_putc(term_dict, ')');
1903
1904     if (!numeric_relation(zh, zapt, &termp, term_dict,
1905                           attributeSet, grep_info, &max_pos, zm,
1906                           display_term, &relation_error))
1907     {
1908         if (relation_error)
1909         {
1910             zebra_setError(zh, relation_error, 0);
1911             return ZEBRA_FAIL;
1912         }
1913         *term_sub = 0;
1914         return ZEBRA_OK;
1915     }
1916     *term_sub = termp;
1917     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1918     return ZEBRA_OK;
1919 }
1920
1921
1922 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1923                                         Z_AttributesPlusTerm *zapt,
1924                                         const char *termz,
1925                                         const Odr_oid *attributeSet,
1926                                         zint hits_limit,
1927                                         NMEM stream,
1928                                         const char *index_type,
1929                                         int complete_flag,
1930                                         const char *rank_type,
1931                                         const char *xpath_use,
1932                                         NMEM rset_nmem,
1933                                         RSET *rset,
1934                                         struct rset_key_control *kc)
1935 {
1936     const char *termp = termz;
1937     RSET *result_sets = 0;
1938     int num_result_sets = 0;
1939     ZEBRA_RES res;
1940     struct grep_info grep_info;
1941     int alloc_sets = 0;
1942     zint hits_limit_value = hits_limit;
1943     const char *term_ref_id_str = 0;
1944
1945     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1946                           stream);
1947
1948     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1949     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1950         return ZEBRA_FAIL;
1951     while (1)
1952     {
1953         struct ord_list *ol;
1954         WRBUF term_dict = wrbuf_alloc();
1955         WRBUF display_term = wrbuf_alloc();
1956         if (alloc_sets == num_result_sets)
1957         {
1958             int add = 10;
1959             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1960                                               sizeof(*rnew));
1961             if (alloc_sets)
1962                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1963             alloc_sets = alloc_sets + add;
1964             result_sets = rnew;
1965         }
1966         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1967         grep_info.isam_p_indx = 0;
1968         res = numeric_term(zh, zapt, &termp, term_dict,
1969                            attributeSet, stream, &grep_info,
1970                            index_type, complete_flag,
1971                            display_term, xpath_use, &ol);
1972         wrbuf_destroy(term_dict);
1973         if (res == ZEBRA_FAIL || termp == 0)
1974         {
1975             wrbuf_destroy(display_term);
1976             break;
1977         }
1978         yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
1979         result_sets[num_result_sets] =
1980             rset_trunc(zh, grep_info.isam_p_buf,
1981                        grep_info.isam_p_indx, wrbuf_buf(display_term),
1982                        wrbuf_len(display_term), rank_type,
1983                        0 /* preserve position */,
1984                        zapt->term->which, rset_nmem,
1985                        kc, kc->scope, ol, index_type,
1986                        hits_limit_value,
1987                        term_ref_id_str);
1988         wrbuf_destroy(display_term);
1989         if (!result_sets[num_result_sets])
1990             break;
1991         num_result_sets++;
1992         if (!*termp)
1993             break;
1994     }
1995     grep_info_delete(&grep_info);
1996
1997     if (res != ZEBRA_OK)
1998         return res;
1999     if (num_result_sets == 0)
2000         *rset = rset_create_null(rset_nmem, kc, 0);
2001     else if (num_result_sets == 1)
2002         *rset = result_sets[0];
2003     else
2004         *rset = rset_create_and(rset_nmem, kc, kc->scope,
2005                                 num_result_sets, result_sets);
2006     if (!*rset)
2007         return ZEBRA_FAIL;
2008     return ZEBRA_OK;
2009 }
2010
2011 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
2012                                       Z_AttributesPlusTerm *zapt,
2013                                       const char *termz,
2014                                       const Odr_oid *attributeSet,
2015                                       NMEM stream,
2016                                       const char *rank_type, NMEM rset_nmem,
2017                                       RSET *rset,
2018                                       struct rset_key_control *kc)
2019 {
2020     Record rec;
2021     zint sysno = atozint(termz);
2022
2023     if (sysno <= 0)
2024         sysno = 0;
2025     rec = rec_get(zh->reg->records, sysno);
2026     if (!rec)
2027         sysno = 0;
2028
2029     rec_free(&rec);
2030
2031     if (sysno <= 0)
2032     {
2033         *rset = rset_create_null(rset_nmem, kc, 0);
2034     }
2035     else
2036     {
2037         RSFD rsfd;
2038         struct it_key key;
2039         *rset = rset_create_temp(rset_nmem, kc, kc->scope,
2040                                  res_get(zh->res, "setTmpDir"), 0);
2041         rsfd = rset_open(*rset, RSETF_WRITE);
2042
2043         key.mem[0] = sysno;
2044         key.mem[1] = 1;
2045         key.len = 2;
2046         rset_write(rsfd, &key);
2047         rset_close(rsfd);
2048     }
2049     return ZEBRA_OK;
2050 }
2051
2052 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2053                                const Odr_oid *attributeSet, NMEM stream,
2054                                Z_SortKeySpecList *sort_sequence,
2055                                const char *rank_type,
2056                                NMEM rset_nmem,
2057                                RSET *rset,
2058                                struct rset_key_control *kc)
2059 {
2060     int i;
2061     int sort_relation_value;
2062     AttrType sort_relation_type;
2063     Z_SortKeySpec *sks;
2064     Z_SortKey *sk;
2065     char termz[20];
2066
2067     attr_init_APT(&sort_relation_type, zapt, 7);
2068     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
2069
2070     if (!sort_sequence->specs)
2071     {
2072         sort_sequence->num_specs = 10;
2073         sort_sequence->specs = (Z_SortKeySpec **)
2074             nmem_malloc(stream, sort_sequence->num_specs *
2075                         sizeof(*sort_sequence->specs));
2076         for (i = 0; i<sort_sequence->num_specs; i++)
2077             sort_sequence->specs[i] = 0;
2078     }
2079     if (zapt->term->which != Z_Term_general)
2080         i = 0;
2081     else
2082         i = atoi_n((char *) zapt->term->u.general->buf,
2083                    zapt->term->u.general->len);
2084     if (i >= sort_sequence->num_specs)
2085         i = 0;
2086     sprintf(termz, "%d", i);
2087
2088     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
2089     sks->sortElement = (Z_SortElement *)
2090         nmem_malloc(stream, sizeof(*sks->sortElement));
2091     sks->sortElement->which = Z_SortElement_generic;
2092     sk = sks->sortElement->u.generic = (Z_SortKey *)
2093         nmem_malloc(stream, sizeof(*sk));
2094     sk->which = Z_SortKey_sortAttributes;
2095     sk->u.sortAttributes = (Z_SortAttributes *)
2096         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
2097
2098     sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
2099     sk->u.sortAttributes->list = zapt->attributes;
2100
2101     sks->sortRelation = (Odr_int *)
2102         nmem_malloc(stream, sizeof(*sks->sortRelation));
2103     if (sort_relation_value == 1)
2104         *sks->sortRelation = Z_SortKeySpec_ascending;
2105     else if (sort_relation_value == 2)
2106         *sks->sortRelation = Z_SortKeySpec_descending;
2107     else
2108         *sks->sortRelation = Z_SortKeySpec_ascending;
2109
2110     sks->caseSensitivity = (Odr_int *)
2111         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2112     *sks->caseSensitivity = 0;
2113
2114     sks->which = Z_SortKeySpec_null;
2115     sks->u.null = odr_nullval ();
2116     sort_sequence->specs[i] = sks;
2117     *rset = rset_create_null(rset_nmem, kc, 0);
2118     return ZEBRA_OK;
2119 }
2120
2121
2122 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2123                            const Odr_oid *attributeSet,
2124                            struct xpath_location_step *xpath, int max,
2125                            NMEM mem)
2126 {
2127     const Odr_oid *curAttributeSet = attributeSet;
2128     AttrType use;
2129     const char *use_string = 0;
2130
2131     attr_init_APT(&use, zapt, 1);
2132     attr_find_ex(&use, &curAttributeSet, &use_string);
2133
2134     if (!use_string || *use_string != '/')
2135         return -1;
2136
2137     return zebra_parse_xpath_str(use_string, xpath, max, mem);
2138 }
2139
2140
2141
2142 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2143                         const char *index_type, const char *term,
2144                         const char *xpath_use,
2145                         NMEM rset_nmem,
2146                         struct rset_key_control *kc)
2147 {
2148     struct grep_info grep_info;
2149     int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2150                                            zinfo_index_category_index,
2151                                            index_type, xpath_use);
2152     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2153         return rset_create_null(rset_nmem, kc, 0);
2154
2155     if (ord < 0)
2156         return rset_create_null(rset_nmem, kc, 0);
2157     else
2158     {
2159         int i, max_pos;
2160         char ord_buf[32];
2161         RSET rset;
2162         WRBUF term_dict = wrbuf_alloc();
2163         int ord_len = key_SU_encode(ord, ord_buf);
2164         int term_type = Z_Term_characterString;
2165         const char *flags = "void";
2166
2167         wrbuf_putc(term_dict, '(');
2168         for (i = 0; i<ord_len; i++)
2169         {
2170             wrbuf_putc(term_dict, 1);
2171             wrbuf_putc(term_dict, ord_buf[i]);
2172         }
2173         wrbuf_putc(term_dict, ')');
2174         wrbuf_puts(term_dict, term);
2175
2176         grep_info.isam_p_indx = 0;
2177         dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2178                          &grep_info, &max_pos, 0, grep_handle);
2179         yaz_log(YLOG_DEBUG, "%s %d positions", term,
2180                 grep_info.isam_p_indx);
2181         rset = rset_trunc(zh, grep_info.isam_p_buf,
2182                           grep_info.isam_p_indx, term, strlen(term),
2183                           flags, 1, term_type, rset_nmem,
2184                           kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2185                           0 /* term_ref_id_str */);
2186         grep_info_delete(&grep_info);
2187         wrbuf_destroy(term_dict);
2188         return rset;
2189     }
2190 }
2191
2192 static
2193 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2194                            NMEM stream, const char *rank_type, RSET rset,
2195                            int xpath_len, struct xpath_location_step *xpath,
2196                            NMEM rset_nmem,
2197                            RSET *rset_out,
2198                            struct rset_key_control *kc)
2199 {
2200     int i;
2201     int always_matches = rset ? 0 : 1;
2202
2203     if (xpath_len < 0)
2204     {
2205         *rset_out = rset;
2206         return ZEBRA_OK;
2207     }
2208
2209     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2210     for (i = 0; i<xpath_len; i++)
2211     {
2212         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2213
2214     }
2215
2216     /*
2217     //a    ->    a/.*
2218     //a/b  ->    b/a/.*
2219     /a     ->    a/
2220     /a/b   ->    b/a/
2221
2222     /      ->    none
2223
2224     a[@attr = value]/b[@other = othervalue]
2225
2226     /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2227     /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2228     /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2229     /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2230     /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2231     /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2232
2233     */
2234
2235     dict_grep_cmap(zh->reg->dict, 0, 0);
2236
2237     {
2238         int level = xpath_len;
2239         int first_path = 1;
2240
2241         while (--level >= 0)
2242         {
2243             WRBUF xpath_rev = wrbuf_alloc();
2244             int i;
2245             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2246
2247             for (i = level; i >= 1; --i)
2248             {
2249                 const char *cp = xpath[i].part;
2250                 if (*cp)
2251                 {
2252                     for (; *cp; cp++)
2253                     {
2254                         if (*cp == '*')
2255                             wrbuf_puts(xpath_rev, "[^/]*");
2256                         else if (*cp == ' ')
2257                             wrbuf_puts(xpath_rev, "\001 ");
2258                         else
2259                             wrbuf_putc(xpath_rev, *cp);
2260
2261                         /* wrbuf_putc does not null-terminate , but
2262                            wrbuf_puts below ensures it does.. so xpath_rev
2263                            is OK iff length is > 0 */
2264                     }
2265                     wrbuf_puts(xpath_rev, "/");
2266                 }
2267                 else if (i == 1)  /* // case */
2268                     wrbuf_puts(xpath_rev, ".*");
2269             }
2270             if (xpath[level].predicate &&
2271                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2272                 xpath[level].predicate->u.relation.name[0])
2273             {
2274                 WRBUF wbuf = wrbuf_alloc();
2275                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2276                 if (xpath[level].predicate->u.relation.value)
2277                 {
2278                     const char *cp = xpath[level].predicate->u.relation.value;
2279                     wrbuf_putc(wbuf, '=');
2280
2281                     while (*cp)
2282                     {
2283                         if (strchr(REGEX_CHARS, *cp))
2284                             wrbuf_putc(wbuf, '\\');
2285                         wrbuf_putc(wbuf, *cp);
2286                         cp++;
2287                     }
2288                 }
2289                 rset_attr = xpath_trunc(
2290                     zh, stream, "0", wrbuf_cstr(wbuf),
2291                     ZEBRA_XPATH_ATTR_NAME,
2292                     rset_nmem, kc);
2293                 wrbuf_destroy(wbuf);
2294             }
2295             else
2296             {
2297                 if (!first_path)
2298                 {
2299                     wrbuf_destroy(xpath_rev);
2300                     continue;
2301                 }
2302             }
2303             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2304                     wrbuf_cstr(xpath_rev));
2305             if (wrbuf_len(xpath_rev))
2306             {
2307                 rset_start_tag = xpath_trunc(zh, stream, "0",
2308                                              wrbuf_cstr(xpath_rev),
2309                                              ZEBRA_XPATH_ELM_BEGIN,
2310                                              rset_nmem, kc);
2311                 if (always_matches)
2312                     rset = rset_start_tag;
2313                 else
2314                 {
2315                     rset_end_tag = xpath_trunc(zh, stream, "0",
2316                                                wrbuf_cstr(xpath_rev),
2317                                                ZEBRA_XPATH_ELM_END,
2318                                                rset_nmem, kc);
2319
2320                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2321                                                rset_start_tag, rset,
2322                                                rset_end_tag, rset_attr);
2323                 }
2324             }
2325             wrbuf_destroy(xpath_rev);
2326             first_path = 0;
2327         }
2328     }
2329     *rset_out = rset;
2330     return ZEBRA_OK;
2331 }
2332
2333 #define MAX_XPATH_STEPS 10
2334
2335 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2336                                      Z_AttributesPlusTerm *zapt,
2337                                      const Odr_oid *attributeSet,
2338                                      zint hits_limit, NMEM stream,
2339                                      Z_SortKeySpecList *sort_sequence,
2340                                      NMEM rset_nmem,
2341                                      RSET *rset,
2342                                      struct rset_key_control *kc);
2343
2344 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2345                                 const Odr_oid *attributeSet,
2346                                 zint hits_limit, NMEM stream,
2347                                 Z_SortKeySpecList *sort_sequence,
2348                                 int num_bases, const char **basenames,
2349                                 NMEM rset_nmem,
2350                                 RSET *rset,
2351                                 struct rset_key_control *kc)
2352 {
2353     RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2354     ZEBRA_RES res = ZEBRA_OK;
2355     int i;
2356     for (i = 0; i < num_bases; i++)
2357     {
2358
2359         if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2360         {
2361             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2362                            basenames[i]);
2363             res = ZEBRA_FAIL;
2364             break;
2365         }
2366         res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream,
2367                                   sort_sequence,
2368                                   rset_nmem, rsets+i, kc);
2369         if (res != ZEBRA_OK)
2370             break;
2371     }
2372     if (res != ZEBRA_OK)
2373     {   /* must clean up the already created sets */
2374         while (--i >= 0)
2375             rset_delete(rsets[i]);
2376         *rset = 0;
2377     }
2378     else
2379     {
2380         if (num_bases == 1)
2381             *rset = rsets[0];
2382         else if (num_bases == 0)
2383             *rset = rset_create_null(rset_nmem, kc, 0);
2384         else
2385             *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
2386                                    num_bases, rsets);
2387     }
2388     return res;
2389 }
2390
2391 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2392                                      Z_AttributesPlusTerm *zapt,
2393                                      const Odr_oid *attributeSet,
2394                                      zint hits_limit, NMEM stream,
2395                                      Z_SortKeySpecList *sort_sequence,
2396                                      NMEM rset_nmem,
2397                                      RSET *rset,
2398                                      struct rset_key_control *kc)
2399 {
2400     ZEBRA_RES res = ZEBRA_OK;
2401     const char *index_type;
2402     char *search_type = NULL;
2403     char rank_type[128];
2404     int complete_flag;
2405     int sort_flag;
2406     char termz[IT_MAX_WORD+1];
2407     int xpath_len;
2408     const char *xpath_use = 0;
2409     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2410
2411     if (!log_level_set)
2412     {
2413         log_level_rpn = yaz_log_module_level("rpn");
2414         log_level_set = 1;
2415     }
2416     zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2417                     rank_type, &complete_flag, &sort_flag);
2418
2419     yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2420     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2421     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2422     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2423
2424     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2425         return ZEBRA_FAIL;
2426
2427     if (sort_flag)
2428         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2429                              rank_type, rset_nmem, rset, kc);
2430     /* consider if an X-Path query is used */
2431     xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2432                                 xpath, MAX_XPATH_STEPS, stream);
2433     if (xpath_len >= 0)
2434     {
2435         if (xpath[xpath_len-1].part[0] == '@')
2436             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2437         else
2438             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */
2439
2440         if (1)
2441         {
2442             AttrType relation;
2443             int relation_value;
2444
2445             attr_init_APT(&relation, zapt, 2);
2446             relation_value = attr_find(&relation, NULL);
2447
2448             if (relation_value == 103) /* alwaysmatches */
2449             {
2450                 *rset = 0; /* signal no "term" set */
2451                 return rpn_search_xpath(zh, stream, rank_type, *rset,
2452                                         xpath_len, xpath, rset_nmem, rset, kc);
2453             }
2454         }
2455     }
2456
2457     /* search using one of the various search type strategies
2458        termz is our UTF-8 search term
2459        attributeSet is top-level default attribute set
2460        stream is ODR for search
2461        reg_id is the register type
2462        complete_flag is 1 for complete subfield, 0 for incomplete
2463        xpath_use is use-attribute to be used for X-Path search, 0 for none
2464     */
2465     if (!strcmp(search_type, "phrase"))
2466     {
2467         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit,
2468                                     stream,
2469                                     index_type, complete_flag, rank_type,
2470                                     xpath_use,
2471                                     rset_nmem,
2472                                     rset, kc);
2473     }
2474     else if (!strcmp(search_type, "and-list"))
2475     {
2476         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit,
2477                                       stream,
2478                                       index_type, complete_flag, rank_type,
2479                                       xpath_use,
2480                                       rset_nmem,
2481                                       rset, kc);
2482     }
2483     else if (!strcmp(search_type, "or-list"))
2484     {
2485         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit,
2486                                      stream,
2487                                      index_type, complete_flag, rank_type,
2488                                      xpath_use,
2489                                      rset_nmem,
2490                                      rset, kc);
2491     }
2492     else if (!strcmp(search_type, "local"))
2493     {
2494         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2495                                    rank_type, rset_nmem, rset, kc);
2496     }
2497     else if (!strcmp(search_type, "numeric"))
2498     {
2499         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, hits_limit,
2500                                      stream,
2501                                      index_type, complete_flag, rank_type,
2502                                      xpath_use,
2503                                      rset_nmem,
2504                                      rset, kc);
2505     }
2506     else
2507     {
2508         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2509         res = ZEBRA_FAIL;
2510     }
2511     if (res != ZEBRA_OK)
2512         return res;
2513     if (!*rset)
2514         return ZEBRA_FAIL;
2515     return rpn_search_xpath(zh, stream, rank_type, *rset,
2516                             xpath_len, xpath, rset_nmem, rset, kc);
2517 }
2518
2519 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2520                                       const Odr_oid *attributeSet,
2521                                       zint hits_limit,
2522                                       NMEM stream, NMEM rset_nmem,
2523                                       Z_SortKeySpecList *sort_sequence,
2524                                       int num_bases, const char **basenames,
2525                                       RSET **result_sets, int *num_result_sets,
2526                                       Z_Operator *parent_op,
2527                                       struct rset_key_control *kc);
2528
2529 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2530                                    zint *approx_limit)
2531 {
2532     ZEBRA_RES res = ZEBRA_OK;
2533     if (zs->which == Z_RPNStructure_complex)
2534     {
2535         if (res == ZEBRA_OK)
2536             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2537                                            approx_limit);
2538         if (res == ZEBRA_OK)
2539             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2540                                            approx_limit);
2541     }
2542     else if (zs->which == Z_RPNStructure_simple)
2543     {
2544         if (zs->u.simple->which == Z_Operand_APT)
2545         {
2546             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2547             AttrType global_hits_limit_attr;
2548             int l;
2549
2550             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2551
2552             l = attr_find(&global_hits_limit_attr, NULL);
2553             if (l != -1)
2554                 *approx_limit = l;
2555         }
2556     }
2557     return res;
2558 }
2559
2560 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2561                          const Odr_oid *attributeSet,
2562                          zint hits_limit,
2563                          NMEM stream, NMEM rset_nmem,
2564                          Z_SortKeySpecList *sort_sequence,
2565                          int num_bases, const char **basenames,
2566                          RSET *result_set)
2567 {
2568     RSET *result_sets = 0;
2569     int num_result_sets = 0;
2570     ZEBRA_RES res;
2571     struct rset_key_control *kc = zebra_key_control_create(zh);
2572
2573     res = rpn_search_structure(zh, zs, attributeSet, hits_limit,
2574                                stream, rset_nmem,
2575                                sort_sequence,
2576                                num_bases, basenames,
2577                                &result_sets, &num_result_sets,
2578                                0 /* no parent op */,
2579                                kc);
2580     if (res != ZEBRA_OK)
2581     {
2582         int i;
2583         for (i = 0; i<num_result_sets; i++)
2584             rset_delete(result_sets[i]);
2585         *result_set = 0;
2586     }
2587     else
2588     {
2589         assert(num_result_sets == 1);
2590         assert(result_sets);
2591         assert(*result_sets);
2592         *result_set = *result_sets;
2593     }
2594     (*kc->dec)(kc);
2595     return res;
2596 }
2597
2598 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2599                                const Odr_oid *attributeSet, zint hits_limit,
2600                                NMEM stream, NMEM rset_nmem,
2601                                Z_SortKeySpecList *sort_sequence,
2602                                int num_bases, const char **basenames,
2603                                RSET **result_sets, int *num_result_sets,
2604                                Z_Operator *parent_op,
2605                                struct rset_key_control *kc)
2606 {
2607     *num_result_sets = 0;
2608     if (zs->which == Z_RPNStructure_complex)
2609     {
2610         ZEBRA_RES res;
2611         Z_Operator *zop = zs->u.complex->roperator;
2612         RSET *result_sets_l = 0;
2613         int num_result_sets_l = 0;
2614         RSET *result_sets_r = 0;
2615         int num_result_sets_r = 0;
2616
2617         res = rpn_search_structure(zh, zs->u.complex->s1,
2618                                    attributeSet, hits_limit, stream, rset_nmem,
2619                                    sort_sequence,
2620                                    num_bases, basenames,
2621                                    &result_sets_l, &num_result_sets_l,
2622                                    zop, kc);
2623         if (res != ZEBRA_OK)
2624         {
2625             int i;
2626             for (i = 0; i<num_result_sets_l; i++)
2627                 rset_delete(result_sets_l[i]);
2628             return res;
2629         }
2630         res = rpn_search_structure(zh, zs->u.complex->s2,
2631                                    attributeSet, hits_limit, stream, rset_nmem,
2632                                    sort_sequence,
2633                                    num_bases, basenames,
2634                                    &result_sets_r, &num_result_sets_r,
2635                                    zop, kc);
2636         if (res != ZEBRA_OK)
2637         {
2638             int i;
2639             for (i = 0; i<num_result_sets_l; i++)
2640                 rset_delete(result_sets_l[i]);
2641             for (i = 0; i<num_result_sets_r; i++)
2642                 rset_delete(result_sets_r[i]);
2643             return res;
2644         }
2645
2646         /* make a new list of result for all children */
2647         *num_result_sets = num_result_sets_l + num_result_sets_r;
2648         *result_sets = nmem_malloc(stream, *num_result_sets *
2649                                    sizeof(**result_sets));
2650         memcpy(*result_sets, result_sets_l,
2651                num_result_sets_l * sizeof(**result_sets));
2652         memcpy(*result_sets + num_result_sets_l, result_sets_r,
2653                num_result_sets_r * sizeof(**result_sets));
2654
2655         if (!parent_op || parent_op->which != zop->which
2656             || (zop->which != Z_Operator_and &&
2657                 zop->which != Z_Operator_or))
2658         {
2659             /* parent node different from this one (or non-present) */
2660             /* we must combine result sets now */
2661             RSET rset;
2662             switch (zop->which)
2663             {
2664             case Z_Operator_and:
2665                 rset = rset_create_and(rset_nmem, kc,
2666                                        kc->scope,
2667                                        *num_result_sets, *result_sets);
2668                 break;
2669             case Z_Operator_or:
2670                 rset = rset_create_or(rset_nmem, kc,
2671                                       kc->scope, 0, /* termid */
2672                                       *num_result_sets, *result_sets);
2673                 break;
2674             case Z_Operator_and_not:
2675                 rset = rset_create_not(rset_nmem, kc,
2676                                        kc->scope,
2677                                        (*result_sets)[0],
2678                                        (*result_sets)[1]);
2679                 break;
2680             case Z_Operator_prox:
2681                 if (zop->u.prox->which != Z_ProximityOperator_known)
2682                 {
2683                     zebra_setError(zh,
2684                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2685                                    0);
2686                     return ZEBRA_FAIL;
2687                 }
2688                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2689                 {
2690                     zebra_setError_zint(zh,
2691                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2692                                         *zop->u.prox->u.known);
2693                     return ZEBRA_FAIL;
2694                 }
2695                 else
2696                 {
2697                     rset = rset_create_prox(rset_nmem, kc,
2698                                             kc->scope,
2699                                             *num_result_sets, *result_sets,
2700                                             *zop->u.prox->ordered,
2701                                             (!zop->u.prox->exclusion ?
2702                                              0 : *zop->u.prox->exclusion),
2703                                             *zop->u.prox->relationType,
2704                                             *zop->u.prox->distance );
2705                 }
2706                 break;
2707             default:
2708                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2709                 return ZEBRA_FAIL;
2710             }
2711             *num_result_sets = 1;
2712             *result_sets = nmem_malloc(stream, *num_result_sets *
2713                                        sizeof(**result_sets));
2714             (*result_sets)[0] = rset;
2715         }
2716     }
2717     else if (zs->which == Z_RPNStructure_simple)
2718     {
2719         RSET rset;
2720         ZEBRA_RES res;
2721
2722         if (zs->u.simple->which == Z_Operand_APT)
2723         {
2724             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2725             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2726                                  attributeSet, hits_limit,
2727                                  stream, sort_sequence,
2728                                  num_bases, basenames, rset_nmem, &rset,
2729                                  kc);
2730             if (res != ZEBRA_OK)
2731                 return res;
2732         }
2733         else if (zs->u.simple->which == Z_Operand_resultSetId)
2734         {
2735             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2736             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2737             if (!rset)
2738             {
2739                 zebra_setError(zh,
2740                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2741                                zs->u.simple->u.resultSetId);
2742                 return ZEBRA_FAIL;
2743             }
2744             rset_dup(rset);
2745         }
2746         else
2747         {
2748             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2749             return ZEBRA_FAIL;
2750         }
2751         *num_result_sets = 1;
2752         *result_sets = nmem_malloc(stream, *num_result_sets *
2753                                    sizeof(**result_sets));
2754         (*result_sets)[0] = rset;
2755     }
2756     else
2757     {
2758         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2759         return ZEBRA_FAIL;
2760     }
2761     return ZEBRA_OK;
2762 }
2763
2764
2765
2766 /*
2767  * Local variables:
2768  * c-basic-offset: 4
2769  * c-file-style: "Stroustrup"
2770  * indent-tabs-mode: nil
2771  * End:
2772  * vim: shiftwidth=4 tabstop=8 expandtab
2773  */
2774