ICU: support @attr 5=2, @attr 5=3 in searches
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* This file is part of the Zebra server.
2    Copyright (C) 1994-2011 Index Data
3
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 */
19
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
42
43 #define TERMSET_DISABLE 1
44
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
46 {
47     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48     const char **out = zebra_maps_input(p->zm, from, len, 0);
49 #if 0
50     if (out && *out)
51     {
52         const char *outp = *out;
53         yaz_log(YLOG_LOG, "---");
54         while (*outp)
55         {
56             yaz_log(YLOG_LOG, "%02X", *outp);
57             outp++;
58         }
59     }
60 #endif
61     return out;
62 }
63
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65                           struct rpn_char_map_info *map_info)
66 {
67     map_info->zm = zm;
68     if (zebra_maps_is_icu(zm))
69         dict_grep_cmap(reg->dict, 0, 0);
70     else
71         dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
72 }
73
74 #define TERM_COUNT        
75        
76 struct grep_info {        
77 #ifdef TERM_COUNT        
78     int *term_no;        
79 #endif        
80     ISAM_P *isam_p_buf;
81     int isam_p_size;        
82     int isam_p_indx;
83     int trunc_max;
84     ZebraHandle zh;
85     const char *index_type;
86     ZebraSet termset;
87 };        
88
89 static int add_isam_p(const char *name, const char *info,
90                       struct grep_info *p)
91 {
92     if (!log_level_set)
93     {
94         log_level_rpn = yaz_log_module_level("rpn");
95         log_level_set = 1;
96     }
97     /* we may have to stop this madness.. NOTE: -1 so that if
98        truncmax == trunxlimit we do *not* generate result sets */
99     if (p->isam_p_indx >= p->trunc_max - 1)
100         return 1;
101
102     if (p->isam_p_indx == p->isam_p_size)
103     {
104         ISAM_P *new_isam_p_buf;
105 #ifdef TERM_COUNT        
106         int *new_term_no;        
107 #endif
108         p->isam_p_size = 2*p->isam_p_size + 100;
109         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
110                                             p->isam_p_size);
111         if (p->isam_p_buf)
112         {
113             memcpy(new_isam_p_buf, p->isam_p_buf,
114                    p->isam_p_indx * sizeof(*p->isam_p_buf));
115             xfree(p->isam_p_buf);
116         }
117         p->isam_p_buf = new_isam_p_buf;
118
119 #ifdef TERM_COUNT
120         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
121         if (p->term_no)
122         {
123             memcpy(new_term_no, p->isam_p_buf,
124                    p->isam_p_indx * sizeof(*p->term_no));
125             xfree(p->term_no);
126         }
127         p->term_no = new_term_no;
128 #endif
129     }
130     assert(*info == sizeof(*p->isam_p_buf));
131     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
132
133     if (p->termset)
134     {
135         const char *db;
136         char term_tmp[IT_MAX_WORD];
137         int ord = 0;
138         const char *index_name;
139         int len = key_SU_decode(&ord, (const unsigned char *) name);
140         
141         zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143         zebraExplain_lookup_ord(p->zh->reg->zei,
144                                 ord, 0 /* index_type */, &db, &index_name);
145         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
146         
147         resultSetAddTerm(p->zh, p->termset, name[len], db,
148                          index_name, term_tmp);
149     }
150     (p->isam_p_indx)++;
151     return 0;
152 }
153
/* Dictionary grep callback: p is the struct grep_info collecting the
   matches; the work is delegated to add_isam_p(). */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *collector = p;

    return add_isam_p(name, info, collector);
}
158
159 static int term_pre(zebra_map_t zm, const char **src,
160                     const char *ct1, int first)
161 {
162     const char *s1, *s0 = *src;
163     const char **map;
164
165     /* skip white space */
166     while (*s0)
167     {
168         if (ct1 && strchr(ct1, *s0))
169             break;
170         s1 = s0;
171         map = zebra_maps_input(zm, &s1, strlen(s1), first);
172         if (**map != *CHR_SPACE)
173             break;
174         s0 = s1;
175     }
176     *src = s0;
177     return *s0;
178 }
179
180
/* Render in_buf[0..in_size) into out_buf as a sequence of "HH:c  "
 * items (hex value, then the character itself or '?' when it is outside
 * the printable ASCII range 32..126).
 *
 * out_size must exceed 20; once the output has grown beyond
 * out_size - 20 characters, ".." is appended and the dump stops.
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;   /* current length of the string in out_buf */
    int idx = 0;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    while (idx < in_size)
    {
        int byte = in_buf[idx++] & 0xff;
        int shown = (byte >= 32 && byte <= 126) ? byte : '?';

        used += sprintf(out_buf + used, "%02X:%c  ", byte, shown);
        if (used > out_size - 20)
        {
            strcpy(out_buf + used, "..");
            break;
        }
    }
}
206
207 #define REGEX_CHARS " ^[]()|.*+?!\"$"
208
209 static void add_non_space(const char *start, const char *end,
210                           WRBUF term_dict,
211                           WRBUF display_term,
212                           const char **map, int q_map_match)
213 {
214     size_t sz = end - start;
215
216     wrbuf_write(display_term, start, sz);
217     if (!q_map_match)
218     {
219         while (start < end)
220         {
221             if (strchr(REGEX_CHARS, *start))
222                 wrbuf_putc(term_dict, '\\');
223             wrbuf_putc(term_dict, *start);
224             start++;
225         }
226     }
227     else
228     {
229         char tmpbuf[80];
230         esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
231         
232         wrbuf_puts(term_dict, map[0]);
233     }
234 }
235
236
237 static int term_100_icu(zebra_map_t zm,
238                         const char **src, WRBUF term_dict, int space_split,
239                         WRBUF display_term,
240                         int mode)
241 {
242     int i;
243     const char *res_buf = 0;
244     size_t res_len = 0;
245     const char *display_buf;
246     size_t display_len;
247     if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
248                                  &display_buf, &display_len))
249     {
250         *src += strlen(*src);
251         return 0;
252     }
253     wrbuf_write(display_term, display_buf, display_len);
254     if (mode)
255     {
256         /* ICU sort keys seem to be of the form
257            basechars \x01 accents \x01 length
258            For now we'll just right truncate from basechars . This 
259            may give false hits due to accents not being used.
260         */
261         i = res_len;
262         while (--i >= 0 && res_buf[i] != '\x01')
263             ;
264         if (i > 0)
265         {
266             while (--i >= 0 && res_buf[i] != '\x01')
267                 ;
268         }
269         if (i == 0)
270         {  /* did not find base chars at all. Throw error */
271             return -1;
272         }
273         res_len = i; /* reduce res_len */
274     }
275     if (mode & 2)
276         wrbuf_puts(term_dict, ".*");
277     for (i = 0; i < res_len; i++)
278     {
279         if (strchr(REGEX_CHARS "\\", res_buf[i]))
280             wrbuf_putc(term_dict, '\\');
281         if (res_buf[i] < 32)
282             wrbuf_putc(term_dict, 1);
283             
284         wrbuf_putc(term_dict, res_buf[i]);
285     }
286     if (mode & 1)
287         wrbuf_puts(term_dict, ".*");
288     else if (mode)
289         wrbuf_puts(term_dict, "\x01\x01.*");
290         
291     return 1;
292 }
293
294 /* term_100: handle term, where trunc = none(no operators at all) */
295 static int term_100(zebra_map_t zm,
296                     const char **src, WRBUF term_dict, int space_split,
297                     WRBUF display_term)
298 {
299     const char *s0;
300     const char **map;
301     int i = 0;
302
303     const char *space_start = 0;
304     const char *space_end = 0;
305
306     if (!term_pre(zm, src, 0, !space_split))
307         return 0;
308     s0 = *src;
309     while (*s0)
310     {
311         const char *s1 = s0;
312         int q_map_match = 0;
313         map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
314         if (space_split)
315         {
316             if (**map == *CHR_SPACE)
317                 break;
318         }
319         else  /* complete subfield only. */
320         {
321             if (**map == *CHR_SPACE)
322             {   /* save space mapping for later  .. */
323                 space_start = s1;
324                 space_end = s0;
325                 continue;
326             }
327             else if (space_start)
328             {   /* reload last space */
329                 while (space_start < space_end)
330                 {
331                     if (strchr(REGEX_CHARS, *space_start))
332                         wrbuf_putc(term_dict, '\\');
333                     wrbuf_putc(display_term, *space_start);
334                     wrbuf_putc(term_dict, *space_start);
335                     space_start++;
336                                
337                 }
338                 /* and reset */
339                 space_start = space_end = 0;
340             }
341         }
342         i++;
343
344         add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
345     }
346     *src = s0;
347     return i;
348 }
349
350 /* term_101: handle term, where trunc = Process # */
351 static int term_101(zebra_map_t zm,
352                     const char **src, WRBUF term_dict, int space_split,
353                     WRBUF display_term)
354 {
355     const char *s0;
356     const char **map;
357     int i = 0;
358
359     if (!term_pre(zm, src, "#", !space_split))
360         return 0;
361     s0 = *src;
362     while (*s0)
363     {
364         if (*s0 == '#')
365         {
366             i++;
367             wrbuf_puts(term_dict, ".*");
368             wrbuf_putc(display_term, *s0);
369             s0++;
370         }
371         else
372         {
373             const char *s1 = s0;
374             int q_map_match = 0;
375             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
376             if (space_split && **map == *CHR_SPACE)
377                 break;
378
379             i++;
380             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
381         }
382     }
383     *src = s0;
384     return i;
385 }
386
387 /* term_103: handle term, where trunc = re-2 (regular expressions) */
388 static int term_103(zebra_map_t zm, const char **src,
389                     WRBUF term_dict, int *errors, int space_split,
390                     WRBUF display_term)
391 {
392     int i = 0;
393     const char *s0;
394     const char **map;
395
396     if (!term_pre(zm, src, "^\\()[].*+?|", !space_split))
397         return 0;
398     s0 = *src;
399     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
400         isdigit(((const unsigned char *)s0)[1]))
401     {
402         *errors = s0[1] - '0';
403         s0 += 3;
404         if (*errors > 3)
405             *errors = 3;
406     }
407     while (*s0)
408     {
409         if (strchr("^\\()[].*+?|-", *s0))
410         {
411             wrbuf_putc(display_term, *s0);
412             wrbuf_putc(term_dict, *s0);
413             s0++;
414             i++;
415         }
416         else
417         {
418             const char *s1 = s0;
419             int q_map_match = 0;
420             map = zebra_maps_search(zm, &s0, strlen(s0),  &q_map_match);
421             if (space_split && **map == *CHR_SPACE)
422                 break;
423
424             i++;
425             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
426         }
427     }
428     *src = s0;
429     
430     return i;
431 }
432
433 /* term_103: handle term, where trunc = re-1 (regular expressions) */
434 static int term_102(zebra_map_t zm, const char **src,
435                     WRBUF term_dict, int space_split, WRBUF display_term)
436 {
437     return term_103(zm, src, term_dict, NULL, space_split, display_term);
438 }
439
440
441 /* term_104: handle term, process ?n * # */
442 static int term_104(zebra_map_t zm, const char **src, 
443                     WRBUF term_dict, int space_split, WRBUF display_term)
444 {
445     const char *s0;
446     const char **map;
447     int i = 0;
448
449     if (!term_pre(zm, src, "?*#", !space_split))
450         return 0;
451     s0 = *src;
452     while (*s0)
453     {
454         if (*s0 == '?')
455         {
456             i++;
457             wrbuf_putc(display_term, *s0);
458             s0++;
459             if (*s0 >= '0' && *s0 <= '9')
460             {
461                 int limit = 0;
462                 while (*s0 >= '0' && *s0 <= '9')
463                 {
464                     limit = limit * 10 + (*s0 - '0');
465                     wrbuf_putc(display_term, *s0);
466                     s0++;
467                 }
468                 if (limit > 20)
469                     limit = 20;
470                 while (--limit >= 0)
471                 {
472                     wrbuf_puts(term_dict, ".?");
473                 }
474             }
475             else
476             {
477                 wrbuf_puts(term_dict, ".*");
478             }
479         }
480         else if (*s0 == '*')
481         {
482             i++;
483             wrbuf_puts(term_dict, ".*");
484             wrbuf_putc(display_term, *s0);
485             s0++;
486         }
487         else if (*s0 == '#')
488         {
489             i++;
490             wrbuf_puts(term_dict, ".");
491             wrbuf_putc(display_term, *s0);
492             s0++;
493         }
494         else
495         {
496             const char *s1 = s0;
497             int q_map_match = 0;
498             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
499             if (space_split && **map == *CHR_SPACE)
500                 break;
501
502             i++;
503             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
504         }
505     }
506     *src = s0;
507     return i;
508 }
509
510 /* term_105/106: handle term, process * ! and possibly right_truncate */
511 static int term_105(zebra_map_t zm, const char **src, 
512                     WRBUF term_dict, int space_split,
513                     WRBUF display_term, int right_truncate)
514 {
515     const char *s0;
516     const char **map;
517     int i = 0;
518
519     if (!term_pre(zm, src, "\\*!", !space_split))
520         return 0;
521     s0 = *src;
522     while (*s0)
523     {
524         if (*s0 == '*')
525         {
526             i++;
527             wrbuf_puts(term_dict, ".*");
528             wrbuf_putc(display_term, *s0);
529             s0++;
530         }
531         else if (*s0 == '!')
532         {
533             i++;
534             wrbuf_putc(term_dict, '.');
535             wrbuf_putc(display_term, *s0);
536             s0++;
537         }
538         else if (*s0 == '\\')
539         {
540             i++;
541             wrbuf_puts(term_dict, "\\\\");
542             wrbuf_putc(display_term, *s0);
543             s0++;
544         }
545         else
546         {
547             const char *s1 = s0;
548             int q_map_match = 0;
549             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
550             if (space_split && **map == *CHR_SPACE)
551                 break;
552
553             i++;
554             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
555         }
556     }
557     if (right_truncate)
558         wrbuf_puts(term_dict, ".*");
559     *src = s0;
560     return i;
561 }
562
563
564 /* gen_regular_rel - generate regular expression from relation
565  *  val:     border value (inclusive)
566  *  islt:    1 if <=; 0 if >=.
567  */
568 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
569 {
570     char dst_buf[20*5*20]; /* assuming enough for expansion */
571     char *dst = dst_buf;
572     int dst_p;
573     int w, d, i;
574     int pos = 0;
575     char numstr[20];
576
577     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
578     if (val >= 0)
579     {
580         if (islt)
581             strcpy(dst, "(-[0-9]+|(");
582         else
583             strcpy(dst, "((");
584     } 
585     else
586     {
587         if (!islt)
588         {
589             strcpy(dst, "([0-9]+|-(");
590             islt = 1;
591         }
592         else
593         {
594             strcpy(dst, "(-(");
595             islt = 0;
596         }
597         val = -val;
598     }
599     dst_p = strlen(dst);
600     sprintf(numstr, "%d", val);
601     for (w = strlen(numstr); --w >= 0; pos++)
602     {
603         d = numstr[w];
604         if (pos > 0)
605         {
606             if (islt)
607             {
608                 if (d == '0')
609                     continue;
610                 d--;
611             } 
612             else
613             {
614                 if (d == '9')
615                     continue;
616                 d++;
617             }
618         }
619         
620         strcpy(dst + dst_p, numstr);
621         dst_p = strlen(dst) - pos - 1;
622
623         if (islt)
624         {
625             if (d != '0')
626             {
627                 dst[dst_p++] = '[';
628                 dst[dst_p++] = '0';
629                 dst[dst_p++] = '-';
630                 dst[dst_p++] = d;
631                 dst[dst_p++] = ']';
632             }
633             else
634                 dst[dst_p++] = d;
635         }
636         else
637         {
638             if (d != '9')
639             { 
640                 dst[dst_p++] = '[';
641                 dst[dst_p++] = d;
642                 dst[dst_p++] = '-';
643                 dst[dst_p++] = '9';
644                 dst[dst_p++] = ']';
645             }
646             else
647                 dst[dst_p++] = d;
648         }
649         for (i = 0; i<pos; i++)
650         {
651             dst[dst_p++] = '[';
652             dst[dst_p++] = '0';
653             dst[dst_p++] = '-';
654             dst[dst_p++] = '9';
655             dst[dst_p++] = ']';
656         }
657         dst[dst_p++] = '|';
658     }
659     dst[dst_p] = '\0';
660     if (islt)
661     {
662         /* match everything less than 10^(pos-1) */
663         strcat(dst, "0*");
664         for (i = 1; i<pos; i++)
665             strcat(dst, "[0-9]?");
666     }
667     else
668     {
669         /* match everything greater than 10^pos */
670         for (i = 0; i <= pos; i++)
671             strcat(dst, "[0-9]");
672         strcat(dst, "[0-9]*");
673     }
674     strcat(dst, "))");
675     wrbuf_puts(term_dict, dst);
676 }
677
678 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
679 {
680     const char *src = wrbuf_cstr(wsrc);
681     if (src[*indx] == '\\')
682     {
683         wrbuf_putc(term_p, src[*indx]);
684         (*indx)++;
685     }
686     wrbuf_putc(term_p, src[*indx]);
687     (*indx)++;
688 }
689
690 /*
691  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
692  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
693  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
694  *              ([^-a].*|a[^-b].*|ab[c-].*)
695  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
696  *              ([^a-].*|a[^b-].*|ab[^c-].*)
697  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
698  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
699  */
700 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
701                            const char **term_sub, WRBUF term_dict,
702                            const Odr_oid *attributeSet,
703                            zebra_map_t zm, int space_split, 
704                            WRBUF display_term,
705                            int *error_code)
706 {
707     AttrType relation;
708     int relation_value;
709     int i;
710     WRBUF term_component = wrbuf_alloc();
711
712     attr_init_APT(&relation, zapt, 2);
713     relation_value = attr_find(&relation, NULL);
714
715     *error_code = 0;
716     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
717     switch (relation_value)
718     {
719     case 1:
720         if (!term_100(zm, term_sub, term_component, space_split, display_term))
721         {
722             wrbuf_destroy(term_component);
723             return 0;
724         }
725         yaz_log(log_level_rpn, "Relation <");
726         
727         wrbuf_putc(term_dict, '(');
728         for (i = 0; i < wrbuf_len(term_component); )
729         {
730             int j = 0;
731             
732             if (i)
733                 wrbuf_putc(term_dict, '|');
734             while (j < i)
735                 string_rel_add_char(term_dict, term_component, &j);
736
737             wrbuf_putc(term_dict, '[');
738
739             wrbuf_putc(term_dict, '^');
740             
741             wrbuf_putc(term_dict, 1);
742             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
743             
744             string_rel_add_char(term_dict, term_component, &i);
745             wrbuf_putc(term_dict, '-');
746             
747             wrbuf_putc(term_dict, ']');
748             wrbuf_putc(term_dict, '.');
749             wrbuf_putc(term_dict, '*');
750         }
751         wrbuf_putc(term_dict, ')');
752         break;
753     case 2:
754         if (!term_100(zm, term_sub, term_component, space_split, display_term))
755         {
756             wrbuf_destroy(term_component);
757             return 0;
758         }
759         yaz_log(log_level_rpn, "Relation <=");
760
761         wrbuf_putc(term_dict, '(');
762         for (i = 0; i < wrbuf_len(term_component); )
763         {
764             int j = 0;
765
766             while (j < i)
767                 string_rel_add_char(term_dict, term_component, &j);
768             wrbuf_putc(term_dict, '[');
769
770             wrbuf_putc(term_dict, '^');
771
772             wrbuf_putc(term_dict, 1);
773             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
774
775             string_rel_add_char(term_dict, term_component, &i);
776             wrbuf_putc(term_dict, '-');
777
778             wrbuf_putc(term_dict, ']');
779             wrbuf_putc(term_dict, '.');
780             wrbuf_putc(term_dict, '*');
781
782             wrbuf_putc(term_dict, '|');
783         }
784         for (i = 0; i < wrbuf_len(term_component); )
785             string_rel_add_char(term_dict, term_component, &i);
786         wrbuf_putc(term_dict, ')');
787         break;
788     case 5:
789         if (!term_100(zm, term_sub, term_component, space_split, display_term))
790         {
791             wrbuf_destroy(term_component);
792             return 0;
793         }
794         yaz_log(log_level_rpn, "Relation >");
795
796         wrbuf_putc(term_dict, '(');
797         for (i = 0; i < wrbuf_len(term_component); )
798         {
799             int j = 0;
800
801             while (j < i)
802                 string_rel_add_char(term_dict, term_component, &j);
803             wrbuf_putc(term_dict, '[');
804             
805             wrbuf_putc(term_dict, '^');
806             wrbuf_putc(term_dict, '-');
807             string_rel_add_char(term_dict, term_component, &i);
808
809             wrbuf_putc(term_dict, ']');
810             wrbuf_putc(term_dict, '.');
811             wrbuf_putc(term_dict, '*');
812
813             wrbuf_putc(term_dict, '|');
814         }
815         for (i = 0; i < wrbuf_len(term_component); )
816             string_rel_add_char(term_dict, term_component, &i);
817         wrbuf_putc(term_dict, '.');
818         wrbuf_putc(term_dict, '+');
819         wrbuf_putc(term_dict, ')');
820         break;
821     case 4:
822         if (!term_100(zm, term_sub, term_component, space_split, display_term))
823         {
824             wrbuf_destroy(term_component);
825             return 0;
826         }
827         yaz_log(log_level_rpn, "Relation >=");
828
829         wrbuf_putc(term_dict, '(');
830         for (i = 0; i < wrbuf_len(term_component); )
831         {
832             int j = 0;
833
834             if (i)
835                 wrbuf_putc(term_dict, '|');
836             while (j < i)
837                 string_rel_add_char(term_dict, term_component, &j);
838             wrbuf_putc(term_dict, '[');
839
840             if (i < wrbuf_len(term_component)-1)
841             {
842                 wrbuf_putc(term_dict, '^');
843                 wrbuf_putc(term_dict, '-');
844                 string_rel_add_char(term_dict, term_component, &i);
845             }
846             else
847             {
848                 string_rel_add_char(term_dict, term_component, &i);
849                 wrbuf_putc(term_dict, '-');
850             }
851             wrbuf_putc(term_dict, ']');
852             wrbuf_putc(term_dict, '.');
853             wrbuf_putc(term_dict, '*');
854         }
855         wrbuf_putc(term_dict, ')');
856         break;
857     case 3:
858     case 102:
859     case -1:
860         if (!**term_sub)
861             return 1;
862         yaz_log(log_level_rpn, "Relation =");
863         if (!term_100(zm, term_sub, term_component, space_split, display_term))
864         {
865             wrbuf_destroy(term_component);
866             return 0;
867         }
868         wrbuf_puts(term_dict, "(");
869         wrbuf_puts(term_dict, wrbuf_cstr(term_component));
870         wrbuf_puts(term_dict, ")");
871         break;
872     case 103:
873         yaz_log(log_level_rpn, "Relation always matches");
874         /* skip to end of term (we don't care what it is) */
875         while (**term_sub != '\0')
876             (*term_sub)++;
877         break;
878     default:
879         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
880         wrbuf_destroy(term_component);
881         return 0;
882     }
883     wrbuf_destroy(term_component);
884     return 1;
885 }
886
887 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
888                              const char **term_sub, 
889                              WRBUF term_dict,
890                              const Odr_oid *attributeSet, NMEM stream,
891                              struct grep_info *grep_info,
892                              const char *index_type, int complete_flag,
893                              WRBUF display_term,
894                              const char *xpath_use,
895                              struct ord_list **ol,
896                              zebra_map_t zm);
897
898 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
899                                 Z_AttributesPlusTerm *zapt,
900                                 zint *hits_limit_value,
901                                 const char **term_ref_id_str,
902                                 NMEM nmem)
903 {
904     AttrType term_ref_id_attr;
905     AttrType hits_limit_attr;
906     int term_ref_id_int;
907     zint hits_limit_from_attr;
908  
909     attr_init_APT(&hits_limit_attr, zapt, 11);
910     hits_limit_from_attr  = attr_find(&hits_limit_attr, NULL);
911
912     attr_init_APT(&term_ref_id_attr, zapt, 10);
913     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
914     if (term_ref_id_int >= 0)
915     {
916         char *res = nmem_malloc(nmem, 20);
917         sprintf(res, "%d", term_ref_id_int);
918         *term_ref_id_str = res;
919     }
920     if (hits_limit_from_attr != -1)
921         *hits_limit_value = hits_limit_from_attr;
922
923     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
924             *term_ref_id_str ? *term_ref_id_str : "none",
925             *hits_limit_value);
926     return ZEBRA_OK;
927 }
928
929 /** \brief search for term (which may be truncated)
930  */
931 static ZEBRA_RES search_term(ZebraHandle zh,
932                              Z_AttributesPlusTerm *zapt,
933                              const char **term_sub, 
934                              const Odr_oid *attributeSet,
935                              zint hits_limit, NMEM stream,
936                              struct grep_info *grep_info,
937                              const char *index_type, int complete_flag,
938                              const char *rank_type, 
939                              const char *xpath_use,
940                              NMEM rset_nmem,
941                              RSET *rset,
942                              struct rset_key_control *kc,
943                              zebra_map_t zm)
944 {
945     ZEBRA_RES res;
946     struct ord_list *ol;
947     zint hits_limit_value = hits_limit;
948     const char *term_ref_id_str = 0;
949     WRBUF term_dict = wrbuf_alloc();
950     WRBUF display_term = wrbuf_alloc();
951     *rset = 0;
952     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
953                           stream);
954     grep_info->isam_p_indx = 0;
955     res = string_term(zh, zapt, term_sub, term_dict,
956                       attributeSet, stream, grep_info,
957                       index_type, complete_flag,
958                       display_term, xpath_use, &ol, zm);
959     wrbuf_destroy(term_dict);
960     if (res == ZEBRA_OK && *term_sub)
961     {
962         yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
963         *rset = rset_trunc(zh, grep_info->isam_p_buf,
964                            grep_info->isam_p_indx, wrbuf_buf(display_term),
965                            wrbuf_len(display_term), rank_type, 
966                            1 /* preserve pos */,
967                            zapt->term->which, rset_nmem,
968                            kc, kc->scope, ol, index_type, hits_limit_value,
969                            term_ref_id_str);
970         if (!*rset)
971             res = ZEBRA_FAIL;
972     }
973     wrbuf_destroy(display_term);
974     return res;
975 }
976
977 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
978                              const char **term_sub, 
979                              WRBUF term_dict,
980                              const Odr_oid *attributeSet, NMEM stream,
981                              struct grep_info *grep_info,
982                              const char *index_type, int complete_flag,
983                              WRBUF display_term,
984                              const char *xpath_use,
985                              struct ord_list **ol,
986                              zebra_map_t zm)
987 {
988     int r;
989     AttrType truncation;
990     int truncation_value;
991     const char *termp;
992     struct rpn_char_map_info rcmi;
993
994     int space_split = complete_flag ? 0 : 1;
995     int ord = -1;
996     int regex_range = 0;
997     int max_pos, prefix_len = 0;
998     int relation_error;
999     char ord_buf[32];
1000     int ord_len, i;
1001
1002     *ol = ord_list_create(stream);
1003
1004     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1005     attr_init_APT(&truncation, zapt, 5);
1006     truncation_value = attr_find(&truncation, NULL);
1007     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1008
1009     termp = *term_sub; /* start of term for each database */
1010     
1011     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1012                           attributeSet, &ord) != ZEBRA_OK)
1013     {
1014         *term_sub = 0;
1015         return ZEBRA_FAIL;
1016     }
1017     
1018     wrbuf_rewind(term_dict); /* new dictionary regexp term */
1019     
1020     *ol = ord_list_append(stream, *ol, ord);
1021     ord_len = key_SU_encode(ord, ord_buf);
1022     
1023     wrbuf_putc(term_dict, '(');
1024     
1025     for (i = 0; i<ord_len; i++)
1026     {
1027         wrbuf_putc(term_dict, 1);  /* our internal regexp escape char */
1028         wrbuf_putc(term_dict, ord_buf[i]);
1029     }
1030     wrbuf_putc(term_dict, ')');
1031     
1032     prefix_len = wrbuf_len(term_dict);
1033
1034     if (zebra_maps_is_icu(zm))
1035     {
1036         int relation_value;
1037         AttrType relation;
1038         
1039         attr_init_APT(&relation, zapt, 2);
1040         relation_value = attr_find(&relation, NULL);
1041         if (relation_value == 103) /* always matches */
1042             termp += strlen(termp); /* move to end of term */
1043         else if (relation_value == 3 || relation_value == 102 || relation_value == -1)
1044         {
1045             /* ICU case */
1046             switch (truncation_value)
1047             {
1048             case -1:         /* not specified */
1049             case 100:        /* do not truncate */
1050                 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0))
1051                 {
1052                     *term_sub = 0;
1053                     return ZEBRA_OK;
1054                 }
1055                 break;
1056             case 1:          /* right truncation */
1057                 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1))
1058                 {
1059                     *term_sub = 0;
1060                     return ZEBRA_OK;
1061                 }
1062                 break;
1063             case 2:
1064                 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 2))
1065                 {
1066                     *term_sub = 0;
1067                     return ZEBRA_OK;
1068                 }
1069                 break;
1070             case 3:
1071                 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 3))
1072                 {
1073                     *term_sub = 0;
1074                     return ZEBRA_OK;
1075                 }
1076                 break;
1077             default:
1078                 zebra_setError_zint(zh,
1079                                     YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1080                                     truncation_value);
1081                 return ZEBRA_FAIL;
1082             }
1083         }
1084         else
1085         {
1086             zebra_setError_zint(zh,
1087                                 YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE,
1088                                 relation_value);
1089             return ZEBRA_FAIL;
1090         }
1091     }
1092     else
1093     {
1094         /* non-ICU case. using string.chr and friends */
1095         switch (truncation_value)
1096         {
1097         case -1:         /* not specified */
1098         case 100:        /* do not truncate */
1099             if (!string_relation(zh, zapt, &termp, term_dict,
1100                                  attributeSet,
1101                                  zm, space_split, display_term,
1102                                  &relation_error))
1103             {
1104                 if (relation_error)
1105                 {
1106                     zebra_setError(zh, relation_error, 0);
1107                     return ZEBRA_FAIL;
1108                 }
1109                 *term_sub = 0;
1110                 return ZEBRA_OK;
1111             }
1112             break;
1113         case 1:          /* right truncation */
1114             wrbuf_putc(term_dict, '(');
1115             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1116             {
1117                 *term_sub = 0;
1118                 return ZEBRA_OK;
1119             }
1120             wrbuf_puts(term_dict, ".*)");
1121             break;
1122         case 2:          /* left truncation */
1123             wrbuf_puts(term_dict, "(.*");
1124             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1125             {
1126                 *term_sub = 0;
1127                 return ZEBRA_OK;
1128             }
1129             wrbuf_putc(term_dict, ')');
1130             break;
1131         case 3:          /* left&right truncation */
1132             wrbuf_puts(term_dict, "(.*");
1133             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1134             {
1135                 *term_sub = 0;
1136                 return ZEBRA_OK;
1137             }
1138             wrbuf_puts(term_dict, ".*)");
1139             break;
1140         case 101:        /* process # in term */
1141             wrbuf_putc(term_dict, '(');
1142             if (!term_101(zm, &termp, term_dict, space_split, display_term))
1143             {
1144                 *term_sub = 0;
1145                 return ZEBRA_OK;
1146             }
1147             wrbuf_puts(term_dict, ")");
1148             break;
1149         case 102:        /* Regexp-1 */
1150             wrbuf_putc(term_dict, '(');
1151             if (!term_102(zm, &termp, term_dict, space_split, display_term))
1152             {
1153                 *term_sub = 0;
1154                 return ZEBRA_OK;
1155             }
1156             wrbuf_putc(term_dict, ')');
1157             break;
1158         case 103:       /* Regexp-2 */
1159             regex_range = 1;
1160             wrbuf_putc(term_dict, '(');
1161             if (!term_103(zm, &termp, term_dict, &regex_range,
1162                           space_split, display_term))
1163             {
1164                 *term_sub = 0;
1165                 return ZEBRA_OK;
1166             }
1167             wrbuf_putc(term_dict, ')');
1168             break;
1169         case 104:        /* process ?n * # term */
1170             wrbuf_putc(term_dict, '(');
1171             if (!term_104(zm, &termp, term_dict, space_split, display_term))
1172             {
1173                 *term_sub = 0;
1174                 return ZEBRA_OK;
1175             }
1176             wrbuf_putc(term_dict, ')');
1177             break;
1178         case 105:        /* process * ! in term and right truncate */
1179             wrbuf_putc(term_dict, '(');
1180             if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1181             {
1182                 *term_sub = 0;
1183                 return ZEBRA_OK;
1184             }
1185             wrbuf_putc(term_dict, ')');
1186             break;
1187         case 106:        /* process * ! in term */
1188             wrbuf_putc(term_dict, '(');
1189             if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1190             {
1191                 *term_sub = 0;
1192                 return ZEBRA_OK;
1193             }
1194             wrbuf_putc(term_dict, ')');
1195             break;
1196         default:
1197             zebra_setError_zint(zh,
1198                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1199                                 truncation_value);
1200             return ZEBRA_FAIL;
1201         }
1202     }
1203     if (1)
1204     {
1205         char buf[1000];
1206         const char *input = wrbuf_cstr(term_dict) + prefix_len;
1207         esc_str(buf, sizeof(buf), input, strlen(input));
1208     }
1209     {
1210         WRBUF pr_wr = wrbuf_alloc();
1211
1212         wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1213         yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1214         wrbuf_destroy(pr_wr);
1215     }
1216     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1217                          grep_info, &max_pos, 
1218                          ord_len /* number of "exact" chars */,
1219                          grep_handle);
1220     if (r == 1)
1221         zebra_set_partial_result(zh);
1222     else if (r)
1223         yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1224     *term_sub = termp;
1225     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1226     return ZEBRA_OK;
1227 }
1228
1229
1230
1231 static void grep_info_delete(struct grep_info *grep_info)
1232 {
1233 #ifdef TERM_COUNT
1234     xfree(grep_info->term_no);
1235 #endif
1236     xfree(grep_info->isam_p_buf);
1237 }
1238
1239 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1240                                    Z_AttributesPlusTerm *zapt,
1241                                    struct grep_info *grep_info,
1242                                    const char *index_type)
1243 {
1244 #ifdef TERM_COUNT
1245     grep_info->term_no = 0;
1246 #endif
1247     grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1248     grep_info->isam_p_size = 0;
1249     grep_info->isam_p_buf = NULL;
1250     grep_info->zh = zh;
1251     grep_info->index_type = index_type;
1252     grep_info->termset = 0;
1253     if (zapt)
1254     {
1255         AttrType truncmax;
1256         int truncmax_value;
1257
1258         attr_init_APT(&truncmax, zapt, 13);
1259         truncmax_value = attr_find(&truncmax, NULL);
1260         if (truncmax_value != -1)
1261             grep_info->trunc_max = truncmax_value;
1262     }
1263     if (zapt)
1264     {
1265         AttrType termset;
1266         int termset_value_numeric;
1267         const char *termset_value_string;
1268
1269         attr_init_APT(&termset, zapt, 8);
1270         termset_value_numeric =
1271             attr_find_ex(&termset, NULL, &termset_value_string);
1272         if (termset_value_numeric != -1)
1273         {
1274 #if TERMSET_DISABLE
1275             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1276             return ZEBRA_FAIL;
1277 #else
1278             char resname[32];
1279             const char *termset_name = 0;
1280             if (termset_value_numeric != -2)
1281             {
1282                 
1283                 sprintf(resname, "%d", termset_value_numeric);
1284                 termset_name = resname;
1285             }
1286             else
1287                 termset_name = termset_value_string;
1288             yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1289             grep_info->termset = resultSetAdd(zh, termset_name, 1);
1290             if (!grep_info->termset)
1291             {
1292                 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1293                 return ZEBRA_FAIL;
1294             }
1295 #endif
1296         }
1297     }
1298     return ZEBRA_OK;
1299 }
1300
1301 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1302                                      Z_AttributesPlusTerm *zapt,
1303                                      const char *termz,
1304                                      const Odr_oid *attributeSet,
1305                                      zint hits_limit,
1306                                      NMEM stream,
1307                                      const char *index_type, int complete_flag,
1308                                      const char *rank_type,
1309                                      const char *xpath_use,
1310                                      NMEM rset_nmem,
1311                                      RSET **result_sets, int *num_result_sets,
1312                                      struct rset_key_control *kc,
1313                                      zebra_map_t zm)
1314 {
1315     struct grep_info grep_info;
1316     const char *termp = termz;
1317     int alloc_sets = 0;
1318     
1319     *num_result_sets = 0;
1320     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1321         return ZEBRA_FAIL;
1322     while (1)
1323     { 
1324         ZEBRA_RES res;
1325
1326         if (alloc_sets == *num_result_sets)
1327         {
1328             int add = 10;
1329             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1330                                               sizeof(*rnew));
1331             if (alloc_sets)
1332                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1333             alloc_sets = alloc_sets + add;
1334             *result_sets = rnew;
1335         }
1336         res = search_term(zh, zapt, &termp, attributeSet, hits_limit,
1337                           stream, &grep_info,
1338                           index_type, complete_flag,
1339                           rank_type,
1340                           xpath_use, rset_nmem,
1341                           &(*result_sets)[*num_result_sets],
1342                           kc, zm);
1343         if (res != ZEBRA_OK)
1344         {
1345             int i;
1346             for (i = 0; i < *num_result_sets; i++)
1347                 rset_delete((*result_sets)[i]);
1348             grep_info_delete(&grep_info);
1349             return res;
1350         }
1351         if ((*result_sets)[*num_result_sets] == 0)
1352             break;
1353         (*num_result_sets)++;
1354
1355         if (!*termp)
1356             break;
1357     }
1358     grep_info_delete(&grep_info);
1359     return ZEBRA_OK;
1360 }
1361                                
1362 /**
1363    \brief Create result set(s) for list of terms
1364    \param zh Zebra Handle
1365    \param zapt Attributes Plust Term (RPN leaf)
1366    \param termz term as used in query but converted to UTF-8
1367    \param attributeSet default attribute set
1368    \param stream memory for result
1369    \param index_type register type ("w", "p",..)
1370    \param complete_flag whether it's phrases or not
1371    \param rank_type term flags for ranking
1372    \param xpath_use use attribute for X-Path (-1 for no X-path)
1373    \param rset_nmem memory for result sets
1374    \param result_sets output result set for each term in list (output)
1375    \param num_result_sets number of output result sets
1376    \param kc rset key control to be used for created result sets
1377 */
1378 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1379                                    Z_AttributesPlusTerm *zapt,
1380                                    const char *termz,
1381                                    const Odr_oid *attributeSet,
1382                                    zint hits_limit,
1383                                    NMEM stream,
1384                                    const char *index_type, int complete_flag,
1385                                    const char *rank_type,
1386                                    const char *xpath_use,
1387                                    NMEM rset_nmem,
1388                                    RSET **result_sets, int *num_result_sets,
1389                                    struct rset_key_control *kc)
1390 {
1391     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1392     if (zebra_maps_is_icu(zm))
1393         zebra_map_tokenize_start(zm, termz, strlen(termz));
1394     return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit,
1395                                stream, index_type, complete_flag,
1396                                rank_type, xpath_use,
1397                                rset_nmem, result_sets, num_result_sets,
1398                                kc, zm);
1399 }
1400
1401
1402 /** \brief limit a search by position - returns result set
1403  */
1404 static ZEBRA_RES search_position(ZebraHandle zh,
1405                                  Z_AttributesPlusTerm *zapt,
1406                                  const Odr_oid *attributeSet,
1407                                  const char *index_type,
1408                                  NMEM rset_nmem,
1409                                  RSET *rset,
1410                                  struct rset_key_control *kc)
1411 {
1412     int position_value;
1413     AttrType position;
1414     int ord = -1;
1415     char ord_buf[32];
1416     char term_dict[100];
1417     int ord_len;
1418     char *val;
1419     ISAM_P isam_p;
1420     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1421     
1422     attr_init_APT(&position, zapt, 3);
1423     position_value = attr_find(&position, NULL);
1424     switch(position_value)
1425     {
1426     case 3:
1427     case -1:
1428         return ZEBRA_OK;
1429     case 1:
1430     case 2:
1431         break;
1432     default:
1433         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1434                             position_value);
1435         return ZEBRA_FAIL;
1436     }
1437
1438
1439     if (!zebra_maps_is_first_in_field(zm))
1440     {
1441         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1442                             position_value);
1443         return ZEBRA_FAIL;
1444     }
1445
1446     if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1447                           attributeSet, &ord) != ZEBRA_OK)
1448     {
1449         return ZEBRA_FAIL;
1450     }
1451     ord_len = key_SU_encode(ord, ord_buf);
1452     memcpy(term_dict, ord_buf, ord_len);
1453     strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1454     val = dict_lookup(zh->reg->dict, term_dict);
1455     if (val)
1456     {
1457         assert(*val == sizeof(ISAM_P));
1458         memcpy(&isam_p, val+1, sizeof(isam_p));
1459
1460         *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope, 
1461                                        isam_p, 0);
1462     }
1463     return ZEBRA_OK;
1464 }
1465
1466 /** \brief returns result set for phrase search
1467  */
1468 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1469                                        Z_AttributesPlusTerm *zapt,
1470                                        const char *termz_org,
1471                                        const Odr_oid *attributeSet,
1472                                        zint hits_limit,
1473                                        NMEM stream,
1474                                        const char *index_type,
1475                                        int complete_flag,
1476                                        const char *rank_type,
1477                                        const char *xpath_use,
1478                                        NMEM rset_nmem,
1479                                        RSET *rset,
1480                                        struct rset_key_control *kc)
1481 {
1482     RSET *result_sets = 0;
1483     int num_result_sets = 0;
1484     ZEBRA_RES res =
1485         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1486                           stream, index_type, complete_flag,
1487                           rank_type, xpath_use,
1488                           rset_nmem,
1489                           &result_sets, &num_result_sets, kc);
1490     
1491     if (res != ZEBRA_OK)
1492         return res;
1493
1494     if (num_result_sets > 0)
1495     {
1496         RSET first_set = 0;
1497         res = search_position(zh, zapt, attributeSet, 
1498                               index_type,
1499                               rset_nmem, &first_set,
1500                               kc);
1501         if (res != ZEBRA_OK)
1502         {
1503             int i;
1504             for (i = 0; i<num_result_sets; i++)
1505                 rset_delete(result_sets[i]);
1506             return res;
1507         }
1508         if (first_set)
1509         {
1510             RSET *nsets = nmem_malloc(stream,
1511                                       sizeof(RSET) * (num_result_sets+1));
1512             nsets[0] = first_set;
1513             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1514             result_sets = nsets;
1515             num_result_sets++;
1516         }
1517     }
1518     if (num_result_sets == 0)
1519         *rset = rset_create_null(rset_nmem, kc, 0); 
1520     else if (num_result_sets == 1)
1521         *rset = result_sets[0];
1522     else
1523         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1524                                  num_result_sets, result_sets,
1525                                  1 /* ordered */, 0 /* exclusion */,
1526                                  3 /* relation */, 1 /* distance */);
1527     if (!*rset)
1528         return ZEBRA_FAIL;
1529     return ZEBRA_OK;
1530 }
1531
1532 /** \brief returns result set for or-list search
1533  */
1534 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1535                                         Z_AttributesPlusTerm *zapt,
1536                                         const char *termz_org,
1537                                         const Odr_oid *attributeSet,
1538                                         zint hits_limit,
1539                                         NMEM stream,
1540                                         const char *index_type, 
1541                                         int complete_flag,
1542                                         const char *rank_type,
1543                                         const char *xpath_use,
1544                                         NMEM rset_nmem,
1545                                         RSET *rset,
1546                                         struct rset_key_control *kc)
1547 {
1548     RSET *result_sets = 0;
1549     int num_result_sets = 0;
1550     int i;
1551     ZEBRA_RES res =
1552         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1553                           stream, index_type, complete_flag,
1554                           rank_type, xpath_use,
1555                           rset_nmem,
1556                           &result_sets, &num_result_sets, kc);
1557     if (res != ZEBRA_OK)
1558         return res;
1559
1560     for (i = 0; i<num_result_sets; i++)
1561     {
1562         RSET first_set = 0;
1563         res = search_position(zh, zapt, attributeSet, 
1564                               index_type,
1565                               rset_nmem, &first_set,
1566                               kc);
1567         if (res != ZEBRA_OK)
1568         {
1569             for (i = 0; i<num_result_sets; i++)
1570                 rset_delete(result_sets[i]);
1571             return res;
1572         }
1573
1574         if (first_set)
1575         {
1576             RSET tmp_set[2];
1577
1578             tmp_set[0] = first_set;
1579             tmp_set[1] = result_sets[i];
1580             
1581             result_sets[i] = rset_create_prox(
1582                 rset_nmem, kc, kc->scope,
1583                 2, tmp_set,
1584                 1 /* ordered */, 0 /* exclusion */,
1585                 3 /* relation */, 1 /* distance */);
1586         }
1587     }
1588     if (num_result_sets == 0)
1589         *rset = rset_create_null(rset_nmem, kc, 0); 
1590     else if (num_result_sets == 1)
1591         *rset = result_sets[0];
1592     else
1593         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1594                                num_result_sets, result_sets);
1595     if (!*rset)
1596         return ZEBRA_FAIL;
1597     return ZEBRA_OK;
1598 }
1599
1600 /** \brief returns result set for and-list search
1601  */
1602 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1603                                          Z_AttributesPlusTerm *zapt,
1604                                          const char *termz_org,
1605                                          const Odr_oid *attributeSet,
1606                                          zint hits_limit,
1607                                          NMEM stream,
1608                                          const char *index_type, 
1609                                          int complete_flag,
1610                                          const char *rank_type, 
1611                                          const char *xpath_use,
1612                                          NMEM rset_nmem,
1613                                          RSET *rset,
1614                                          struct rset_key_control *kc)
1615 {
1616     RSET *result_sets = 0;
1617     int num_result_sets = 0;
1618     int i;
1619     ZEBRA_RES res =
1620         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1621                           stream, index_type, complete_flag,
1622                           rank_type, xpath_use,
1623                           rset_nmem,
1624                           &result_sets, &num_result_sets,
1625                           kc);
1626     if (res != ZEBRA_OK)
1627         return res;
1628     for (i = 0; i<num_result_sets; i++)
1629     {
1630         RSET first_set = 0;
1631         res = search_position(zh, zapt, attributeSet, 
1632                               index_type,
1633                               rset_nmem, &first_set,
1634                               kc);
1635         if (res != ZEBRA_OK)
1636         {
1637             for (i = 0; i<num_result_sets; i++)
1638                 rset_delete(result_sets[i]);
1639             return res;
1640         }
1641
1642         if (first_set)
1643         {
1644             RSET tmp_set[2];
1645
1646             tmp_set[0] = first_set;
1647             tmp_set[1] = result_sets[i];
1648             
1649             result_sets[i] = rset_create_prox(
1650                 rset_nmem, kc, kc->scope,
1651                 2, tmp_set,
1652                 1 /* ordered */, 0 /* exclusion */,
1653                 3 /* relation */, 1 /* distance */);
1654         }
1655     }
1656
1657
1658     if (num_result_sets == 0)
1659         *rset = rset_create_null(rset_nmem, kc, 0); 
1660     else if (num_result_sets == 1)
1661         *rset = result_sets[0];
1662     else
1663         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1664                                 num_result_sets, result_sets);
1665     if (!*rset)
1666         return ZEBRA_FAIL;
1667     return ZEBRA_OK;
1668 }
1669
1670 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1671                             const char **term_sub,
1672                             WRBUF term_dict,
1673                             const Odr_oid *attributeSet,
1674                             struct grep_info *grep_info,
1675                             int *max_pos,
1676                             zebra_map_t zm,
1677                             WRBUF display_term,
1678                             int *error_code)
1679 {
1680     AttrType relation;
1681     int relation_value;
1682     int term_value;
1683     int r;
1684     WRBUF term_num = wrbuf_alloc();
1685
1686     *error_code = 0;
1687     attr_init_APT(&relation, zapt, 2);
1688     relation_value = attr_find(&relation, NULL);
1689
1690     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1691
1692     switch (relation_value)
1693     {
1694     case 1:
1695         yaz_log(log_level_rpn, "Relation <");
1696         if (!term_100(zm, term_sub, term_num, 1, display_term))
1697         { 
1698             wrbuf_destroy(term_num);
1699             return 0;
1700         }
1701         term_value = atoi(wrbuf_cstr(term_num));
1702         gen_regular_rel(term_dict, term_value-1, 1);
1703         break;
1704     case 2:
1705         yaz_log(log_level_rpn, "Relation <=");
1706         if (!term_100(zm, term_sub, term_num, 1, display_term))
1707         {
1708             wrbuf_destroy(term_num);
1709             return 0;
1710         }
1711         term_value = atoi(wrbuf_cstr(term_num));
1712         gen_regular_rel(term_dict, term_value, 1);
1713         break;
1714     case 4:
1715         yaz_log(log_level_rpn, "Relation >=");
1716         if (!term_100(zm, term_sub, term_num, 1, display_term))
1717         {
1718             wrbuf_destroy(term_num);
1719             return 0;
1720         }
1721         term_value = atoi(wrbuf_cstr(term_num));
1722         gen_regular_rel(term_dict, term_value, 0);
1723         break;
1724     case 5:
1725         yaz_log(log_level_rpn, "Relation >");
1726         if (!term_100(zm, term_sub, term_num, 1, display_term))
1727         {
1728             wrbuf_destroy(term_num);
1729             return 0;
1730         }
1731         term_value = atoi(wrbuf_cstr(term_num));
1732         gen_regular_rel(term_dict, term_value+1, 0);
1733         break;
1734     case -1:
1735     case 3:
1736         yaz_log(log_level_rpn, "Relation =");
1737         if (!term_100(zm, term_sub, term_num, 1, display_term))
1738         {
1739             wrbuf_destroy(term_num);
1740             return 0; 
1741         }
1742         term_value = atoi(wrbuf_cstr(term_num));
1743         wrbuf_printf(term_dict, "(0*%d)", term_value);
1744         break;
1745     case 103:
1746         /* term_tmp untouched.. */
1747         while (**term_sub != '\0')
1748             (*term_sub)++;
1749         break;
1750     default:
1751         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1752         wrbuf_destroy(term_num); 
1753         return 0;
1754     }
1755     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 
1756                          0, grep_info, max_pos, 0, grep_handle);
1757
1758     if (r == 1)
1759         zebra_set_partial_result(zh);
1760     else if (r)
1761         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1762     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1763     wrbuf_destroy(term_num);
1764     return 1;
1765 }
1766
1767 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1768                               const char **term_sub, 
1769                               WRBUF term_dict,
1770                               const Odr_oid *attributeSet, NMEM stream,
1771                               struct grep_info *grep_info,
1772                               const char *index_type, int complete_flag,
1773                               WRBUF display_term,
1774                               const char *xpath_use,
1775                               struct ord_list **ol)
1776 {
1777     const char *termp;
1778     struct rpn_char_map_info rcmi;
1779     int max_pos;
1780     int relation_error = 0;
1781     int ord, ord_len, i;
1782     char ord_buf[32];
1783     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1784     
1785     *ol = ord_list_create(stream);
1786
1787     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1788
1789     termp = *term_sub;
1790     
1791     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1792                           attributeSet, &ord) != ZEBRA_OK)
1793     {
1794         return ZEBRA_FAIL;
1795     }
1796     
1797     wrbuf_rewind(term_dict);
1798     
1799     *ol = ord_list_append(stream, *ol, ord);
1800     
1801     ord_len = key_SU_encode(ord, ord_buf);
1802     
1803     wrbuf_putc(term_dict, '(');
1804     for (i = 0; i < ord_len; i++)
1805     {
1806         wrbuf_putc(term_dict, 1);
1807         wrbuf_putc(term_dict, ord_buf[i]);
1808     }
1809     wrbuf_putc(term_dict, ')');
1810     
1811     if (!numeric_relation(zh, zapt, &termp, term_dict,
1812                           attributeSet, grep_info, &max_pos, zm,
1813                           display_term, &relation_error))
1814     {
1815         if (relation_error)
1816         {
1817             zebra_setError(zh, relation_error, 0);
1818             return ZEBRA_FAIL;
1819         }
1820         *term_sub = 0;
1821         return ZEBRA_OK;
1822     }
1823     *term_sub = termp;
1824     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1825     return ZEBRA_OK;
1826 }
1827
1828                                  
1829 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1830                                         Z_AttributesPlusTerm *zapt,
1831                                         const char *termz,
1832                                         const Odr_oid *attributeSet,
1833                                         zint hits_limit,
1834                                         NMEM stream,
1835                                         const char *index_type, 
1836                                         int complete_flag,
1837                                         const char *rank_type, 
1838                                         const char *xpath_use,
1839                                         NMEM rset_nmem,
1840                                         RSET *rset,
1841                                         struct rset_key_control *kc)
1842 {
1843     const char *termp = termz;
1844     RSET *result_sets = 0;
1845     int num_result_sets = 0;
1846     ZEBRA_RES res;
1847     struct grep_info grep_info;
1848     int alloc_sets = 0;
1849     zint hits_limit_value = hits_limit;
1850     const char *term_ref_id_str = 0;
1851
1852     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1853                           stream);
1854
1855     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1856     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1857         return ZEBRA_FAIL;
1858     while (1)
1859     { 
1860         struct ord_list *ol;
1861         WRBUF term_dict = wrbuf_alloc();
1862         WRBUF display_term = wrbuf_alloc();
1863         if (alloc_sets == num_result_sets)
1864         {
1865             int add = 10;
1866             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1867                                               sizeof(*rnew));
1868             if (alloc_sets)
1869                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1870             alloc_sets = alloc_sets + add;
1871             result_sets = rnew;
1872         }
1873         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1874         grep_info.isam_p_indx = 0;
1875         res = numeric_term(zh, zapt, &termp, term_dict,
1876                            attributeSet, stream, &grep_info,
1877                            index_type, complete_flag,
1878                            display_term, xpath_use, &ol);
1879         wrbuf_destroy(term_dict);
1880         if (res == ZEBRA_FAIL || termp == 0)
1881         {
1882             wrbuf_destroy(display_term);
1883             break;
1884         }
1885         yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
1886         result_sets[num_result_sets] =
1887             rset_trunc(zh, grep_info.isam_p_buf,
1888                        grep_info.isam_p_indx, wrbuf_buf(display_term),
1889                        wrbuf_len(display_term), rank_type,
1890                        0 /* preserve position */,
1891                        zapt->term->which, rset_nmem, 
1892                        kc, kc->scope, ol, index_type,
1893                        hits_limit_value,
1894                        term_ref_id_str);
1895         wrbuf_destroy(display_term);
1896         if (!result_sets[num_result_sets])
1897             break;
1898         num_result_sets++;
1899         if (!*termp)
1900             break;
1901     }
1902     grep_info_delete(&grep_info);
1903
1904     if (res != ZEBRA_OK)
1905         return res;
1906     if (num_result_sets == 0)
1907         *rset = rset_create_null(rset_nmem, kc, 0);
1908     else if (num_result_sets == 1)
1909         *rset = result_sets[0];
1910     else
1911         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1912                                 num_result_sets, result_sets);
1913     if (!*rset)
1914         return ZEBRA_FAIL;
1915     return ZEBRA_OK;
1916 }
1917
1918 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1919                                       Z_AttributesPlusTerm *zapt,
1920                                       const char *termz,
1921                                       const Odr_oid *attributeSet,
1922                                       NMEM stream,
1923                                       const char *rank_type, NMEM rset_nmem,
1924                                       RSET *rset,
1925                                       struct rset_key_control *kc)
1926 {
1927     Record rec;
1928     zint sysno = atozint(termz);
1929     
1930     if (sysno <= 0)
1931         sysno = 0;
1932     rec = rec_get(zh->reg->records, sysno);
1933     if (!rec)
1934         sysno = 0;
1935
1936     rec_free(&rec);
1937
1938     if (sysno <= 0)
1939     {
1940         *rset = rset_create_null(rset_nmem, kc, 0);
1941     }
1942     else
1943     {
1944         RSFD rsfd;
1945         struct it_key key;
1946         *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1947                                  res_get(zh->res, "setTmpDir"), 0);
1948         rsfd = rset_open(*rset, RSETF_WRITE);
1949         
1950         key.mem[0] = sysno;
1951         key.mem[1] = 1;
1952         key.len = 2;
1953         rset_write(rsfd, &key);
1954         rset_close(rsfd);
1955     }
1956     return ZEBRA_OK;
1957 }
1958
1959 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1960                                const Odr_oid *attributeSet, NMEM stream,
1961                                Z_SortKeySpecList *sort_sequence,
1962                                const char *rank_type,
1963                                NMEM rset_nmem,
1964                                RSET *rset,
1965                                struct rset_key_control *kc)
1966 {
1967     int i;
1968     int sort_relation_value;
1969     AttrType sort_relation_type;
1970     Z_SortKeySpec *sks;
1971     Z_SortKey *sk;
1972     char termz[20];
1973     
1974     attr_init_APT(&sort_relation_type, zapt, 7);
1975     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1976
1977     if (!sort_sequence->specs)
1978     {
1979         sort_sequence->num_specs = 10;
1980         sort_sequence->specs = (Z_SortKeySpec **)
1981             nmem_malloc(stream, sort_sequence->num_specs *
1982                         sizeof(*sort_sequence->specs));
1983         for (i = 0; i<sort_sequence->num_specs; i++)
1984             sort_sequence->specs[i] = 0;
1985     }
1986     if (zapt->term->which != Z_Term_general)
1987         i = 0;
1988     else
1989         i = atoi_n((char *) zapt->term->u.general->buf,
1990                    zapt->term->u.general->len);
1991     if (i >= sort_sequence->num_specs)
1992         i = 0;
1993     sprintf(termz, "%d", i);
1994
1995     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1996     sks->sortElement = (Z_SortElement *)
1997         nmem_malloc(stream, sizeof(*sks->sortElement));
1998     sks->sortElement->which = Z_SortElement_generic;
1999     sk = sks->sortElement->u.generic = (Z_SortKey *)
2000         nmem_malloc(stream, sizeof(*sk));
2001     sk->which = Z_SortKey_sortAttributes;
2002     sk->u.sortAttributes = (Z_SortAttributes *)
2003         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
2004
2005     sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
2006     sk->u.sortAttributes->list = zapt->attributes;
2007
2008     sks->sortRelation = (Odr_int *)
2009         nmem_malloc(stream, sizeof(*sks->sortRelation));
2010     if (sort_relation_value == 1)
2011         *sks->sortRelation = Z_SortKeySpec_ascending;
2012     else if (sort_relation_value == 2)
2013         *sks->sortRelation = Z_SortKeySpec_descending;
2014     else 
2015         *sks->sortRelation = Z_SortKeySpec_ascending;
2016
2017     sks->caseSensitivity = (Odr_int *)
2018         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2019     *sks->caseSensitivity = 0;
2020
2021     sks->which = Z_SortKeySpec_null;
2022     sks->u.null = odr_nullval ();
2023     sort_sequence->specs[i] = sks;
2024     *rset = rset_create_null(rset_nmem, kc, 0);
2025     return ZEBRA_OK;
2026 }
2027
2028
2029 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2030                            const Odr_oid *attributeSet,
2031                            struct xpath_location_step *xpath, int max,
2032                            NMEM mem)
2033 {
2034     const Odr_oid *curAttributeSet = attributeSet;
2035     AttrType use;
2036     const char *use_string = 0;
2037     
2038     attr_init_APT(&use, zapt, 1);
2039     attr_find_ex(&use, &curAttributeSet, &use_string);
2040
2041     if (!use_string || *use_string != '/')
2042         return -1;
2043
2044     return zebra_parse_xpath_str(use_string, xpath, max, mem);
2045 }
2046  
2047                
2048
2049 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2050                         const char *index_type, const char *term, 
2051                         const char *xpath_use,
2052                         NMEM rset_nmem,
2053                         struct rset_key_control *kc)
2054 {
2055     struct grep_info grep_info;
2056     int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
2057                                            zinfo_index_category_index,
2058                                            index_type, xpath_use);
2059     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2060         return rset_create_null(rset_nmem, kc, 0);
2061     
2062     if (ord < 0)
2063         return rset_create_null(rset_nmem, kc, 0);
2064     else
2065     {
2066         int i, r, max_pos;
2067         char ord_buf[32];
2068         RSET rset;
2069         WRBUF term_dict = wrbuf_alloc();
2070         int ord_len = key_SU_encode(ord, ord_buf);
2071         int term_type = Z_Term_characterString;
2072         const char *flags = "void";
2073
2074         wrbuf_putc(term_dict, '(');
2075         for (i = 0; i<ord_len; i++)
2076         {
2077             wrbuf_putc(term_dict, 1);
2078             wrbuf_putc(term_dict, ord_buf[i]);
2079         }
2080         wrbuf_putc(term_dict, ')');
2081         wrbuf_puts(term_dict, term);
2082         
2083         grep_info.isam_p_indx = 0;
2084         r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2085                              &grep_info, &max_pos, 0, grep_handle);
2086         yaz_log(YLOG_DEBUG, "%s %d positions", term,
2087                 grep_info.isam_p_indx);
2088         rset = rset_trunc(zh, grep_info.isam_p_buf,
2089                           grep_info.isam_p_indx, term, strlen(term),
2090                           flags, 1, term_type, rset_nmem,
2091                           kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2092                           0 /* term_ref_id_str */);
2093         grep_info_delete(&grep_info);
2094         wrbuf_destroy(term_dict);
2095         return rset;
2096     }
2097 }
2098
2099 static
2100 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2101                            NMEM stream, const char *rank_type, RSET rset,
2102                            int xpath_len, struct xpath_location_step *xpath,
2103                            NMEM rset_nmem,
2104                            RSET *rset_out,
2105                            struct rset_key_control *kc)
2106 {
2107     int i;
2108     int always_matches = rset ? 0 : 1;
2109
2110     if (xpath_len < 0)
2111     {
2112         *rset_out = rset;
2113         return ZEBRA_OK;
2114     }
2115
2116     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2117     for (i = 0; i<xpath_len; i++)
2118     {
2119         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2120
2121     }
2122
2123     /*
2124     //a    ->    a/.*
2125     //a/b  ->    b/a/.*
2126     /a     ->    a/
2127     /a/b   ->    b/a/
2128
2129     /      ->    none
2130
2131     a[@attr = value]/b[@other = othervalue]
2132
2133     /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2134     /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2135     /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2136     /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2137     /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2138     /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2139       
2140     */
2141
2142     dict_grep_cmap(zh->reg->dict, 0, 0);
2143     
2144     {
2145         int level = xpath_len;
2146         int first_path = 1;
2147         
2148         while (--level >= 0)
2149         {
2150             WRBUF xpath_rev = wrbuf_alloc();
2151             int i;
2152             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2153
2154             for (i = level; i >= 1; --i)
2155             {
2156                 const char *cp = xpath[i].part;
2157                 if (*cp)
2158                 {
2159                     for (; *cp; cp++)
2160                     {
2161                         if (*cp == '*')
2162                             wrbuf_puts(xpath_rev, "[^/]*");
2163                         else if (*cp == ' ')
2164                             wrbuf_puts(xpath_rev, "\001 ");
2165                         else
2166                             wrbuf_putc(xpath_rev, *cp);
2167
2168                         /* wrbuf_putc does not null-terminate , but
2169                            wrbuf_puts below ensures it does.. so xpath_rev
2170                            is OK iff length is > 0 */
2171                     }
2172                     wrbuf_puts(xpath_rev, "/");
2173                 }
2174                 else if (i == 1)  /* // case */
2175                     wrbuf_puts(xpath_rev, ".*");
2176             }
2177             if (xpath[level].predicate &&
2178                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2179                 xpath[level].predicate->u.relation.name[0])
2180             {
2181                 WRBUF wbuf = wrbuf_alloc();
2182                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2183                 if (xpath[level].predicate->u.relation.value)
2184                 {
2185                     const char *cp = xpath[level].predicate->u.relation.value;
2186                     wrbuf_putc(wbuf, '=');
2187                     
2188                     while (*cp)
2189                     {
2190                         if (strchr(REGEX_CHARS, *cp))
2191                             wrbuf_putc(wbuf, '\\');
2192                         wrbuf_putc(wbuf, *cp);
2193                         cp++;
2194                     }
2195                 }
2196                 rset_attr = xpath_trunc(
2197                     zh, stream, "0", wrbuf_cstr(wbuf), 
2198                     ZEBRA_XPATH_ATTR_NAME, 
2199                     rset_nmem, kc);
2200                 wrbuf_destroy(wbuf);
2201             } 
2202             else 
2203             {
2204                 if (!first_path)
2205                 {
2206                     wrbuf_destroy(xpath_rev);
2207                     continue;
2208                 }
2209             }
2210             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, 
2211                     wrbuf_cstr(xpath_rev));
2212             if (wrbuf_len(xpath_rev))
2213             {
2214                 rset_start_tag = xpath_trunc(zh, stream, "0", 
2215                                              wrbuf_cstr(xpath_rev),
2216                                              ZEBRA_XPATH_ELM_BEGIN, 
2217                                              rset_nmem, kc);
2218                 if (always_matches)
2219                     rset = rset_start_tag;
2220                 else
2221                 {
2222                     rset_end_tag = xpath_trunc(zh, stream, "0", 
2223                                                wrbuf_cstr(xpath_rev),
2224                                                ZEBRA_XPATH_ELM_END, 
2225                                                rset_nmem, kc);
2226                     
2227                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2228                                                rset_start_tag, rset,
2229                                                rset_end_tag, rset_attr);
2230                 }
2231             }
2232             wrbuf_destroy(xpath_rev);
2233             first_path = 0;
2234         }
2235     }
2236     *rset_out = rset;
2237     return ZEBRA_OK;
2238 }
2239
/* Capacity of the X-Path step array that rpn_search_database hands to
   rpn_check_xpath. */
#define MAX_XPATH_STEPS 10
2241
/* Forward declaration: rpn_search_APT below calls this function before
   its definition. */
static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
                                     Z_AttributesPlusTerm *zapt,
                                     const Odr_oid *attributeSet,
                                     zint hits_limit, NMEM stream,
                                     Z_SortKeySpecList *sort_sequence,
                                     NMEM rset_nmem,
                                     RSET *rset,
                                     struct rset_key_control *kc);
2250
2251 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2252                                 const Odr_oid *attributeSet,
2253                                 zint hits_limit, NMEM stream,
2254                                 Z_SortKeySpecList *sort_sequence,
2255                                 int num_bases, const char **basenames, 
2256                                 NMEM rset_nmem,
2257                                 RSET *rset,
2258                                 struct rset_key_control *kc)
2259 {
2260     RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2261     ZEBRA_RES res = ZEBRA_OK;
2262     int i;
2263     for (i = 0; i < num_bases; i++)
2264     {
2265
2266         if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2267         {
2268             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2269                            basenames[i]);
2270             res = ZEBRA_FAIL;
2271             break;
2272         }
2273         res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream,
2274                                   sort_sequence,
2275                                   rset_nmem, rsets+i, kc);
2276         if (res != ZEBRA_OK)
2277             break;
2278     }
2279     if (res != ZEBRA_OK)
2280     {   /* must clean up the already created sets */
2281         while (--i >= 0)
2282             rset_delete(rsets[i]);
2283         *rset = 0;
2284     }
2285     else 
2286     {
2287         if (num_bases == 1)
2288             *rset = rsets[0];
2289         else if (num_bases == 0)
2290             *rset = rset_create_null(rset_nmem, kc, 0); 
2291         else
2292             *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
2293                                    num_bases, rsets);
2294     }
2295     return res;
2296 }
2297
2298 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2299                                      Z_AttributesPlusTerm *zapt,
2300                                      const Odr_oid *attributeSet,
2301                                      zint hits_limit, NMEM stream,
2302                                      Z_SortKeySpecList *sort_sequence,
2303                                      NMEM rset_nmem,
2304                                      RSET *rset,
2305                                      struct rset_key_control *kc)
2306 {
2307     ZEBRA_RES res = ZEBRA_OK;
2308     const char *index_type;
2309     char *search_type = NULL;
2310     char rank_type[128];
2311     int complete_flag;
2312     int sort_flag;
2313     char termz[IT_MAX_WORD+1];
2314     int xpath_len;
2315     const char *xpath_use = 0;
2316     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2317
2318     if (!log_level_set)
2319     {
2320         log_level_rpn = yaz_log_module_level("rpn");
2321         log_level_set = 1;
2322     }
2323     zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2324                     rank_type, &complete_flag, &sort_flag);
2325     
2326     yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2327     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2328     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2329     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2330
2331     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2332         return ZEBRA_FAIL;
2333
2334     if (sort_flag)
2335         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2336                              rank_type, rset_nmem, rset, kc);
2337     /* consider if an X-Path query is used */
2338     xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
2339                                 xpath, MAX_XPATH_STEPS, stream);
2340     if (xpath_len >= 0)
2341     {
2342         if (xpath[xpath_len-1].part[0] == '@') 
2343             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2344         else
2345             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
2346
2347         if (1)
2348         {
2349             AttrType relation;
2350             int relation_value;
2351
2352             attr_init_APT(&relation, zapt, 2);
2353             relation_value = attr_find(&relation, NULL);
2354
2355             if (relation_value == 103) /* alwaysmatches */
2356             {
2357                 *rset = 0; /* signal no "term" set */
2358                 return rpn_search_xpath(zh, stream, rank_type, *rset, 
2359                                         xpath_len, xpath, rset_nmem, rset, kc);
2360             }
2361         }
2362     }
2363
2364     /* search using one of the various search type strategies
2365        termz is our UTF-8 search term
2366        attributeSet is top-level default attribute set 
2367        stream is ODR for search
2368        reg_id is the register type
2369        complete_flag is 1 for complete subfield, 0 for incomplete
2370        xpath_use is use-attribute to be used for X-Path search, 0 for none
2371     */
2372     if (!strcmp(search_type, "phrase"))
2373     {
2374         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit,
2375                                     stream,
2376                                     index_type, complete_flag, rank_type,
2377                                     xpath_use,
2378                                     rset_nmem,
2379                                     rset, kc);
2380     }
2381     else if (!strcmp(search_type, "and-list"))
2382     {
2383         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit,
2384                                       stream,
2385                                       index_type, complete_flag, rank_type,
2386                                       xpath_use,
2387                                       rset_nmem,
2388                                       rset, kc);
2389     }
2390     else if (!strcmp(search_type, "or-list"))
2391     {
2392         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit,
2393                                      stream,
2394                                      index_type, complete_flag, rank_type,
2395                                      xpath_use,
2396                                      rset_nmem,
2397                                      rset, kc);
2398     }
2399     else if (!strcmp(search_type, "local"))
2400     {
2401         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2402                                    rank_type, rset_nmem, rset, kc);
2403     }
2404     else if (!strcmp(search_type, "numeric"))
2405     {
2406         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, hits_limit,
2407                                      stream,
2408                                      index_type, complete_flag, rank_type,
2409                                      xpath_use,
2410                                      rset_nmem,
2411                                      rset, kc);
2412     }
2413     else
2414     {
2415         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2416         res = ZEBRA_FAIL;
2417     }
2418     if (res != ZEBRA_OK)
2419         return res;
2420     if (!*rset)
2421         return ZEBRA_FAIL;
2422     return rpn_search_xpath(zh, stream, rank_type, *rset, 
2423                             xpath_len, xpath, rset_nmem, rset, kc);
2424 }
2425
/* Forward declaration: rpn_search_top below calls this function before
   its definition. */
static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
                                      const Odr_oid *attributeSet,
                                      zint hits_limit,
                                      NMEM stream, NMEM rset_nmem,
                                      Z_SortKeySpecList *sort_sequence,
                                      int num_bases, const char **basenames,
                                      RSET **result_sets, int *num_result_sets,
                                      Z_Operator *parent_op,
                                      struct rset_key_control *kc);
2435
2436 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2437                                    zint *approx_limit)
2438 {
2439     ZEBRA_RES res = ZEBRA_OK;
2440     if (zs->which == Z_RPNStructure_complex)
2441     {
2442         if (res == ZEBRA_OK)
2443             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2444                                            approx_limit);
2445         if (res == ZEBRA_OK)
2446             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2447                                            approx_limit);
2448     }
2449     else if (zs->which == Z_RPNStructure_simple)
2450     {
2451         if (zs->u.simple->which == Z_Operand_APT)
2452         {
2453             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2454             AttrType global_hits_limit_attr;
2455             int l;
2456             
2457             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2458             
2459             l = attr_find(&global_hits_limit_attr, NULL);
2460             if (l != -1)
2461                 *approx_limit = l;
2462         }
2463     }
2464     return res;
2465 }
2466
2467 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2468                          const Odr_oid *attributeSet, 
2469                          zint hits_limit,
2470                          NMEM stream, NMEM rset_nmem,
2471                          Z_SortKeySpecList *sort_sequence,
2472                          int num_bases, const char **basenames,
2473                          RSET *result_set)
2474 {
2475     RSET *result_sets = 0;
2476     int num_result_sets = 0;
2477     ZEBRA_RES res;
2478     struct rset_key_control *kc = zebra_key_control_create(zh);
2479
2480     res = rpn_search_structure(zh, zs, attributeSet, hits_limit,
2481                                stream, rset_nmem,
2482                                sort_sequence, 
2483                                num_bases, basenames,
2484                                &result_sets, &num_result_sets,
2485                                0 /* no parent op */,
2486                                kc);
2487     if (res != ZEBRA_OK)
2488     {
2489         int i;
2490         for (i = 0; i<num_result_sets; i++)
2491             rset_delete(result_sets[i]);
2492         *result_set = 0;
2493     }
2494     else
2495     {
2496         assert(num_result_sets == 1);
2497         assert(result_sets);
2498         assert(*result_sets);
2499         *result_set = *result_sets;
2500     }
2501     (*kc->dec)(kc);
2502     return res;
2503 }
2504
2505 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2506                                const Odr_oid *attributeSet, zint hits_limit,
2507                                NMEM stream, NMEM rset_nmem,
2508                                Z_SortKeySpecList *sort_sequence,
2509                                int num_bases, const char **basenames,
2510                                RSET **result_sets, int *num_result_sets,
2511                                Z_Operator *parent_op,
2512                                struct rset_key_control *kc)
2513 {
2514     *num_result_sets = 0;
2515     if (zs->which == Z_RPNStructure_complex)
2516     {
2517         ZEBRA_RES res;
2518         Z_Operator *zop = zs->u.complex->roperator;
2519         RSET *result_sets_l = 0;
2520         int num_result_sets_l = 0;
2521         RSET *result_sets_r = 0;
2522         int num_result_sets_r = 0;
2523
2524         res = rpn_search_structure(zh, zs->u.complex->s1,
2525                                    attributeSet, hits_limit, stream, rset_nmem,
2526                                    sort_sequence,
2527                                    num_bases, basenames,
2528                                    &result_sets_l, &num_result_sets_l,
2529                                    zop, kc);
2530         if (res != ZEBRA_OK)
2531         {
2532             int i;
2533             for (i = 0; i<num_result_sets_l; i++)
2534                 rset_delete(result_sets_l[i]);
2535             return res;
2536         }
2537         res = rpn_search_structure(zh, zs->u.complex->s2,
2538                                    attributeSet, hits_limit, stream, rset_nmem,
2539                                    sort_sequence,
2540                                    num_bases, basenames,
2541                                    &result_sets_r, &num_result_sets_r,
2542                                    zop, kc);
2543         if (res != ZEBRA_OK)
2544         {
2545             int i;
2546             for (i = 0; i<num_result_sets_l; i++)
2547                 rset_delete(result_sets_l[i]);
2548             for (i = 0; i<num_result_sets_r; i++)
2549                 rset_delete(result_sets_r[i]);
2550             return res;
2551         }
2552
2553         /* make a new list of result for all children */
2554         *num_result_sets = num_result_sets_l + num_result_sets_r;
2555         *result_sets = nmem_malloc(stream, *num_result_sets * 
2556                                    sizeof(**result_sets));
2557         memcpy(*result_sets, result_sets_l, 
2558                num_result_sets_l * sizeof(**result_sets));
2559         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2560                num_result_sets_r * sizeof(**result_sets));
2561
2562         if (!parent_op || parent_op->which != zop->which
2563             || (zop->which != Z_Operator_and &&
2564                 zop->which != Z_Operator_or))
2565         {
2566             /* parent node different from this one (or non-present) */
2567             /* we must combine result sets now */
2568             RSET rset;
2569             switch (zop->which)
2570             {
2571             case Z_Operator_and:
2572                 rset = rset_create_and(rset_nmem, kc,
2573                                        kc->scope,
2574                                        *num_result_sets, *result_sets);
2575                 break;
2576             case Z_Operator_or:
2577                 rset = rset_create_or(rset_nmem, kc,
2578                                       kc->scope, 0, /* termid */
2579                                       *num_result_sets, *result_sets);
2580                 break;
2581             case Z_Operator_and_not:
2582                 rset = rset_create_not(rset_nmem, kc,
2583                                        kc->scope,
2584                                        (*result_sets)[0],
2585                                        (*result_sets)[1]);
2586                 break;
2587             case Z_Operator_prox:
2588                 if (zop->u.prox->which != Z_ProximityOperator_known)
2589                 {
2590                     zebra_setError(zh, 
2591                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2592                                    0);
2593                     return ZEBRA_FAIL;
2594                 }
2595                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2596                 {
2597                     zebra_setError_zint(zh,
2598                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2599                                         *zop->u.prox->u.known);
2600                     return ZEBRA_FAIL;
2601                 }
2602                 else
2603                 {
2604                     rset = rset_create_prox(rset_nmem, kc,
2605                                             kc->scope,
2606                                             *num_result_sets, *result_sets, 
2607                                             *zop->u.prox->ordered,
2608                                             (!zop->u.prox->exclusion ? 
2609                                              0 : *zop->u.prox->exclusion),
2610                                             *zop->u.prox->relationType,
2611                                             *zop->u.prox->distance );
2612                 }
2613                 break;
2614             default:
2615                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2616                 return ZEBRA_FAIL;
2617             }
2618             *num_result_sets = 1;
2619             *result_sets = nmem_malloc(stream, *num_result_sets * 
2620                                        sizeof(**result_sets));
2621             (*result_sets)[0] = rset;
2622         }
2623     }
2624     else if (zs->which == Z_RPNStructure_simple)
2625     {
2626         RSET rset;
2627         ZEBRA_RES res;
2628
2629         if (zs->u.simple->which == Z_Operand_APT)
2630         {
2631             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2632             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2633                                  attributeSet, hits_limit,
2634                                  stream, sort_sequence,
2635                                  num_bases, basenames, rset_nmem, &rset,
2636                                  kc);
2637             if (res != ZEBRA_OK)
2638                 return res;
2639         }
2640         else if (zs->u.simple->which == Z_Operand_resultSetId)
2641         {
2642             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2643             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2644             if (!rset)
2645             {
2646                 zebra_setError(zh, 
2647                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2648                                zs->u.simple->u.resultSetId);
2649                 return ZEBRA_FAIL;
2650             }
2651             rset_dup(rset);
2652         }
2653         else
2654         {
2655             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2656             return ZEBRA_FAIL;
2657         }
2658         *num_result_sets = 1;
2659         *result_sets = nmem_malloc(stream, *num_result_sets * 
2660                                    sizeof(**result_sets));
2661         (*result_sets)[0] = rset;
2662     }
2663     else
2664     {
2665         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2666         return ZEBRA_FAIL;
2667     }
2668     return ZEBRA_OK;
2669 }
2670
2671
2672
2673 /*
2674  * Local variables:
2675  * c-basic-offset: 4
2676  * c-file-style: "Stroustrup"
2677  * indent-tabs-mode: nil
2678  * End:
2679  * vim: shiftwidth=4 tabstop=8 expandtab
2680  */
2681