Escape backslash for ICU terms in searches
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* $Id: rpnsearch.c,v 1.29 2007-12-17 12:24:50 adam Exp $
2    Copyright (C) 1995-2007
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
/* Log level for the "rpn" module.  log_level_set stays 0 until
   log_level_rpn has been fetched (add_isam_p does this on first use). */
static int log_level_set;
static int log_level_rpn;

#define TERMSET_DISABLE 1
44
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
46 {
47     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48     const char **out = zebra_maps_input(p->zm, from, len, 0);
49 #if 0
50     if (out && *out)
51     {
52         const char *outp = *out;
53         yaz_log(YLOG_LOG, "---");
54         while (*outp)
55         {
56             yaz_log(YLOG_LOG, "%02X", *outp);
57             outp++;
58         }
59     }
60 #endif
61     return out;
62 }
63
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65                           struct rpn_char_map_info *map_info)
66 {
67     map_info->zm = zm;
68     if (zebra_maps_is_icu(zm))
69         dict_grep_cmap(reg->dict, 0, 0);
70     else
71         dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
72 }
73
74 #define TERM_COUNT        
75        
76 struct grep_info {        
77 #ifdef TERM_COUNT        
78     int *term_no;        
79 #endif        
80     ISAM_P *isam_p_buf;
81     int isam_p_size;        
82     int isam_p_indx;
83     int trunc_max;
84     ZebraHandle zh;
85     const char *index_type;
86     ZebraSet termset;
87 };        
88
89 static int add_isam_p(const char *name, const char *info,
90                       struct grep_info *p)
91 {
92     if (!log_level_set)
93     {
94         log_level_rpn = yaz_log_module_level("rpn");
95         log_level_set = 1;
96     }
97     /* we may have to stop this madness.. NOTE: -1 so that if
98        truncmax == trunxlimit we do *not* generate result sets */
99     if (p->isam_p_indx >= p->trunc_max - 1)
100         return 1;
101
102     if (p->isam_p_indx == p->isam_p_size)
103     {
104         ISAM_P *new_isam_p_buf;
105 #ifdef TERM_COUNT        
106         int *new_term_no;        
107 #endif
108         p->isam_p_size = 2*p->isam_p_size + 100;
109         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
110                                             p->isam_p_size);
111         if (p->isam_p_buf)
112         {
113             memcpy(new_isam_p_buf, p->isam_p_buf,
114                    p->isam_p_indx * sizeof(*p->isam_p_buf));
115             xfree(p->isam_p_buf);
116         }
117         p->isam_p_buf = new_isam_p_buf;
118
119 #ifdef TERM_COUNT
120         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
121         if (p->term_no)
122         {
123             memcpy(new_term_no, p->isam_p_buf,
124                    p->isam_p_indx * sizeof(*p->term_no));
125             xfree(p->term_no);
126         }
127         p->term_no = new_term_no;
128 #endif
129     }
130     assert(*info == sizeof(*p->isam_p_buf));
131     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
132
133     if (p->termset)
134     {
135         const char *db;
136         char term_tmp[IT_MAX_WORD];
137         int ord = 0;
138         const char *index_name;
139         int len = key_SU_decode(&ord, (const unsigned char *) name);
140         
141         zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143         zebraExplain_lookup_ord(p->zh->reg->zei,
144                                 ord, 0 /* index_type */, &db, &index_name);
145         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
146         
147         resultSetAddTerm(p->zh, p->termset, name[len], db,
148                          index_name, term_tmp);
149     }
150     (p->isam_p_indx)++;
151     return 0;
152 }
153
/* Untyped wrapper around add_isam_p: p is the struct grep_info passed
   as client data. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *gi = (struct grep_info *) p;

    return add_isam_p(name, info, gi);
}
158
159 static int term_pre(zebra_map_t zm, const char **src,
160                     const char *ct1, const char *ct2, int first)
161 {
162     const char *s1, *s0 = *src;
163     const char **map;
164
165     /* skip white space */
166     while (*s0)
167     {
168         if (ct1 && strchr(ct1, *s0))
169             break;
170         if (ct2 && strchr(ct2, *s0))
171             break;
172         s1 = s0;
173         map = zebra_maps_input(zm, &s1, strlen(s1), first);
174         if (**map != *CHR_SPACE)
175             break;
176         s0 = s1;
177     }
178     *src = s0;
179     return *s0;
180 }
181
182
/* esc_str: render in_size bytes of in_buf into out_buf as a sequence of
 * "HH:c  " groups (hex value, then the character itself or '?' when it
 * is outside 32..126).  out_size must exceed 20; once the text grows
 * past out_size-20 characters ".." is appended and the rest is dropped.
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;   /* length of the text written so far */
    int idx = 0;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    while (idx < in_size)
    {
        const int byte = in_buf[idx++] & 0xff;
        const int shown = (byte >= 32 && byte <= 126) ? byte : '?';

        used += (size_t) sprintf(out_buf + used, "%02X:%c  ", byte, shown);
        if (used > out_size - 20)
        {
            strcpy(out_buf + used, "..");
            return;
        }
    }
}
208
209 #define REGEX_CHARS " []()|.*+?!"
210
211 static void add_non_space(const char *start, const char *end,
212                           WRBUF term_dict,
213                           WRBUF display_term,
214                           const char **map, int q_map_match)
215 {
216     size_t sz = end - start;
217
218     wrbuf_write(display_term, start, sz);
219     if (!q_map_match)
220     {
221         while (start < end)
222         {
223             if (strchr(REGEX_CHARS, *start))
224                 wrbuf_putc(term_dict, '\\');
225             wrbuf_putc(term_dict, *start);
226             start++;
227         }
228     }
229     else
230     {
231         char tmpbuf[80];
232         esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
233         
234         wrbuf_puts(term_dict, map[0]);
235     }
236 }
237
238
239 static int term_100_icu(zebra_map_t zm,
240                         const char **src, WRBUF term_dict, int space_split,
241                         WRBUF display_term)
242 {
243     int i;
244     const char *res_buf = 0;
245     size_t res_len = 0;
246     const char *display_buf;
247     size_t display_len;
248     if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
249                                  &display_buf, &display_len))
250     {
251         *src += strlen(*src);
252         return 0;
253     }
254     wrbuf_write(display_term, display_buf, display_len);
255     for (i = 0; i < res_len; i++)
256     {
257         if (strchr(REGEX_CHARS "\\", res_buf[i]))
258             wrbuf_putc(term_dict, '\\');
259         if (res_buf[i] < 32)
260             wrbuf_putc(term_dict, 1);
261         wrbuf_putc(term_dict, res_buf[i]);
262     }
263     return 1;
264 }
265
266 /* term_100: handle term, where trunc = none(no operators at all) */
267 static int term_100(zebra_map_t zm,
268                     const char **src, WRBUF term_dict, int space_split,
269                     WRBUF display_term)
270 {
271     const char *s0;
272     const char **map;
273     int i = 0;
274
275     const char *space_start = 0;
276     const char *space_end = 0;
277
278     if (zebra_maps_is_icu(zm))
279         return term_100_icu(zm, src, term_dict, space_split, display_term);
280
281     if (!term_pre(zm, src, NULL, NULL, !space_split))
282         return 0;
283     s0 = *src;
284     while (*s0)
285     {
286         const char *s1 = s0;
287         int q_map_match = 0;
288         map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
289         if (space_split)
290         {
291             if (**map == *CHR_SPACE)
292                 break;
293         }
294         else  /* complete subfield only. */
295         {
296             if (**map == *CHR_SPACE)
297             {   /* save space mapping for later  .. */
298                 space_start = s1;
299                 space_end = s0;
300                 continue;
301             }
302             else if (space_start)
303             {   /* reload last space */
304                 while (space_start < space_end)
305                 {
306                     if (strchr(REGEX_CHARS, *space_start))
307                         wrbuf_putc(term_dict, '\\');
308                     wrbuf_putc(display_term, *space_start);
309                     wrbuf_putc(term_dict, *space_start);
310                     space_start++;
311                                
312                 }
313                 /* and reset */
314                 space_start = space_end = 0;
315             }
316         }
317         i++;
318
319         add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
320     }
321     *src = s0;
322     return i;
323 }
324
325 /* term_101: handle term, where trunc = Process # */
326 static int term_101(zebra_map_t zm,
327                     const char **src, WRBUF term_dict, int space_split,
328                     WRBUF display_term)
329 {
330     const char *s0;
331     const char **map;
332     int i = 0;
333
334     if (!term_pre(zm, src, "#", "#", !space_split))
335         return 0;
336     s0 = *src;
337     while (*s0)
338     {
339         if (*s0 == '#')
340         {
341             i++;
342             wrbuf_puts(term_dict, ".*");
343             wrbuf_putc(display_term, *s0);
344             s0++;
345         }
346         else
347         {
348             const char *s1 = s0;
349             int q_map_match = 0;
350             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
351             if (space_split && **map == *CHR_SPACE)
352                 break;
353
354             i++;
355             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
356         }
357     }
358     *src = s0;
359     return i;
360 }
361
362 /* term_103: handle term, where trunc = re-2 (regular expressions) */
363 static int term_103(zebra_map_t zm, const char **src,
364                     WRBUF term_dict, int *errors, int space_split,
365                     WRBUF display_term)
366 {
367     int i = 0;
368     const char *s0;
369     const char **map;
370
371     if (!term_pre(zm, src, "^\\()[].*+?|", "(", !space_split))
372         return 0;
373     s0 = *src;
374     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
375         isdigit(((const unsigned char *)s0)[1]))
376     {
377         *errors = s0[1] - '0';
378         s0 += 3;
379         if (*errors > 3)
380             *errors = 3;
381     }
382     while (*s0)
383     {
384         if (strchr("^\\()[].*+?|-", *s0))
385         {
386             wrbuf_putc(display_term, *s0);
387             wrbuf_putc(term_dict, *s0);
388             s0++;
389             i++;
390         }
391         else
392         {
393             const char *s1 = s0;
394             int q_map_match = 0;
395             map = zebra_maps_search(zm, &s0, strlen(s0),  &q_map_match);
396             if (space_split && **map == *CHR_SPACE)
397                 break;
398
399             i++;
400             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
401         }
402     }
403     *src = s0;
404     
405     return i;
406 }
407
408 /* term_103: handle term, where trunc = re-1 (regular expressions) */
409 static int term_102(zebra_map_t zm, const char **src,
410                     WRBUF term_dict, int space_split, WRBUF display_term)
411 {
412     return term_103(zm, src, term_dict, NULL, space_split, display_term);
413 }
414
415
416 /* term_104: handle term, process # and ! */
417 static int term_104(zebra_map_t zm, const char **src, 
418                     WRBUF term_dict, int space_split, WRBUF display_term)
419 {
420     const char *s0;
421     const char **map;
422     int i = 0;
423
424     if (!term_pre(zm, src, "?*#", "?*#", !space_split))
425         return 0;
426     s0 = *src;
427     while (*s0)
428     {
429         if (*s0 == '?')
430         {
431             i++;
432             wrbuf_putc(display_term, *s0);
433             s0++;
434             if (*s0 >= '0' && *s0 <= '9')
435             {
436                 int limit = 0;
437                 while (*s0 >= '0' && *s0 <= '9')
438                 {
439                     limit = limit * 10 + (*s0 - '0');
440                     wrbuf_putc(display_term, *s0);
441                     s0++;
442                 }
443                 if (limit > 20)
444                     limit = 20;
445                 while (--limit >= 0)
446                 {
447                     wrbuf_puts(term_dict, ".?");
448                 }
449             }
450             else
451             {
452                 wrbuf_puts(term_dict, ".*");
453             }
454         }
455         else if (*s0 == '*')
456         {
457             i++;
458             wrbuf_puts(term_dict, ".*");
459             wrbuf_putc(display_term, *s0);
460             s0++;
461         }
462         else if (*s0 == '#')
463         {
464             i++;
465             wrbuf_puts(term_dict, ".");
466             wrbuf_putc(display_term, *s0);
467             s0++;
468         }
469         else
470         {
471             const char *s1 = s0;
472             int q_map_match = 0;
473             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
474             if (space_split && **map == *CHR_SPACE)
475                 break;
476
477             i++;
478             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
479         }
480     }
481     *src = s0;
482     return i;
483 }
484
485 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
486 static int term_105(zebra_map_t zm, const char **src, 
487                     WRBUF term_dict, int space_split,
488                     WRBUF display_term, int right_truncate)
489 {
490     const char *s0;
491     const char **map;
492     int i = 0;
493
494     if (!term_pre(zm, src, "*!", "*!", !space_split))
495         return 0;
496     s0 = *src;
497     while (*s0)
498     {
499         if (*s0 == '*')
500         {
501             i++;
502             wrbuf_puts(term_dict, ".*");
503             wrbuf_putc(display_term, *s0);
504             s0++;
505         }
506         else if (*s0 == '!')
507         {
508             i++;
509             wrbuf_putc(term_dict, '.');
510             wrbuf_putc(display_term, *s0);
511             s0++;
512         }
513         else
514         {
515             const char *s1 = s0;
516             int q_map_match = 0;
517             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
518             if (space_split && **map == *CHR_SPACE)
519                 break;
520
521             i++;
522             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
523         }
524     }
525     if (right_truncate)
526         wrbuf_puts(term_dict, ".*");
527     *src = s0;
528     return i;
529 }
530
531
532 /* gen_regular_rel - generate regular expression from relation
533  *  val:     border value (inclusive)
534  *  islt:    1 if <=; 0 if >=.
535  */
536 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
537 {
538     char dst_buf[20*5*20]; /* assuming enough for expansion */
539     char *dst = dst_buf;
540     int dst_p;
541     int w, d, i;
542     int pos = 0;
543     char numstr[20];
544
545     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
546     if (val >= 0)
547     {
548         if (islt)
549             strcpy(dst, "(-[0-9]+|(");
550         else
551             strcpy(dst, "((");
552     } 
553     else
554     {
555         if (!islt)
556         {
557             strcpy(dst, "([0-9]+|-(");
558             islt = 1;
559         }
560         else
561         {
562             strcpy(dst, "(-(");
563             islt = 0;
564         }
565         val = -val;
566     }
567     dst_p = strlen(dst);
568     sprintf(numstr, "%d", val);
569     for (w = strlen(numstr); --w >= 0; pos++)
570     {
571         d = numstr[w];
572         if (pos > 0)
573         {
574             if (islt)
575             {
576                 if (d == '0')
577                     continue;
578                 d--;
579             } 
580             else
581             {
582                 if (d == '9')
583                     continue;
584                 d++;
585             }
586         }
587         
588         strcpy(dst + dst_p, numstr);
589         dst_p = strlen(dst) - pos - 1;
590
591         if (islt)
592         {
593             if (d != '0')
594             {
595                 dst[dst_p++] = '[';
596                 dst[dst_p++] = '0';
597                 dst[dst_p++] = '-';
598                 dst[dst_p++] = d;
599                 dst[dst_p++] = ']';
600             }
601             else
602                 dst[dst_p++] = d;
603         }
604         else
605         {
606             if (d != '9')
607             { 
608                 dst[dst_p++] = '[';
609                 dst[dst_p++] = d;
610                 dst[dst_p++] = '-';
611                 dst[dst_p++] = '9';
612                 dst[dst_p++] = ']';
613             }
614             else
615                 dst[dst_p++] = d;
616         }
617         for (i = 0; i<pos; i++)
618         {
619             dst[dst_p++] = '[';
620             dst[dst_p++] = '0';
621             dst[dst_p++] = '-';
622             dst[dst_p++] = '9';
623             dst[dst_p++] = ']';
624         }
625         dst[dst_p++] = '|';
626     }
627     dst[dst_p] = '\0';
628     if (islt)
629     {
630         /* match everything less than 10^(pos-1) */
631         strcat(dst, "0*");
632         for (i = 1; i<pos; i++)
633             strcat(dst, "[0-9]?");
634     }
635     else
636     {
637         /* match everything greater than 10^pos */
638         for (i = 0; i <= pos; i++)
639             strcat(dst, "[0-9]");
640         strcat(dst, "[0-9]*");
641     }
642     strcat(dst, "))");
643     wrbuf_puts(term_dict, dst);
644 }
645
646 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
647 {
648     const char *src = wrbuf_cstr(wsrc);
649     if (src[*indx] == '\\')
650     {
651         wrbuf_putc(term_p, src[*indx]);
652         (*indx)++;
653     }
654     wrbuf_putc(term_p, src[*indx]);
655     (*indx)++;
656 }
657
658 /*
659  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
660  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
661  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
662  *              ([^-a].*|a[^-b].*|ab[c-].*)
663  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
664  *              ([^a-].*|a[^b-].*|ab[^c-].*)
665  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
666  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
667  */
668 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
669                            const char **term_sub, WRBUF term_dict,
670                            const Odr_oid *attributeSet,
671                            zebra_map_t zm, int space_split, 
672                            WRBUF display_term,
673                            int *error_code)
674 {
675     AttrType relation;
676     int relation_value;
677     int i;
678     WRBUF term_component = wrbuf_alloc();
679
680     attr_init_APT(&relation, zapt, 2);
681     relation_value = attr_find(&relation, NULL);
682
683     *error_code = 0;
684     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
685     switch (relation_value)
686     {
687     case 1:
688         if (!term_100(zm, term_sub, term_component, space_split, display_term))
689         {
690             wrbuf_destroy(term_component);
691             return 0;
692         }
693         yaz_log(log_level_rpn, "Relation <");
694         
695         wrbuf_putc(term_dict, '(');
696         for (i = 0; i < wrbuf_len(term_component); )
697         {
698             int j = 0;
699             
700             if (i)
701                 wrbuf_putc(term_dict, '|');
702             while (j < i)
703                 string_rel_add_char(term_dict, term_component, &j);
704
705             wrbuf_putc(term_dict, '[');
706
707             wrbuf_putc(term_dict, '^');
708             
709             wrbuf_putc(term_dict, 1);
710             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
711             
712             string_rel_add_char(term_dict, term_component, &i);
713             wrbuf_putc(term_dict, '-');
714             
715             wrbuf_putc(term_dict, ']');
716             wrbuf_putc(term_dict, '.');
717             wrbuf_putc(term_dict, '*');
718         }
719         wrbuf_putc(term_dict, ')');
720         break;
721     case 2:
722         if (!term_100(zm, term_sub, term_component, space_split, display_term))
723         {
724             wrbuf_destroy(term_component);
725             return 0;
726         }
727         yaz_log(log_level_rpn, "Relation <=");
728
729         wrbuf_putc(term_dict, '(');
730         for (i = 0; i < wrbuf_len(term_component); )
731         {
732             int j = 0;
733
734             while (j < i)
735                 string_rel_add_char(term_dict, term_component, &j);
736             wrbuf_putc(term_dict, '[');
737
738             wrbuf_putc(term_dict, '^');
739
740             wrbuf_putc(term_dict, 1);
741             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
742
743             string_rel_add_char(term_dict, term_component, &i);
744             wrbuf_putc(term_dict, '-');
745
746             wrbuf_putc(term_dict, ']');
747             wrbuf_putc(term_dict, '.');
748             wrbuf_putc(term_dict, '*');
749
750             wrbuf_putc(term_dict, '|');
751         }
752         for (i = 0; i < wrbuf_len(term_component); )
753             string_rel_add_char(term_dict, term_component, &i);
754         wrbuf_putc(term_dict, ')');
755         break;
756     case 5:
757         if (!term_100(zm, term_sub, term_component, space_split, display_term))
758         {
759             wrbuf_destroy(term_component);
760             return 0;
761         }
762         yaz_log(log_level_rpn, "Relation >");
763
764         wrbuf_putc(term_dict, '(');
765         for (i = 0; i < wrbuf_len(term_component); )
766         {
767             int j = 0;
768
769             while (j < i)
770                 string_rel_add_char(term_dict, term_component, &j);
771             wrbuf_putc(term_dict, '[');
772             
773             wrbuf_putc(term_dict, '^');
774             wrbuf_putc(term_dict, '-');
775             string_rel_add_char(term_dict, term_component, &i);
776
777             wrbuf_putc(term_dict, ']');
778             wrbuf_putc(term_dict, '.');
779             wrbuf_putc(term_dict, '*');
780
781             wrbuf_putc(term_dict, '|');
782         }
783         for (i = 0; i < wrbuf_len(term_component); )
784             string_rel_add_char(term_dict, term_component, &i);
785         wrbuf_putc(term_dict, '.');
786         wrbuf_putc(term_dict, '+');
787         wrbuf_putc(term_dict, ')');
788         break;
789     case 4:
790         if (!term_100(zm, term_sub, term_component, space_split, display_term))
791         {
792             wrbuf_destroy(term_component);
793             return 0;
794         }
795         yaz_log(log_level_rpn, "Relation >=");
796
797         wrbuf_putc(term_dict, '(');
798         for (i = 0; i < wrbuf_len(term_component); )
799         {
800             int j = 0;
801
802             if (i)
803                 wrbuf_putc(term_dict, '|');
804             while (j < i)
805                 string_rel_add_char(term_dict, term_component, &j);
806             wrbuf_putc(term_dict, '[');
807
808             if (i < wrbuf_len(term_component)-1)
809             {
810                 wrbuf_putc(term_dict, '^');
811                 wrbuf_putc(term_dict, '-');
812                 string_rel_add_char(term_dict, term_component, &i);
813             }
814             else
815             {
816                 string_rel_add_char(term_dict, term_component, &i);
817                 wrbuf_putc(term_dict, '-');
818             }
819             wrbuf_putc(term_dict, ']');
820             wrbuf_putc(term_dict, '.');
821             wrbuf_putc(term_dict, '*');
822         }
823         wrbuf_putc(term_dict, ')');
824         break;
825     case 3:
826     case 102:
827     case -1:
828         if (!**term_sub)
829             return 1;
830         yaz_log(log_level_rpn, "Relation =");
831         if (!term_100(zm, term_sub, term_component, space_split, display_term))
832         {
833             wrbuf_destroy(term_component);
834             return 0;
835         }
836         wrbuf_puts(term_dict, "(");
837         wrbuf_puts(term_dict, wrbuf_cstr(term_component));
838         wrbuf_puts(term_dict, ")");
839         break;
840     case 103:
841         yaz_log(log_level_rpn, "Relation always matches");
842         /* skip to end of term (we don't care what it is) */
843         while (**term_sub != '\0')
844             (*term_sub)++;
845         break;
846     default:
847         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
848         wrbuf_destroy(term_component);
849         return 0;
850     }
851     wrbuf_destroy(term_component);
852     return 1;
853 }
854
855 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
856                              const char **term_sub, 
857                              WRBUF term_dict,
858                              const Odr_oid *attributeSet, NMEM stream,
859                              struct grep_info *grep_info,
860                              const char *index_type, int complete_flag,
861                              WRBUF display_term,
862                              const char *xpath_use,
863                              struct ord_list **ol,
864                              zebra_map_t zm);
865
866 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
867                                 Z_AttributesPlusTerm *zapt,
868                                 zint *hits_limit_value,
869                                 const char **term_ref_id_str,
870                                 NMEM nmem)
871 {
872     AttrType term_ref_id_attr;
873     AttrType hits_limit_attr;
874     int term_ref_id_int;
875  
876     attr_init_APT(&hits_limit_attr, zapt, 11);
877     *hits_limit_value  = attr_find(&hits_limit_attr, NULL);
878
879     attr_init_APT(&term_ref_id_attr, zapt, 10);
880     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
881     if (term_ref_id_int >= 0)
882     {
883         char *res = nmem_malloc(nmem, 20);
884         sprintf(res, "%d", term_ref_id_int);
885         *term_ref_id_str = res;
886     }
887
888     /* no limit given ? */
889     if (*hits_limit_value == -1)
890     {
891         if (*term_ref_id_str)
892         {
893             /* use global if term_ref is present */
894             *hits_limit_value = zh->approx_limit;
895         }
896         else
897         {
898             /* no counting if term_ref is not present */
899             *hits_limit_value = 0;
900         }
901     }
902     else if (*hits_limit_value == 0)
903     {
904         /* 0 is the same as global limit */
905         *hits_limit_value = zh->approx_limit;
906     }
907     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
908             *term_ref_id_str ? *term_ref_id_str : "none",
909             *hits_limit_value);
910     return ZEBRA_OK;
911 }
912
913 /** \brief search for term (which may be truncated)
914  */
915 static ZEBRA_RES search_term(ZebraHandle zh,
916                              Z_AttributesPlusTerm *zapt,
917                              const char **term_sub, 
918                              const Odr_oid *attributeSet, NMEM stream,
919                              struct grep_info *grep_info,
920                              const char *index_type, int complete_flag,
921                              const char *rank_type, 
922                              const char *xpath_use,
923                              NMEM rset_nmem,
924                              RSET *rset,
925                              struct rset_key_control *kc,
926                              zebra_map_t zm)
927 {
928     ZEBRA_RES res;
929     struct ord_list *ol;
930     zint hits_limit_value;
931     const char *term_ref_id_str = 0;
932     WRBUF term_dict = wrbuf_alloc();
933     WRBUF display_term = wrbuf_alloc();
934     *rset = 0;
935     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
936                           stream);
937     grep_info->isam_p_indx = 0;
938     res = string_term(zh, zapt, term_sub, term_dict,
939                       attributeSet, stream, grep_info,
940                       index_type, complete_flag,
941                       display_term, xpath_use, &ol, zm);
942     wrbuf_destroy(term_dict);
943     if (res == ZEBRA_OK && *term_sub)
944     {
945         yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
946         *rset = rset_trunc(zh, grep_info->isam_p_buf,
947                            grep_info->isam_p_indx, wrbuf_buf(display_term),
948                            wrbuf_len(display_term), rank_type, 
949                            1 /* preserve pos */,
950                            zapt->term->which, rset_nmem,
951                            kc, kc->scope, ol, index_type, hits_limit_value,
952                            term_ref_id_str);
953         if (!*rset)
954             res = ZEBRA_FAIL;
955     }
956     wrbuf_destroy(display_term);
957     return res;
958 }
959
960 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
961                              const char **term_sub, 
962                              WRBUF term_dict,
963                              const Odr_oid *attributeSet, NMEM stream,
964                              struct grep_info *grep_info,
965                              const char *index_type, int complete_flag,
966                              WRBUF display_term,
967                              const char *xpath_use,
968                              struct ord_list **ol,
969                              zebra_map_t zm)
970 {
971     int r;
972     AttrType truncation;
973     int truncation_value;
974     const char *termp;
975     struct rpn_char_map_info rcmi;
976
977     int space_split = complete_flag ? 0 : 1;
978     int ord = -1;
979     int regex_range = 0;
980     int max_pos, prefix_len = 0;
981     int relation_error;
982     char ord_buf[32];
983     int ord_len, i;
984
985     *ol = ord_list_create(stream);
986
987     rpn_char_map_prepare(zh->reg, zm, &rcmi);
988     attr_init_APT(&truncation, zapt, 5);
989     truncation_value = attr_find(&truncation, NULL);
990     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
991
992     termp = *term_sub; /* start of term for each database */
993     
994     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
995                           attributeSet, &ord) != ZEBRA_OK)
996     {
997         *term_sub = 0;
998         return ZEBRA_FAIL;
999     }
1000     
1001     wrbuf_rewind(term_dict); /* new dictionary regexp term */
1002     
1003     *ol = ord_list_append(stream, *ol, ord);
1004     ord_len = key_SU_encode(ord, ord_buf);
1005     
1006     wrbuf_putc(term_dict, '(');
1007     
1008     for (i = 0; i<ord_len; i++)
1009     {
1010         wrbuf_putc(term_dict, 1);  /* our internal regexp escape char */
1011         wrbuf_putc(term_dict, ord_buf[i]);
1012     }
1013     wrbuf_putc(term_dict, ')');
1014     
1015     prefix_len = wrbuf_len(term_dict);
1016     
1017     switch (truncation_value)
1018     {
1019     case -1:         /* not specified */
1020     case 100:        /* do not truncate */
1021         if (!string_relation(zh, zapt, &termp, term_dict,
1022                              attributeSet,
1023                              zm, space_split, display_term,
1024                              &relation_error))
1025         {
1026             if (relation_error)
1027             {
1028                 zebra_setError(zh, relation_error, 0);
1029                 return ZEBRA_FAIL;
1030             }
1031             *term_sub = 0;
1032             return ZEBRA_OK;
1033         }
1034         break;
1035     case 1:          /* right truncation */
1036         wrbuf_putc(term_dict, '(');
1037         if (!term_100(zm, &termp, term_dict, space_split, display_term))
1038         {
1039             *term_sub = 0;
1040             return ZEBRA_OK;
1041         }
1042         wrbuf_puts(term_dict, ".*)");
1043         break;
1044     case 2:          /* keft truncation */
1045         wrbuf_puts(term_dict, "(.*");
1046         if (!term_100(zm, &termp, term_dict, space_split, display_term))
1047         {
1048             *term_sub = 0;
1049             return ZEBRA_OK;
1050         }
1051         wrbuf_putc(term_dict, ')');
1052         break;
1053     case 3:          /* left&right truncation */
1054         wrbuf_puts(term_dict, "(.*");
1055         if (!term_100(zm, &termp, term_dict, space_split, display_term))
1056         {
1057             *term_sub = 0;
1058             return ZEBRA_OK;
1059         }
1060         wrbuf_puts(term_dict, ".*)");
1061         break;
1062     case 101:        /* process # in term */
1063         wrbuf_putc(term_dict, '(');
1064         if (!term_101(zm, &termp, term_dict, space_split, display_term))
1065         {
1066             *term_sub = 0;
1067             return ZEBRA_OK;
1068         }
1069         wrbuf_puts(term_dict, ")");
1070         break;
1071     case 102:        /* Regexp-1 */
1072         wrbuf_putc(term_dict, '(');
1073         if (!term_102(zm, &termp, term_dict, space_split, display_term))
1074         {
1075             *term_sub = 0;
1076             return ZEBRA_OK;
1077         }
1078         wrbuf_putc(term_dict, ')');
1079         break;
1080     case 103:       /* Regexp-2 */
1081         regex_range = 1;
1082         wrbuf_putc(term_dict, '(');
1083         if (!term_103(zm, &termp, term_dict, &regex_range,
1084                       space_split, display_term))
1085         {
1086             *term_sub = 0;
1087             return ZEBRA_OK;
1088         }
1089         wrbuf_putc(term_dict, ')');
1090         break;
1091     case 104:        /* process # and ! in term */
1092         wrbuf_putc(term_dict, '(');
1093         if (!term_104(zm, &termp, term_dict, space_split, display_term))
1094         {
1095             *term_sub = 0;
1096             return ZEBRA_OK;
1097         }
1098         wrbuf_putc(term_dict, ')');
1099         break;
1100     case 105:        /* process * and ! in term */
1101         wrbuf_putc(term_dict, '(');
1102         if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1103         {
1104             *term_sub = 0;
1105             return ZEBRA_OK;
1106         }
1107         wrbuf_putc(term_dict, ')');
1108         break;
1109     case 106:        /* process * and ! in term */
1110         wrbuf_putc(term_dict, '(');
1111         if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1112         {
1113             *term_sub = 0;
1114             return ZEBRA_OK;
1115         }
1116         wrbuf_putc(term_dict, ')');
1117         break;
1118     default:
1119         zebra_setError_zint(zh,
1120                             YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1121                             truncation_value);
1122         return ZEBRA_FAIL;
1123     }
1124     if (1)
1125     {
1126         char buf[1000];
1127         const char *input = wrbuf_cstr(term_dict) + prefix_len;
1128         esc_str(buf, sizeof(buf), input, strlen(input));
1129     }
1130     {
1131         WRBUF pr_wr = wrbuf_alloc();
1132
1133         wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1134         yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1135         wrbuf_destroy(pr_wr);
1136     }
1137     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1138                          grep_info, &max_pos, 
1139                          ord_len /* number of "exact" chars */,
1140                          grep_handle);
1141     if (r == 1)
1142         zebra_set_partial_result(zh);
1143     else if (r)
1144         yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1145     *term_sub = termp;
1146     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1147     return ZEBRA_OK;
1148 }
1149
1150
1151
1152 static void grep_info_delete(struct grep_info *grep_info)
1153 {
1154 #ifdef TERM_COUNT
1155     xfree(grep_info->term_no);
1156 #endif
1157     xfree(grep_info->isam_p_buf);
1158 }
1159
1160 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1161                                    Z_AttributesPlusTerm *zapt,
1162                                    struct grep_info *grep_info,
1163                                    const char *index_type)
1164 {
1165 #ifdef TERM_COUNT
1166     grep_info->term_no = 0;
1167 #endif
1168     grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1169     grep_info->isam_p_size = 0;
1170     grep_info->isam_p_buf = NULL;
1171     grep_info->zh = zh;
1172     grep_info->index_type = index_type;
1173     grep_info->termset = 0;
1174     if (zapt)
1175     {
1176         AttrType truncmax;
1177         int truncmax_value;
1178
1179         attr_init_APT(&truncmax, zapt, 13);
1180         truncmax_value = attr_find(&truncmax, NULL);
1181         if (truncmax_value != -1)
1182             grep_info->trunc_max = truncmax_value;
1183     }
1184     if (zapt)
1185     {
1186         AttrType termset;
1187         int termset_value_numeric;
1188         const char *termset_value_string;
1189
1190         attr_init_APT(&termset, zapt, 8);
1191         termset_value_numeric =
1192             attr_find_ex(&termset, NULL, &termset_value_string);
1193         if (termset_value_numeric != -1)
1194         {
1195 #if TERMSET_DISABLE
1196             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1197             return ZEBRA_FAIL;
1198 #else
1199             char resname[32];
1200             const char *termset_name = 0;
1201             if (termset_value_numeric != -2)
1202             {
1203                 
1204                 sprintf(resname, "%d", termset_value_numeric);
1205                 termset_name = resname;
1206             }
1207             else
1208                 termset_name = termset_value_string;
1209             yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1210             grep_info->termset = resultSetAdd(zh, termset_name, 1);
1211             if (!grep_info->termset)
1212             {
1213                 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1214                 return ZEBRA_FAIL;
1215             }
1216 #endif
1217         }
1218     }
1219     return ZEBRA_OK;
1220 }
1221
1222 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1223                                      Z_AttributesPlusTerm *zapt,
1224                                      const char *termz,
1225                                      const Odr_oid *attributeSet,
1226                                      NMEM stream,
1227                                      const char *index_type, int complete_flag,
1228                                      const char *rank_type,
1229                                      const char *xpath_use,
1230                                      NMEM rset_nmem,
1231                                      RSET **result_sets, int *num_result_sets,
1232                                      struct rset_key_control *kc,
1233                                      zebra_map_t zm)
1234 {
1235     struct grep_info grep_info;
1236     const char *termp = termz;
1237     int alloc_sets = 0;
1238     
1239     *num_result_sets = 0;
1240     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1241         return ZEBRA_FAIL;
1242     while (1)
1243     { 
1244         ZEBRA_RES res;
1245
1246         if (alloc_sets == *num_result_sets)
1247         {
1248             int add = 10;
1249             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1250                                               sizeof(*rnew));
1251             if (alloc_sets)
1252                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1253             alloc_sets = alloc_sets + add;
1254             *result_sets = rnew;
1255         }
1256         res = search_term(zh, zapt, &termp, attributeSet,
1257                           stream, &grep_info,
1258                           index_type, complete_flag,
1259                           rank_type,
1260                           xpath_use, rset_nmem,
1261                           &(*result_sets)[*num_result_sets],
1262                           kc, zm);
1263         if (res != ZEBRA_OK)
1264         {
1265             int i;
1266             for (i = 0; i < *num_result_sets; i++)
1267                 rset_delete((*result_sets)[i]);
1268             grep_info_delete(&grep_info);
1269             return res;
1270         }
1271         if ((*result_sets)[*num_result_sets] == 0)
1272             break;
1273         (*num_result_sets)++;
1274
1275         if (!*termp)
1276             break;
1277     }
1278     grep_info_delete(&grep_info);
1279     return ZEBRA_OK;
1280 }
1281                                
1282 /**
1283    \brief Create result set(s) for list of terms
1284    \param zh Zebra Handle
1285    \param zapt Attributes Plust Term (RPN leaf)
1286    \param termz term as used in query but converted to UTF-8
1287    \param attributeSet default attribute set
1288    \param stream memory for result
1289    \param index_type register type ("w", "p",..)
1290    \param complete_flag whether it's phrases or not
1291    \param rank_type term flags for ranking
1292    \param xpath_use use attribute for X-Path (-1 for no X-path)
1293    \param rset_nmem memory for result sets
1294    \param result_sets output result set for each term in list (output)
1295    \param num_result_sets number of output result sets
1296    \param kc rset key control to be used for created result sets
1297 */
1298 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1299                                    Z_AttributesPlusTerm *zapt,
1300                                    const char *termz,
1301                                    const Odr_oid *attributeSet,
1302                                    NMEM stream,
1303                                    const char *index_type, int complete_flag,
1304                                    const char *rank_type,
1305                                    const char *xpath_use,
1306                                    NMEM rset_nmem,
1307                                    RSET **result_sets, int *num_result_sets,
1308                                    struct rset_key_control *kc)
1309 {
1310     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1311     if (zebra_maps_is_icu(zm))
1312         zebra_map_tokenize_start(zm, termz, strlen(termz));
1313     return search_terms_chrmap(zh, zapt, termz, attributeSet,
1314                                stream, index_type, complete_flag,
1315                                rank_type, xpath_use,
1316                                rset_nmem, result_sets, num_result_sets,
1317                                kc, zm);
1318 }
1319
1320
1321 /** \brief limit a search by position - returns result set
1322  */
1323 static ZEBRA_RES search_position(ZebraHandle zh,
1324                                  Z_AttributesPlusTerm *zapt,
1325                                  const Odr_oid *attributeSet,
1326                                  const char *index_type,
1327                                  NMEM rset_nmem,
1328                                  RSET *rset,
1329                                  struct rset_key_control *kc)
1330 {
1331     int position_value;
1332     AttrType position;
1333     int ord = -1;
1334     char ord_buf[32];
1335     char term_dict[100];
1336     int ord_len;
1337     char *val;
1338     ISAM_P isam_p;
1339     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1340     
1341     attr_init_APT(&position, zapt, 3);
1342     position_value = attr_find(&position, NULL);
1343     switch(position_value)
1344     {
1345     case 3:
1346     case -1:
1347         return ZEBRA_OK;
1348     case 1:
1349     case 2:
1350         break;
1351     default:
1352         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1353                             position_value);
1354         return ZEBRA_FAIL;
1355     }
1356
1357
1358     if (!zebra_maps_is_first_in_field(zm))
1359     {
1360         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1361                             position_value);
1362         return ZEBRA_FAIL;
1363     }
1364
1365     if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1366                           attributeSet, &ord) != ZEBRA_OK)
1367     {
1368         return ZEBRA_FAIL;
1369     }
1370     ord_len = key_SU_encode(ord, ord_buf);
1371     memcpy(term_dict, ord_buf, ord_len);
1372     strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1373     val = dict_lookup(zh->reg->dict, term_dict);
1374     if (val)
1375     {
1376         assert(*val == sizeof(ISAM_P));
1377         memcpy(&isam_p, val+1, sizeof(isam_p));
1378
1379         *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope, 
1380                                        isam_p, 0);
1381     }
1382     return ZEBRA_OK;
1383 }
1384
1385 /** \brief returns result set for phrase search
1386  */
1387 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1388                                        Z_AttributesPlusTerm *zapt,
1389                                        const char *termz_org,
1390                                        const Odr_oid *attributeSet,
1391                                        NMEM stream,
1392                                        const char *index_type,
1393                                        int complete_flag,
1394                                        const char *rank_type,
1395                                        const char *xpath_use,
1396                                        NMEM rset_nmem,
1397                                        RSET *rset,
1398                                        struct rset_key_control *kc)
1399 {
1400     RSET *result_sets = 0;
1401     int num_result_sets = 0;
1402     ZEBRA_RES res =
1403         search_terms_list(zh, zapt, termz_org, attributeSet,
1404                           stream, index_type, complete_flag,
1405                           rank_type, xpath_use,
1406                           rset_nmem,
1407                           &result_sets, &num_result_sets, kc);
1408     
1409     if (res != ZEBRA_OK)
1410         return res;
1411
1412     if (num_result_sets > 0)
1413     {
1414         RSET first_set = 0;
1415         res = search_position(zh, zapt, attributeSet, 
1416                               index_type,
1417                               rset_nmem, &first_set,
1418                               kc);
1419         if (res != ZEBRA_OK)
1420         {
1421             int i;
1422             for (i = 0; i<num_result_sets; i++)
1423                 rset_delete(result_sets[i]);
1424             return res;
1425         }
1426         if (first_set)
1427         {
1428             RSET *nsets = nmem_malloc(stream,
1429                                       sizeof(RSET) * (num_result_sets+1));
1430             nsets[0] = first_set;
1431             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1432             result_sets = nsets;
1433             num_result_sets++;
1434         }
1435     }
1436     if (num_result_sets == 0)
1437         *rset = rset_create_null(rset_nmem, kc, 0); 
1438     else if (num_result_sets == 1)
1439         *rset = result_sets[0];
1440     else
1441         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1442                                  num_result_sets, result_sets,
1443                                  1 /* ordered */, 0 /* exclusion */,
1444                                  3 /* relation */, 1 /* distance */);
1445     if (!*rset)
1446         return ZEBRA_FAIL;
1447     return ZEBRA_OK;
1448 }
1449
1450 /** \brief returns result set for or-list search
1451  */
1452 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1453                                         Z_AttributesPlusTerm *zapt,
1454                                         const char *termz_org,
1455                                         const Odr_oid *attributeSet,
1456                                         NMEM stream,
1457                                         const char *index_type, 
1458                                         int complete_flag,
1459                                         const char *rank_type,
1460                                         const char *xpath_use,
1461                                         NMEM rset_nmem,
1462                                         RSET *rset,
1463                                         struct rset_key_control *kc)
1464 {
1465     RSET *result_sets = 0;
1466     int num_result_sets = 0;
1467     int i;
1468     ZEBRA_RES res =
1469         search_terms_list(zh, zapt, termz_org, attributeSet,
1470                           stream, index_type, complete_flag,
1471                           rank_type, xpath_use,
1472                           rset_nmem,
1473                           &result_sets, &num_result_sets, kc);
1474     if (res != ZEBRA_OK)
1475         return res;
1476
1477     for (i = 0; i<num_result_sets; i++)
1478     {
1479         RSET first_set = 0;
1480         res = search_position(zh, zapt, attributeSet, 
1481                               index_type,
1482                               rset_nmem, &first_set,
1483                               kc);
1484         if (res != ZEBRA_OK)
1485         {
1486             for (i = 0; i<num_result_sets; i++)
1487                 rset_delete(result_sets[i]);
1488             return res;
1489         }
1490
1491         if (first_set)
1492         {
1493             RSET tmp_set[2];
1494
1495             tmp_set[0] = first_set;
1496             tmp_set[1] = result_sets[i];
1497             
1498             result_sets[i] = rset_create_prox(
1499                 rset_nmem, kc, kc->scope,
1500                 2, tmp_set,
1501                 1 /* ordered */, 0 /* exclusion */,
1502                 3 /* relation */, 1 /* distance */);
1503         }
1504     }
1505     if (num_result_sets == 0)
1506         *rset = rset_create_null(rset_nmem, kc, 0); 
1507     else if (num_result_sets == 1)
1508         *rset = result_sets[0];
1509     else
1510         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1511                                num_result_sets, result_sets);
1512     if (!*rset)
1513         return ZEBRA_FAIL;
1514     return ZEBRA_OK;
1515 }
1516
1517 /** \brief returns result set for and-list search
1518  */
1519 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1520                                          Z_AttributesPlusTerm *zapt,
1521                                          const char *termz_org,
1522                                          const Odr_oid *attributeSet,
1523                                          NMEM stream,
1524                                          const char *index_type, 
1525                                          int complete_flag,
1526                                          const char *rank_type, 
1527                                          const char *xpath_use,
1528                                          NMEM rset_nmem,
1529                                          RSET *rset,
1530                                          struct rset_key_control *kc)
1531 {
1532     RSET *result_sets = 0;
1533     int num_result_sets = 0;
1534     int i;
1535     ZEBRA_RES res =
1536         search_terms_list(zh, zapt, termz_org, attributeSet,
1537                           stream, index_type, complete_flag,
1538                           rank_type, xpath_use,
1539                           rset_nmem,
1540                           &result_sets, &num_result_sets,
1541                           kc);
1542     if (res != ZEBRA_OK)
1543         return res;
1544     for (i = 0; i<num_result_sets; i++)
1545     {
1546         RSET first_set = 0;
1547         res = search_position(zh, zapt, attributeSet, 
1548                               index_type,
1549                               rset_nmem, &first_set,
1550                               kc);
1551         if (res != ZEBRA_OK)
1552         {
1553             for (i = 0; i<num_result_sets; i++)
1554                 rset_delete(result_sets[i]);
1555             return res;
1556         }
1557
1558         if (first_set)
1559         {
1560             RSET tmp_set[2];
1561
1562             tmp_set[0] = first_set;
1563             tmp_set[1] = result_sets[i];
1564             
1565             result_sets[i] = rset_create_prox(
1566                 rset_nmem, kc, kc->scope,
1567                 2, tmp_set,
1568                 1 /* ordered */, 0 /* exclusion */,
1569                 3 /* relation */, 1 /* distance */);
1570         }
1571     }
1572
1573
1574     if (num_result_sets == 0)
1575         *rset = rset_create_null(rset_nmem, kc, 0); 
1576     else if (num_result_sets == 1)
1577         *rset = result_sets[0];
1578     else
1579         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1580                                 num_result_sets, result_sets);
1581     if (!*rset)
1582         return ZEBRA_FAIL;
1583     return ZEBRA_OK;
1584 }
1585
1586 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1587                             const char **term_sub,
1588                             WRBUF term_dict,
1589                             const Odr_oid *attributeSet,
1590                             struct grep_info *grep_info,
1591                             int *max_pos,
1592                             zebra_map_t zm,
1593                             WRBUF display_term,
1594                             int *error_code)
1595 {
1596     AttrType relation;
1597     int relation_value;
1598     int term_value;
1599     int r;
1600     WRBUF term_num = wrbuf_alloc();
1601
1602     *error_code = 0;
1603     attr_init_APT(&relation, zapt, 2);
1604     relation_value = attr_find(&relation, NULL);
1605
1606     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1607
1608     switch (relation_value)
1609     {
1610     case 1:
1611         yaz_log(log_level_rpn, "Relation <");
1612         if (!term_100(zm, term_sub, term_num, 1, display_term))
1613         { 
1614             wrbuf_destroy(term_num);
1615             return 0;
1616         }
1617         term_value = atoi(wrbuf_cstr(term_num));
1618         gen_regular_rel(term_dict, term_value-1, 1);
1619         break;
1620     case 2:
1621         yaz_log(log_level_rpn, "Relation <=");
1622         if (!term_100(zm, term_sub, term_num, 1, display_term))
1623         {
1624             wrbuf_destroy(term_num);
1625             return 0;
1626         }
1627         term_value = atoi(wrbuf_cstr(term_num));
1628         gen_regular_rel(term_dict, term_value, 1);
1629         break;
1630     case 4:
1631         yaz_log(log_level_rpn, "Relation >=");
1632         if (!term_100(zm, term_sub, term_num, 1, display_term))
1633         {
1634             wrbuf_destroy(term_num);
1635             return 0;
1636         }
1637         term_value = atoi(wrbuf_cstr(term_num));
1638         gen_regular_rel(term_dict, term_value, 0);
1639         break;
1640     case 5:
1641         yaz_log(log_level_rpn, "Relation >");
1642         if (!term_100(zm, term_sub, term_num, 1, display_term))
1643         {
1644             wrbuf_destroy(term_num);
1645             return 0;
1646         }
1647         term_value = atoi(wrbuf_cstr(term_num));
1648         gen_regular_rel(term_dict, term_value+1, 0);
1649         break;
1650     case -1:
1651     case 3:
1652         yaz_log(log_level_rpn, "Relation =");
1653         if (!term_100(zm, term_sub, term_num, 1, display_term))
1654         {
1655             wrbuf_destroy(term_num);
1656             return 0; 
1657         }
1658         term_value = atoi(wrbuf_cstr(term_num));
1659         wrbuf_printf(term_dict, "(0*%d)", term_value);
1660         break;
1661     case 103:
1662         /* term_tmp untouched.. */
1663         while (**term_sub != '\0')
1664             (*term_sub)++;
1665         break;
1666     default:
1667         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1668         wrbuf_destroy(term_num); 
1669         return 0;
1670     }
1671     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 
1672                          0, grep_info, max_pos, 0, grep_handle);
1673
1674     if (r == 1)
1675         zebra_set_partial_result(zh);
1676     else if (r)
1677         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1678     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1679     wrbuf_destroy(term_num);
1680     return 1;
1681 }
1682
1683 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1684                               const char **term_sub, 
1685                               WRBUF term_dict,
1686                               const Odr_oid *attributeSet, NMEM stream,
1687                               struct grep_info *grep_info,
1688                               const char *index_type, int complete_flag,
1689                               WRBUF display_term,
1690                               const char *xpath_use,
1691                               struct ord_list **ol)
1692 {
1693     const char *termp;
1694     struct rpn_char_map_info rcmi;
1695     int max_pos;
1696     int relation_error = 0;
1697     int ord, ord_len, i;
1698     char ord_buf[32];
1699     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1700     
1701     *ol = ord_list_create(stream);
1702
1703     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1704
1705     termp = *term_sub;
1706     
1707     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1708                           attributeSet, &ord) != ZEBRA_OK)
1709     {
1710         return ZEBRA_FAIL;
1711     }
1712     
1713     wrbuf_rewind(term_dict);
1714     
1715     *ol = ord_list_append(stream, *ol, ord);
1716     
1717     ord_len = key_SU_encode(ord, ord_buf);
1718     
1719     wrbuf_putc(term_dict, '(');
1720     for (i = 0; i < ord_len; i++)
1721     {
1722         wrbuf_putc(term_dict, 1);
1723         wrbuf_putc(term_dict, ord_buf[i]);
1724     }
1725     wrbuf_putc(term_dict, ')');
1726     
1727     if (!numeric_relation(zh, zapt, &termp, term_dict,
1728                           attributeSet, grep_info, &max_pos, zm,
1729                           display_term, &relation_error))
1730     {
1731         if (relation_error)
1732         {
1733             zebra_setError(zh, relation_error, 0);
1734             return ZEBRA_FAIL;
1735         }
1736         *term_sub = 0;
1737         return ZEBRA_OK;
1738     }
1739     *term_sub = termp;
1740     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1741     return ZEBRA_OK;
1742 }
1743
1744                                  
1745 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1746                                         Z_AttributesPlusTerm *zapt,
1747                                         const char *termz,
1748                                         const Odr_oid *attributeSet,
1749                                         NMEM stream,
1750                                         const char *index_type, 
1751                                         int complete_flag,
1752                                         const char *rank_type, 
1753                                         const char *xpath_use,
1754                                         NMEM rset_nmem,
1755                                         RSET *rset,
1756                                         struct rset_key_control *kc)
1757 {
1758     const char *termp = termz;
1759     RSET *result_sets = 0;
1760     int num_result_sets = 0;
1761     ZEBRA_RES res;
1762     struct grep_info grep_info;
1763     int alloc_sets = 0;
1764     zint hits_limit_value;
1765     const char *term_ref_id_str = 0;
1766
1767     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1768                           stream);
1769
1770     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1771     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1772         return ZEBRA_FAIL;
1773     while (1)
1774     { 
1775         struct ord_list *ol;
1776         WRBUF term_dict = wrbuf_alloc();
1777         WRBUF display_term = wrbuf_alloc();
1778         if (alloc_sets == num_result_sets)
1779         {
1780             int add = 10;
1781             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1782                                               sizeof(*rnew));
1783             if (alloc_sets)
1784                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1785             alloc_sets = alloc_sets + add;
1786             result_sets = rnew;
1787         }
1788         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1789         grep_info.isam_p_indx = 0;
1790         res = numeric_term(zh, zapt, &termp, term_dict,
1791                            attributeSet, stream, &grep_info,
1792                            index_type, complete_flag,
1793                            display_term, xpath_use, &ol);
1794         wrbuf_destroy(term_dict);
1795         if (res == ZEBRA_FAIL || termp == 0)
1796         {
1797             wrbuf_destroy(display_term);
1798             break;
1799         }
1800         yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
1801         result_sets[num_result_sets] =
1802             rset_trunc(zh, grep_info.isam_p_buf,
1803                        grep_info.isam_p_indx, wrbuf_buf(display_term),
1804                        wrbuf_len(display_term), rank_type,
1805                        0 /* preserve position */,
1806                        zapt->term->which, rset_nmem, 
1807                        kc, kc->scope, ol, index_type,
1808                        hits_limit_value,
1809                        term_ref_id_str);
1810         wrbuf_destroy(display_term);
1811         if (!result_sets[num_result_sets])
1812             break;
1813         num_result_sets++;
1814         if (!*termp)
1815             break;
1816     }
1817     grep_info_delete(&grep_info);
1818
1819     if (res != ZEBRA_OK)
1820         return res;
1821     if (num_result_sets == 0)
1822         *rset = rset_create_null(rset_nmem, kc, 0);
1823     else if (num_result_sets == 1)
1824         *rset = result_sets[0];
1825     else
1826         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1827                                 num_result_sets, result_sets);
1828     if (!*rset)
1829         return ZEBRA_FAIL;
1830     return ZEBRA_OK;
1831 }
1832
1833 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1834                                       Z_AttributesPlusTerm *zapt,
1835                                       const char *termz,
1836                                       const Odr_oid *attributeSet,
1837                                       NMEM stream,
1838                                       const char *rank_type, NMEM rset_nmem,
1839                                       RSET *rset,
1840                                       struct rset_key_control *kc)
1841 {
1842     Record rec;
1843     zint sysno = atozint(termz);
1844     
1845     if (sysno <= 0)
1846         sysno = 0;
1847     rec = rec_get(zh->reg->records, sysno);
1848     if (!rec)
1849         sysno = 0;
1850
1851     rec_free(&rec);
1852
1853     if (sysno <= 0)
1854     {
1855         *rset = rset_create_null(rset_nmem, kc, 0);
1856     }
1857     else
1858     {
1859         RSFD rsfd;
1860         struct it_key key;
1861         *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1862                                  res_get(zh->res, "setTmpDir"), 0);
1863         rsfd = rset_open(*rset, RSETF_WRITE);
1864         
1865         key.mem[0] = sysno;
1866         key.mem[1] = 1;
1867         key.len = 2;
1868         rset_write(rsfd, &key);
1869         rset_close(rsfd);
1870     }
1871     return ZEBRA_OK;
1872 }
1873
1874 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1875                                const Odr_oid *attributeSet, NMEM stream,
1876                                Z_SortKeySpecList *sort_sequence,
1877                                const char *rank_type,
1878                                NMEM rset_nmem,
1879                                RSET *rset,
1880                                struct rset_key_control *kc)
1881 {
1882     int i;
1883     int sort_relation_value;
1884     AttrType sort_relation_type;
1885     Z_SortKeySpec *sks;
1886     Z_SortKey *sk;
1887     char termz[20];
1888     
1889     attr_init_APT(&sort_relation_type, zapt, 7);
1890     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1891
1892     if (!sort_sequence->specs)
1893     {
1894         sort_sequence->num_specs = 10;
1895         sort_sequence->specs = (Z_SortKeySpec **)
1896             nmem_malloc(stream, sort_sequence->num_specs *
1897                         sizeof(*sort_sequence->specs));
1898         for (i = 0; i<sort_sequence->num_specs; i++)
1899             sort_sequence->specs[i] = 0;
1900     }
1901     if (zapt->term->which != Z_Term_general)
1902         i = 0;
1903     else
1904         i = atoi_n((char *) zapt->term->u.general->buf,
1905                    zapt->term->u.general->len);
1906     if (i >= sort_sequence->num_specs)
1907         i = 0;
1908     sprintf(termz, "%d", i);
1909
1910     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1911     sks->sortElement = (Z_SortElement *)
1912         nmem_malloc(stream, sizeof(*sks->sortElement));
1913     sks->sortElement->which = Z_SortElement_generic;
1914     sk = sks->sortElement->u.generic = (Z_SortKey *)
1915         nmem_malloc(stream, sizeof(*sk));
1916     sk->which = Z_SortKey_sortAttributes;
1917     sk->u.sortAttributes = (Z_SortAttributes *)
1918         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1919
1920     sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1921     sk->u.sortAttributes->list = zapt->attributes;
1922
1923     sks->sortRelation = (int *)
1924         nmem_malloc(stream, sizeof(*sks->sortRelation));
1925     if (sort_relation_value == 1)
1926         *sks->sortRelation = Z_SortKeySpec_ascending;
1927     else if (sort_relation_value == 2)
1928         *sks->sortRelation = Z_SortKeySpec_descending;
1929     else 
1930         *sks->sortRelation = Z_SortKeySpec_ascending;
1931
1932     sks->caseSensitivity = (int *)
1933         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1934     *sks->caseSensitivity = 0;
1935
1936     sks->which = Z_SortKeySpec_null;
1937     sks->u.null = odr_nullval ();
1938     sort_sequence->specs[i] = sks;
1939     *rset = rset_create_null(rset_nmem, kc, 0);
1940     return ZEBRA_OK;
1941 }
1942
1943
1944 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1945                            const Odr_oid *attributeSet,
1946                            struct xpath_location_step *xpath, int max,
1947                            NMEM mem)
1948 {
1949     const Odr_oid *curAttributeSet = attributeSet;
1950     AttrType use;
1951     const char *use_string = 0;
1952     
1953     attr_init_APT(&use, zapt, 1);
1954     attr_find_ex(&use, &curAttributeSet, &use_string);
1955
1956     if (!use_string || *use_string != '/')
1957         return -1;
1958
1959     return zebra_parse_xpath_str(use_string, xpath, max, mem);
1960 }
1961  
1962                
1963
1964 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1965                         const char *index_type, const char *term, 
1966                         const char *xpath_use,
1967                         NMEM rset_nmem,
1968                         struct rset_key_control *kc)
1969 {
1970     struct grep_info grep_info;
1971     int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
1972                                            zinfo_index_category_index,
1973                                            index_type, xpath_use);
1974     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
1975         return rset_create_null(rset_nmem, kc, 0);
1976     
1977     if (ord < 0)
1978         return rset_create_null(rset_nmem, kc, 0);
1979     else
1980     {
1981         int i, r, max_pos;
1982         char ord_buf[32];
1983         RSET rset;
1984         WRBUF term_dict = wrbuf_alloc();
1985         int ord_len = key_SU_encode(ord, ord_buf);
1986         int term_type = Z_Term_characterString;
1987         const char *flags = "void";
1988
1989         wrbuf_putc(term_dict, '(');
1990         for (i = 0; i<ord_len; i++)
1991         {
1992             wrbuf_putc(term_dict, 1);
1993             wrbuf_putc(term_dict, ord_buf[i]);
1994         }
1995         wrbuf_putc(term_dict, ')');
1996         wrbuf_puts(term_dict, term);
1997         
1998         grep_info.isam_p_indx = 0;
1999         r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2000                              &grep_info, &max_pos, 0, grep_handle);
2001         yaz_log(YLOG_DEBUG, "%s %d positions", term,
2002                 grep_info.isam_p_indx);
2003         rset = rset_trunc(zh, grep_info.isam_p_buf,
2004                           grep_info.isam_p_indx, term, strlen(term),
2005                           flags, 1, term_type, rset_nmem,
2006                           kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2007                           0 /* term_ref_id_str */);
2008         grep_info_delete(&grep_info);
2009         wrbuf_destroy(term_dict);
2010         return rset;
2011     }
2012 }
2013
2014 static
2015 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2016                            NMEM stream, const char *rank_type, RSET rset,
2017                            int xpath_len, struct xpath_location_step *xpath,
2018                            NMEM rset_nmem,
2019                            RSET *rset_out,
2020                            struct rset_key_control *kc)
2021 {
2022     int i;
2023     int always_matches = rset ? 0 : 1;
2024
2025     if (xpath_len < 0)
2026     {
2027         *rset_out = rset;
2028         return ZEBRA_OK;
2029     }
2030
2031     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2032     for (i = 0; i<xpath_len; i++)
2033     {
2034         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2035
2036     }
2037
2038     /*
2039     //a    ->    a/.*
2040     //a/b  ->    b/a/.*
2041     /a     ->    a/
2042     /a/b   ->    b/a/
2043
2044     /      ->    none
2045
2046     a[@attr = value]/b[@other = othervalue]
2047
2048     /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2049     /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2050     /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2051     /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2052     /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2053     /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2054       
2055     */
2056
2057     dict_grep_cmap(zh->reg->dict, 0, 0);
2058     
2059     {
2060         int level = xpath_len;
2061         int first_path = 1;
2062         
2063         while (--level >= 0)
2064         {
2065             WRBUF xpath_rev = wrbuf_alloc();
2066             int i;
2067             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2068
2069             for (i = level; i >= 1; --i)
2070             {
2071                 const char *cp = xpath[i].part;
2072                 if (*cp)
2073                 {
2074                     for (; *cp; cp++)
2075                     {
2076                         if (*cp == '*')
2077                             wrbuf_puts(xpath_rev, "[^/]*");
2078                         else if (*cp == ' ')
2079                             wrbuf_puts(xpath_rev, "\001 ");
2080                         else
2081                             wrbuf_putc(xpath_rev, *cp);
2082
2083                         /* wrbuf_putc does not null-terminate , but
2084                            wrbuf_puts below ensures it does.. so xpath_rev
2085                            is OK iff length is > 0 */
2086                     }
2087                     wrbuf_puts(xpath_rev, "/");
2088                 }
2089                 else if (i == 1)  /* // case */
2090                     wrbuf_puts(xpath_rev, ".*");
2091             }
2092             if (xpath[level].predicate &&
2093                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2094                 xpath[level].predicate->u.relation.name[0])
2095             {
2096                 WRBUF wbuf = wrbuf_alloc();
2097                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2098                 if (xpath[level].predicate->u.relation.value)
2099                 {
2100                     const char *cp = xpath[level].predicate->u.relation.value;
2101                     wrbuf_putc(wbuf, '=');
2102                     
2103                     while (*cp)
2104                     {
2105                         if (strchr(REGEX_CHARS, *cp))
2106                             wrbuf_putc(wbuf, '\\');
2107                         wrbuf_putc(wbuf, *cp);
2108                         cp++;
2109                     }
2110                 }
2111                 rset_attr = xpath_trunc(
2112                     zh, stream, "0", wrbuf_cstr(wbuf), 
2113                     ZEBRA_XPATH_ATTR_NAME, 
2114                     rset_nmem, kc);
2115                 wrbuf_destroy(wbuf);
2116             } 
2117             else 
2118             {
2119                 if (!first_path)
2120                 {
2121                     wrbuf_destroy(xpath_rev);
2122                     continue;
2123                 }
2124             }
2125             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, 
2126                     wrbuf_cstr(xpath_rev));
2127             if (wrbuf_len(xpath_rev))
2128             {
2129                 rset_start_tag = xpath_trunc(zh, stream, "0", 
2130                                              wrbuf_cstr(xpath_rev),
2131                                              ZEBRA_XPATH_ELM_BEGIN, 
2132                                              rset_nmem, kc);
2133                 if (always_matches)
2134                     rset = rset_start_tag;
2135                 else
2136                 {
2137                     rset_end_tag = xpath_trunc(zh, stream, "0", 
2138                                                wrbuf_cstr(xpath_rev),
2139                                                ZEBRA_XPATH_ELM_END, 
2140                                                rset_nmem, kc);
2141                     
2142                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2143                                                rset_start_tag, rset,
2144                                                rset_end_tag, rset_attr);
2145                 }
2146             }
2147             wrbuf_destroy(xpath_rev);
2148             first_path = 0;
2149         }
2150     }
2151     *rset_out = rset;
2152     return ZEBRA_OK;
2153 }
2154
2155 #define MAX_XPATH_STEPS 10
2156
2157 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2158                                      Z_AttributesPlusTerm *zapt,
2159                                      const Odr_oid *attributeSet, NMEM stream,
2160                                      Z_SortKeySpecList *sort_sequence,
2161                                      NMEM rset_nmem,
2162                                      RSET *rset,
2163                                      struct rset_key_control *kc);
2164
2165 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2166                                 const Odr_oid *attributeSet, NMEM stream,
2167                                 Z_SortKeySpecList *sort_sequence,
2168                                 int num_bases, const char **basenames, 
2169                                 NMEM rset_nmem,
2170                                 RSET *rset,
2171                                 struct rset_key_control *kc)
2172 {
2173     RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2174     ZEBRA_RES res = ZEBRA_OK;
2175     int i;
2176     for (i = 0; i < num_bases; i++)
2177     {
2178
2179         if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2180         {
2181             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2182                            basenames[i]);
2183             res = ZEBRA_FAIL;
2184             break;
2185         }
2186         res = rpn_search_database(zh, zapt, attributeSet, stream,
2187                                   sort_sequence,
2188                                   rset_nmem, rsets+i, kc);
2189         if (res != ZEBRA_OK)
2190             break;
2191     }
2192     if (res != ZEBRA_OK)
2193     {   /* must clean up the already created sets */
2194         while (--i >= 0)
2195             rset_delete(rsets[i]);
2196         *rset = 0;
2197     }
2198     else 
2199     {
2200         if (num_bases == 1)
2201             *rset = rsets[0];
2202         else if (num_bases == 0)
2203             *rset = rset_create_null(rset_nmem, kc, 0); 
2204         else
2205             *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
2206                                    num_bases, rsets);
2207     }
2208     return res;
2209 }
2210
2211 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2212                                      Z_AttributesPlusTerm *zapt,
2213                                      const Odr_oid *attributeSet, NMEM stream,
2214                                      Z_SortKeySpecList *sort_sequence,
2215                                      NMEM rset_nmem,
2216                                      RSET *rset,
2217                                      struct rset_key_control *kc)
2218 {
2219     ZEBRA_RES res = ZEBRA_OK;
2220     const char *index_type;
2221     char *search_type = NULL;
2222     char rank_type[128];
2223     int complete_flag;
2224     int sort_flag;
2225     char termz[IT_MAX_WORD+1];
2226     int xpath_len;
2227     const char *xpath_use = 0;
2228     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2229
2230     if (!log_level_set)
2231     {
2232         log_level_rpn = yaz_log_module_level("rpn");
2233         log_level_set = 1;
2234     }
2235     zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2236                     rank_type, &complete_flag, &sort_flag);
2237     
2238     yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2239     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2240     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2241     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2242
2243     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2244         return ZEBRA_FAIL;
2245
2246     if (sort_flag)
2247         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2248                              rank_type, rset_nmem, rset, kc);
2249     /* consider if an X-Path query is used */
2250     xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
2251                                 xpath, MAX_XPATH_STEPS, stream);
2252     if (xpath_len >= 0)
2253     {
2254         if (xpath[xpath_len-1].part[0] == '@') 
2255             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2256         else
2257             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
2258
2259         if (1)
2260         {
2261             AttrType relation;
2262             int relation_value;
2263
2264             attr_init_APT(&relation, zapt, 2);
2265             relation_value = attr_find(&relation, NULL);
2266
2267             if (relation_value == 103) /* alwaysmatches */
2268             {
2269                 *rset = 0; /* signal no "term" set */
2270                 return rpn_search_xpath(zh, stream, rank_type, *rset, 
2271                                         xpath_len, xpath, rset_nmem, rset, kc);
2272             }
2273         }
2274     }
2275
2276     /* search using one of the various search type strategies
2277        termz is our UTF-8 search term
2278        attributeSet is top-level default attribute set 
2279        stream is ODR for search
2280        reg_id is the register type
2281        complete_flag is 1 for complete subfield, 0 for incomplete
2282        xpath_use is use-attribute to be used for X-Path search, 0 for none
2283     */
2284     if (!strcmp(search_type, "phrase"))
2285     {
2286         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2287                                     index_type, complete_flag, rank_type,
2288                                     xpath_use,
2289                                     rset_nmem,
2290                                     rset, kc);
2291     }
2292     else if (!strcmp(search_type, "and-list"))
2293     {
2294         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2295                                       index_type, complete_flag, rank_type,
2296                                       xpath_use,
2297                                       rset_nmem,
2298                                       rset, kc);
2299     }
2300     else if (!strcmp(search_type, "or-list"))
2301     {
2302         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2303                                      index_type, complete_flag, rank_type,
2304                                      xpath_use,
2305                                      rset_nmem,
2306                                      rset, kc);
2307     }
2308     else if (!strcmp(search_type, "local"))
2309     {
2310         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2311                                    rank_type, rset_nmem, rset, kc);
2312     }
2313     else if (!strcmp(search_type, "numeric"))
2314     {
2315         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2316                                      index_type, complete_flag, rank_type,
2317                                      xpath_use,
2318                                      rset_nmem,
2319                                      rset, kc);
2320     }
2321     else
2322     {
2323         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2324         res = ZEBRA_FAIL;
2325     }
2326     if (res != ZEBRA_OK)
2327         return res;
2328     if (!*rset)
2329         return ZEBRA_FAIL;
2330     return rpn_search_xpath(zh, stream, rank_type, *rset, 
2331                             xpath_len, xpath, rset_nmem, rset, kc);
2332 }
2333
2334 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2335                                       const Odr_oid *attributeSet, 
2336                                       NMEM stream, NMEM rset_nmem,
2337                                       Z_SortKeySpecList *sort_sequence,
2338                                       int num_bases, const char **basenames,
2339                                       RSET **result_sets, int *num_result_sets,
2340                                       Z_Operator *parent_op,
2341                                       struct rset_key_control *kc);
2342
2343 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2344                                    zint *approx_limit)
2345 {
2346     ZEBRA_RES res = ZEBRA_OK;
2347     if (zs->which == Z_RPNStructure_complex)
2348     {
2349         if (res == ZEBRA_OK)
2350             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2351                                            approx_limit);
2352         if (res == ZEBRA_OK)
2353             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2354                                            approx_limit);
2355     }
2356     else if (zs->which == Z_RPNStructure_simple)
2357     {
2358         if (zs->u.simple->which == Z_Operand_APT)
2359         {
2360             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2361             AttrType global_hits_limit_attr;
2362             int l;
2363             
2364             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2365             
2366             l = attr_find(&global_hits_limit_attr, NULL);
2367             if (l != -1)
2368                 *approx_limit = l;
2369         }
2370     }
2371     return res;
2372 }
2373
2374 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2375                          const Odr_oid *attributeSet, 
2376                          NMEM stream, NMEM rset_nmem,
2377                          Z_SortKeySpecList *sort_sequence,
2378                          int num_bases, const char **basenames,
2379                          RSET *result_set)
2380 {
2381     RSET *result_sets = 0;
2382     int num_result_sets = 0;
2383     ZEBRA_RES res;
2384     struct rset_key_control *kc = zebra_key_control_create(zh);
2385
2386     res = rpn_search_structure(zh, zs, attributeSet,
2387                                stream, rset_nmem,
2388                                sort_sequence, 
2389                                num_bases, basenames,
2390                                &result_sets, &num_result_sets,
2391                                0 /* no parent op */,
2392                                kc);
2393     if (res != ZEBRA_OK)
2394     {
2395         int i;
2396         for (i = 0; i<num_result_sets; i++)
2397             rset_delete(result_sets[i]);
2398         *result_set = 0;
2399     }
2400     else
2401     {
2402         assert(num_result_sets == 1);
2403         assert(result_sets);
2404         assert(*result_sets);
2405         *result_set = *result_sets;
2406     }
2407     (*kc->dec)(kc);
2408     return res;
2409 }
2410
2411 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2412                                const Odr_oid *attributeSet, 
2413                                NMEM stream, NMEM rset_nmem,
2414                                Z_SortKeySpecList *sort_sequence,
2415                                int num_bases, const char **basenames,
2416                                RSET **result_sets, int *num_result_sets,
2417                                Z_Operator *parent_op,
2418                                struct rset_key_control *kc)
2419 {
2420     *num_result_sets = 0;
2421     if (zs->which == Z_RPNStructure_complex)
2422     {
2423         ZEBRA_RES res;
2424         Z_Operator *zop = zs->u.complex->roperator;
2425         RSET *result_sets_l = 0;
2426         int num_result_sets_l = 0;
2427         RSET *result_sets_r = 0;
2428         int num_result_sets_r = 0;
2429
2430         res = rpn_search_structure(zh, zs->u.complex->s1,
2431                                    attributeSet, stream, rset_nmem,
2432                                    sort_sequence,
2433                                    num_bases, basenames,
2434                                    &result_sets_l, &num_result_sets_l,
2435                                    zop, kc);
2436         if (res != ZEBRA_OK)
2437         {
2438             int i;
2439             for (i = 0; i<num_result_sets_l; i++)
2440                 rset_delete(result_sets_l[i]);
2441             return res;
2442         }
2443         res = rpn_search_structure(zh, zs->u.complex->s2,
2444                                    attributeSet, stream, rset_nmem,
2445                                    sort_sequence,
2446                                    num_bases, basenames,
2447                                    &result_sets_r, &num_result_sets_r,
2448                                    zop, kc);
2449         if (res != ZEBRA_OK)
2450         {
2451             int i;
2452             for (i = 0; i<num_result_sets_l; i++)
2453                 rset_delete(result_sets_l[i]);
2454             for (i = 0; i<num_result_sets_r; i++)
2455                 rset_delete(result_sets_r[i]);
2456             return res;
2457         }
2458
2459         /* make a new list of result for all children */
2460         *num_result_sets = num_result_sets_l + num_result_sets_r;
2461         *result_sets = nmem_malloc(stream, *num_result_sets * 
2462                                    sizeof(**result_sets));
2463         memcpy(*result_sets, result_sets_l, 
2464                num_result_sets_l * sizeof(**result_sets));
2465         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2466                num_result_sets_r * sizeof(**result_sets));
2467
2468         if (!parent_op || parent_op->which != zop->which
2469             || (zop->which != Z_Operator_and &&
2470                 zop->which != Z_Operator_or))
2471         {
2472             /* parent node different from this one (or non-present) */
2473             /* we must combine result sets now */
2474             RSET rset;
2475             switch (zop->which)
2476             {
2477             case Z_Operator_and:
2478                 rset = rset_create_and(rset_nmem, kc,
2479                                        kc->scope,
2480                                        *num_result_sets, *result_sets);
2481                 break;
2482             case Z_Operator_or:
2483                 rset = rset_create_or(rset_nmem, kc,
2484                                       kc->scope, 0, /* termid */
2485                                       *num_result_sets, *result_sets);
2486                 break;
2487             case Z_Operator_and_not:
2488                 rset = rset_create_not(rset_nmem, kc,
2489                                        kc->scope,
2490                                        (*result_sets)[0],
2491                                        (*result_sets)[1]);
2492                 break;
2493             case Z_Operator_prox:
2494                 if (zop->u.prox->which != Z_ProximityOperator_known)
2495                 {
2496                     zebra_setError(zh, 
2497                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2498                                    0);
2499                     return ZEBRA_FAIL;
2500                 }
2501                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2502                 {
2503                     zebra_setError_zint(zh,
2504                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2505                                         *zop->u.prox->u.known);
2506                     return ZEBRA_FAIL;
2507                 }
2508                 else
2509                 {
2510                     rset = rset_create_prox(rset_nmem, kc,
2511                                             kc->scope,
2512                                             *num_result_sets, *result_sets, 
2513                                             *zop->u.prox->ordered,
2514                                             (!zop->u.prox->exclusion ? 
2515                                              0 : *zop->u.prox->exclusion),
2516                                             *zop->u.prox->relationType,
2517                                             *zop->u.prox->distance );
2518                 }
2519                 break;
2520             default:
2521                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2522                 return ZEBRA_FAIL;
2523             }
2524             *num_result_sets = 1;
2525             *result_sets = nmem_malloc(stream, *num_result_sets * 
2526                                        sizeof(**result_sets));
2527             (*result_sets)[0] = rset;
2528         }
2529     }
2530     else if (zs->which == Z_RPNStructure_simple)
2531     {
2532         RSET rset;
2533         ZEBRA_RES res;
2534
2535         if (zs->u.simple->which == Z_Operand_APT)
2536         {
2537             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2538             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2539                                  attributeSet, stream, sort_sequence,
2540                                  num_bases, basenames, rset_nmem, &rset,
2541                                  kc);
2542             if (res != ZEBRA_OK)
2543                 return res;
2544         }
2545         else if (zs->u.simple->which == Z_Operand_resultSetId)
2546         {
2547             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2548             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2549             if (!rset)
2550             {
2551                 zebra_setError(zh, 
2552                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2553                                zs->u.simple->u.resultSetId);
2554                 return ZEBRA_FAIL;
2555             }
2556             rset_dup(rset);
2557         }
2558         else
2559         {
2560             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2561             return ZEBRA_FAIL;
2562         }
2563         *num_result_sets = 1;
2564         *result_sets = nmem_malloc(stream, *num_result_sets * 
2565                                    sizeof(**result_sets));
2566         (*result_sets)[0] = rset;
2567     }
2568     else
2569     {
2570         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2571         return ZEBRA_FAIL;
2572     }
2573     return ZEBRA_OK;
2574 }
2575
2576
2577
2578 /*
2579  * Local variables:
2580  * c-basic-offset: 4
2581  * indent-tabs-mode: nil
2582  * End:
2583  * vim: shiftwidth=4 tabstop=8 expandtab
2584  */
2585