zebrasrv: sortkeys args are optional
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* This file is part of the Zebra server.
2    Copyright (C) 1994-2011 Index Data
3
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 */
19
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
/* Logging state for this file.  add_isam_p() fills in log_level_rpn with
   the yaz_log level of the "rpn" module on first use and then sets
   log_level_set so the lookup is not repeated. */
static int log_level_set = 0;
static int log_level_rpn = 0;

/* Compile-time switch; not referenced in this part of the file. */
#define TERMSET_DISABLE 1
44
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
46 {
47     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48     const char **out = zebra_maps_input(p->zm, from, len, 0);
49 #if 0
50     if (out && *out)
51     {
52         const char *outp = *out;
53         yaz_log(YLOG_LOG, "---");
54         while (*outp)
55         {
56             yaz_log(YLOG_LOG, "%02X", *outp);
57             outp++;
58         }
59     }
60 #endif
61     return out;
62 }
63
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65                           struct rpn_char_map_info *map_info)
66 {
67     map_info->zm = zm;
68     if (zebra_maps_is_icu(zm))
69         dict_grep_cmap(reg->dict, 0, 0);
70     else
71         dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
72 }
73
74 #define TERM_COUNT        
75        
76 struct grep_info {        
77 #ifdef TERM_COUNT        
78     int *term_no;        
79 #endif        
80     ISAM_P *isam_p_buf;
81     int isam_p_size;        
82     int isam_p_indx;
83     int trunc_max;
84     ZebraHandle zh;
85     const char *index_type;
86     ZebraSet termset;
87 };        
88
89 static int add_isam_p(const char *name, const char *info,
90                       struct grep_info *p)
91 {
92     if (!log_level_set)
93     {
94         log_level_rpn = yaz_log_module_level("rpn");
95         log_level_set = 1;
96     }
97     /* we may have to stop this madness.. NOTE: -1 so that if
98        truncmax == trunxlimit we do *not* generate result sets */
99     if (p->isam_p_indx >= p->trunc_max - 1)
100         return 1;
101
102     if (p->isam_p_indx == p->isam_p_size)
103     {
104         ISAM_P *new_isam_p_buf;
105 #ifdef TERM_COUNT        
106         int *new_term_no;        
107 #endif
108         p->isam_p_size = 2*p->isam_p_size + 100;
109         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
110                                             p->isam_p_size);
111         if (p->isam_p_buf)
112         {
113             memcpy(new_isam_p_buf, p->isam_p_buf,
114                    p->isam_p_indx * sizeof(*p->isam_p_buf));
115             xfree(p->isam_p_buf);
116         }
117         p->isam_p_buf = new_isam_p_buf;
118
119 #ifdef TERM_COUNT
120         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
121         if (p->term_no)
122         {
123             memcpy(new_term_no, p->isam_p_buf,
124                    p->isam_p_indx * sizeof(*p->term_no));
125             xfree(p->term_no);
126         }
127         p->term_no = new_term_no;
128 #endif
129     }
130     assert(*info == sizeof(*p->isam_p_buf));
131     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
132
133     if (p->termset)
134     {
135         const char *db;
136         char term_tmp[IT_MAX_WORD];
137         int ord = 0;
138         const char *index_name;
139         int len = key_SU_decode(&ord, (const unsigned char *) name);
140         
141         zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143         zebraExplain_lookup_ord(p->zh->reg->zei,
144                                 ord, 0 /* index_type */, &db, &index_name);
145         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
146         
147         resultSetAddTerm(p->zh, p->termset, name[len], db,
148                          index_name, term_tmp);
149     }
150     (p->isam_p_indx)++;
151     return 0;
152 }
153
/* Adapter with an untyped context argument: p is taken to be the
   struct grep_info to extend and the hit is passed on to add_isam_p(). */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *state = (struct grep_info *) p;

    return add_isam_p(name, info, state);
}
158
159 static int term_pre(zebra_map_t zm, const char **src,
160                     const char *ct1, int first)
161 {
162     const char *s1, *s0 = *src;
163     const char **map;
164
165     /* skip white space */
166     while (*s0)
167     {
168         if (ct1 && strchr(ct1, *s0))
169             break;
170         s1 = s0;
171         map = zebra_maps_input(zm, &s1, strlen(s1), first);
172         if (**map != *CHR_SPACE)
173             break;
174         s0 = s1;
175     }
176     *src = s0;
177     return *s0;
178 }
179
180
/* esc_str - render a byte string as readable "HH:c  " pairs.
 *  out_buf/out_size: destination; out_size must exceed 20
 *  in_buf/in_size:   bytes to render
 * Bytes outside 32..126 are shown as '?'.  Once the output grows past
 * out_size-20 characters, ".." is appended and rendering stops.
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;
    int idx;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    for (idx = 0; idx < in_size; idx++)
    {
        int byte = in_buf[idx] & 0xff;
        int shown = (byte >= 32 && byte <= 126) ? byte : '?';

        used += (size_t) sprintf(out_buf + used, "%02X:%c  ", byte, shown);
        if (used > out_size - 20)
        {
            strcat(out_buf, "..");
            break;
        }
    }
}
206
207 #define REGEX_CHARS " ^[]()|.*+?!\"$"
208
209 static void add_non_space(const char *start, const char *end,
210                           WRBUF term_dict,
211                           WRBUF display_term,
212                           const char **map, int q_map_match)
213 {
214     size_t sz = end - start;
215
216     wrbuf_write(display_term, start, sz);
217     if (!q_map_match)
218     {
219         while (start < end)
220         {
221             if (strchr(REGEX_CHARS, *start))
222                 wrbuf_putc(term_dict, '\\');
223             wrbuf_putc(term_dict, *start);
224             start++;
225         }
226     }
227     else
228     {
229         char tmpbuf[80];
230         esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
231         
232         wrbuf_puts(term_dict, map[0]);
233     }
234 }
235
236
237 static int term_100_icu(zebra_map_t zm,
238                         const char **src, WRBUF term_dict, int space_split,
239                         WRBUF display_term,
240                         int right_trunc)
241 {
242     int i;
243     const char *res_buf = 0;
244     size_t res_len = 0;
245     const char *display_buf;
246     size_t display_len;
247     if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
248                                  &display_buf, &display_len))
249     {
250         *src += strlen(*src);
251         return 0;
252     }
253     wrbuf_write(display_term, display_buf, display_len);
254     if (right_trunc)
255     {
256         /* ICU sort keys seem to be of the form
257            basechars \x01 accents \x01 length
258            For now we'll just right truncate from basechars . This 
259            may give false hits due to accents not being used.
260         */
261         i = res_len;
262         while (--i >= 0 && res_buf[i] != '\x01')
263             ;
264         if (i > 0)
265         {
266             while (--i >= 0 && res_buf[i] != '\x01')
267                 ;
268         }
269         if (i == 0)
270         {  /* did not find base chars at all. Throw error */
271             return -1;
272         }
273         res_len = i; /* reduce res_len */
274     }
275     for (i = 0; i < res_len; i++)
276     {
277         if (strchr(REGEX_CHARS "\\", res_buf[i]))
278             wrbuf_putc(term_dict, '\\');
279         if (res_buf[i] < 32)
280             wrbuf_putc(term_dict, 1);
281             
282         wrbuf_putc(term_dict, res_buf[i]);
283     }
284     if (right_trunc)
285         wrbuf_puts(term_dict, ".*");
286     return 1;
287 }
288
289 /* term_100: handle term, where trunc = none(no operators at all) */
290 static int term_100(zebra_map_t zm,
291                     const char **src, WRBUF term_dict, int space_split,
292                     WRBUF display_term)
293 {
294     const char *s0;
295     const char **map;
296     int i = 0;
297
298     const char *space_start = 0;
299     const char *space_end = 0;
300
301     if (!term_pre(zm, src, 0, !space_split))
302         return 0;
303     s0 = *src;
304     while (*s0)
305     {
306         const char *s1 = s0;
307         int q_map_match = 0;
308         map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
309         if (space_split)
310         {
311             if (**map == *CHR_SPACE)
312                 break;
313         }
314         else  /* complete subfield only. */
315         {
316             if (**map == *CHR_SPACE)
317             {   /* save space mapping for later  .. */
318                 space_start = s1;
319                 space_end = s0;
320                 continue;
321             }
322             else if (space_start)
323             {   /* reload last space */
324                 while (space_start < space_end)
325                 {
326                     if (strchr(REGEX_CHARS, *space_start))
327                         wrbuf_putc(term_dict, '\\');
328                     wrbuf_putc(display_term, *space_start);
329                     wrbuf_putc(term_dict, *space_start);
330                     space_start++;
331                                
332                 }
333                 /* and reset */
334                 space_start = space_end = 0;
335             }
336         }
337         i++;
338
339         add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
340     }
341     *src = s0;
342     return i;
343 }
344
345 /* term_101: handle term, where trunc = Process # */
346 static int term_101(zebra_map_t zm,
347                     const char **src, WRBUF term_dict, int space_split,
348                     WRBUF display_term)
349 {
350     const char *s0;
351     const char **map;
352     int i = 0;
353
354     if (!term_pre(zm, src, "#", !space_split))
355         return 0;
356     s0 = *src;
357     while (*s0)
358     {
359         if (*s0 == '#')
360         {
361             i++;
362             wrbuf_puts(term_dict, ".*");
363             wrbuf_putc(display_term, *s0);
364             s0++;
365         }
366         else
367         {
368             const char *s1 = s0;
369             int q_map_match = 0;
370             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
371             if (space_split && **map == *CHR_SPACE)
372                 break;
373
374             i++;
375             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
376         }
377     }
378     *src = s0;
379     return i;
380 }
381
382 /* term_103: handle term, where trunc = re-2 (regular expressions) */
383 static int term_103(zebra_map_t zm, const char **src,
384                     WRBUF term_dict, int *errors, int space_split,
385                     WRBUF display_term)
386 {
387     int i = 0;
388     const char *s0;
389     const char **map;
390
391     if (!term_pre(zm, src, "^\\()[].*+?|", !space_split))
392         return 0;
393     s0 = *src;
394     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
395         isdigit(((const unsigned char *)s0)[1]))
396     {
397         *errors = s0[1] - '0';
398         s0 += 3;
399         if (*errors > 3)
400             *errors = 3;
401     }
402     while (*s0)
403     {
404         if (strchr("^\\()[].*+?|-", *s0))
405         {
406             wrbuf_putc(display_term, *s0);
407             wrbuf_putc(term_dict, *s0);
408             s0++;
409             i++;
410         }
411         else
412         {
413             const char *s1 = s0;
414             int q_map_match = 0;
415             map = zebra_maps_search(zm, &s0, strlen(s0),  &q_map_match);
416             if (space_split && **map == *CHR_SPACE)
417                 break;
418
419             i++;
420             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
421         }
422     }
423     *src = s0;
424     
425     return i;
426 }
427
428 /* term_103: handle term, where trunc = re-1 (regular expressions) */
429 static int term_102(zebra_map_t zm, const char **src,
430                     WRBUF term_dict, int space_split, WRBUF display_term)
431 {
432     return term_103(zm, src, term_dict, NULL, space_split, display_term);
433 }
434
435
436 /* term_104: handle term, process # and ! */
437 static int term_104(zebra_map_t zm, const char **src, 
438                     WRBUF term_dict, int space_split, WRBUF display_term)
439 {
440     const char *s0;
441     const char **map;
442     int i = 0;
443
444     if (!term_pre(zm, src, "?*#", !space_split))
445         return 0;
446     s0 = *src;
447     while (*s0)
448     {
449         if (*s0 == '?')
450         {
451             i++;
452             wrbuf_putc(display_term, *s0);
453             s0++;
454             if (*s0 >= '0' && *s0 <= '9')
455             {
456                 int limit = 0;
457                 while (*s0 >= '0' && *s0 <= '9')
458                 {
459                     limit = limit * 10 + (*s0 - '0');
460                     wrbuf_putc(display_term, *s0);
461                     s0++;
462                 }
463                 if (limit > 20)
464                     limit = 20;
465                 while (--limit >= 0)
466                 {
467                     wrbuf_puts(term_dict, ".?");
468                 }
469             }
470             else
471             {
472                 wrbuf_puts(term_dict, ".*");
473             }
474         }
475         else if (*s0 == '*')
476         {
477             i++;
478             wrbuf_puts(term_dict, ".*");
479             wrbuf_putc(display_term, *s0);
480             s0++;
481         }
482         else if (*s0 == '#')
483         {
484             i++;
485             wrbuf_puts(term_dict, ".");
486             wrbuf_putc(display_term, *s0);
487             s0++;
488         }
489         else
490         {
491             const char *s1 = s0;
492             int q_map_match = 0;
493             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
494             if (space_split && **map == *CHR_SPACE)
495                 break;
496
497             i++;
498             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
499         }
500     }
501     *src = s0;
502     return i;
503 }
504
505 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
506 static int term_105(zebra_map_t zm, const char **src, 
507                     WRBUF term_dict, int space_split,
508                     WRBUF display_term, int right_truncate)
509 {
510     const char *s0;
511     const char **map;
512     int i = 0;
513
514     if (!term_pre(zm, src, "\\*!", !space_split))
515         return 0;
516     s0 = *src;
517     while (*s0)
518     {
519         if (*s0 == '*')
520         {
521             i++;
522             wrbuf_puts(term_dict, ".*");
523             wrbuf_putc(display_term, *s0);
524             s0++;
525         }
526         else if (*s0 == '!')
527         {
528             i++;
529             wrbuf_putc(term_dict, '.');
530             wrbuf_putc(display_term, *s0);
531             s0++;
532         }
533         else if (*s0 == '\\')
534         {
535             i++;
536             wrbuf_puts(term_dict, "\\\\");
537             wrbuf_putc(display_term, *s0);
538             s0++;
539         }
540         else
541         {
542             const char *s1 = s0;
543             int q_map_match = 0;
544             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
545             if (space_split && **map == *CHR_SPACE)
546                 break;
547
548             i++;
549             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
550         }
551     }
552     if (right_truncate)
553         wrbuf_puts(term_dict, ".*");
554     *src = s0;
555     return i;
556 }
557
558
559 /* gen_regular_rel - generate regular expression from relation
560  *  val:     border value (inclusive)
561  *  islt:    1 if <=; 0 if >=.
562  */
563 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
564 {
565     char dst_buf[20*5*20]; /* assuming enough for expansion */
566     char *dst = dst_buf;
567     int dst_p;
568     int w, d, i;
569     int pos = 0;
570     char numstr[20];
571
572     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
573     if (val >= 0)
574     {
575         if (islt)
576             strcpy(dst, "(-[0-9]+|(");
577         else
578             strcpy(dst, "((");
579     } 
580     else
581     {
582         if (!islt)
583         {
584             strcpy(dst, "([0-9]+|-(");
585             islt = 1;
586         }
587         else
588         {
589             strcpy(dst, "(-(");
590             islt = 0;
591         }
592         val = -val;
593     }
594     dst_p = strlen(dst);
595     sprintf(numstr, "%d", val);
596     for (w = strlen(numstr); --w >= 0; pos++)
597     {
598         d = numstr[w];
599         if (pos > 0)
600         {
601             if (islt)
602             {
603                 if (d == '0')
604                     continue;
605                 d--;
606             } 
607             else
608             {
609                 if (d == '9')
610                     continue;
611                 d++;
612             }
613         }
614         
615         strcpy(dst + dst_p, numstr);
616         dst_p = strlen(dst) - pos - 1;
617
618         if (islt)
619         {
620             if (d != '0')
621             {
622                 dst[dst_p++] = '[';
623                 dst[dst_p++] = '0';
624                 dst[dst_p++] = '-';
625                 dst[dst_p++] = d;
626                 dst[dst_p++] = ']';
627             }
628             else
629                 dst[dst_p++] = d;
630         }
631         else
632         {
633             if (d != '9')
634             { 
635                 dst[dst_p++] = '[';
636                 dst[dst_p++] = d;
637                 dst[dst_p++] = '-';
638                 dst[dst_p++] = '9';
639                 dst[dst_p++] = ']';
640             }
641             else
642                 dst[dst_p++] = d;
643         }
644         for (i = 0; i<pos; i++)
645         {
646             dst[dst_p++] = '[';
647             dst[dst_p++] = '0';
648             dst[dst_p++] = '-';
649             dst[dst_p++] = '9';
650             dst[dst_p++] = ']';
651         }
652         dst[dst_p++] = '|';
653     }
654     dst[dst_p] = '\0';
655     if (islt)
656     {
657         /* match everything less than 10^(pos-1) */
658         strcat(dst, "0*");
659         for (i = 1; i<pos; i++)
660             strcat(dst, "[0-9]?");
661     }
662     else
663     {
664         /* match everything greater than 10^pos */
665         for (i = 0; i <= pos; i++)
666             strcat(dst, "[0-9]");
667         strcat(dst, "[0-9]*");
668     }
669     strcat(dst, "))");
670     wrbuf_puts(term_dict, dst);
671 }
672
673 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
674 {
675     const char *src = wrbuf_cstr(wsrc);
676     if (src[*indx] == '\\')
677     {
678         wrbuf_putc(term_p, src[*indx]);
679         (*indx)++;
680     }
681     wrbuf_putc(term_p, src[*indx]);
682     (*indx)++;
683 }
684
685 /*
686  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
687  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
688  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
689  *              ([^-a].*|a[^-b].*|ab[c-].*)
690  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
691  *              ([^a-].*|a[^b-].*|ab[^c-].*)
692  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
693  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
694  */
695 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
696                            const char **term_sub, WRBUF term_dict,
697                            const Odr_oid *attributeSet,
698                            zebra_map_t zm, int space_split, 
699                            WRBUF display_term,
700                            int *error_code)
701 {
702     AttrType relation;
703     int relation_value;
704     int i;
705     WRBUF term_component = wrbuf_alloc();
706
707     attr_init_APT(&relation, zapt, 2);
708     relation_value = attr_find(&relation, NULL);
709
710     *error_code = 0;
711     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
712     switch (relation_value)
713     {
714     case 1:
715         if (!term_100(zm, term_sub, term_component, space_split, display_term))
716         {
717             wrbuf_destroy(term_component);
718             return 0;
719         }
720         yaz_log(log_level_rpn, "Relation <");
721         
722         wrbuf_putc(term_dict, '(');
723         for (i = 0; i < wrbuf_len(term_component); )
724         {
725             int j = 0;
726             
727             if (i)
728                 wrbuf_putc(term_dict, '|');
729             while (j < i)
730                 string_rel_add_char(term_dict, term_component, &j);
731
732             wrbuf_putc(term_dict, '[');
733
734             wrbuf_putc(term_dict, '^');
735             
736             wrbuf_putc(term_dict, 1);
737             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
738             
739             string_rel_add_char(term_dict, term_component, &i);
740             wrbuf_putc(term_dict, '-');
741             
742             wrbuf_putc(term_dict, ']');
743             wrbuf_putc(term_dict, '.');
744             wrbuf_putc(term_dict, '*');
745         }
746         wrbuf_putc(term_dict, ')');
747         break;
748     case 2:
749         if (!term_100(zm, term_sub, term_component, space_split, display_term))
750         {
751             wrbuf_destroy(term_component);
752             return 0;
753         }
754         yaz_log(log_level_rpn, "Relation <=");
755
756         wrbuf_putc(term_dict, '(');
757         for (i = 0; i < wrbuf_len(term_component); )
758         {
759             int j = 0;
760
761             while (j < i)
762                 string_rel_add_char(term_dict, term_component, &j);
763             wrbuf_putc(term_dict, '[');
764
765             wrbuf_putc(term_dict, '^');
766
767             wrbuf_putc(term_dict, 1);
768             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
769
770             string_rel_add_char(term_dict, term_component, &i);
771             wrbuf_putc(term_dict, '-');
772
773             wrbuf_putc(term_dict, ']');
774             wrbuf_putc(term_dict, '.');
775             wrbuf_putc(term_dict, '*');
776
777             wrbuf_putc(term_dict, '|');
778         }
779         for (i = 0; i < wrbuf_len(term_component); )
780             string_rel_add_char(term_dict, term_component, &i);
781         wrbuf_putc(term_dict, ')');
782         break;
783     case 5:
784         if (!term_100(zm, term_sub, term_component, space_split, display_term))
785         {
786             wrbuf_destroy(term_component);
787             return 0;
788         }
789         yaz_log(log_level_rpn, "Relation >");
790
791         wrbuf_putc(term_dict, '(');
792         for (i = 0; i < wrbuf_len(term_component); )
793         {
794             int j = 0;
795
796             while (j < i)
797                 string_rel_add_char(term_dict, term_component, &j);
798             wrbuf_putc(term_dict, '[');
799             
800             wrbuf_putc(term_dict, '^');
801             wrbuf_putc(term_dict, '-');
802             string_rel_add_char(term_dict, term_component, &i);
803
804             wrbuf_putc(term_dict, ']');
805             wrbuf_putc(term_dict, '.');
806             wrbuf_putc(term_dict, '*');
807
808             wrbuf_putc(term_dict, '|');
809         }
810         for (i = 0; i < wrbuf_len(term_component); )
811             string_rel_add_char(term_dict, term_component, &i);
812         wrbuf_putc(term_dict, '.');
813         wrbuf_putc(term_dict, '+');
814         wrbuf_putc(term_dict, ')');
815         break;
816     case 4:
817         if (!term_100(zm, term_sub, term_component, space_split, display_term))
818         {
819             wrbuf_destroy(term_component);
820             return 0;
821         }
822         yaz_log(log_level_rpn, "Relation >=");
823
824         wrbuf_putc(term_dict, '(');
825         for (i = 0; i < wrbuf_len(term_component); )
826         {
827             int j = 0;
828
829             if (i)
830                 wrbuf_putc(term_dict, '|');
831             while (j < i)
832                 string_rel_add_char(term_dict, term_component, &j);
833             wrbuf_putc(term_dict, '[');
834
835             if (i < wrbuf_len(term_component)-1)
836             {
837                 wrbuf_putc(term_dict, '^');
838                 wrbuf_putc(term_dict, '-');
839                 string_rel_add_char(term_dict, term_component, &i);
840             }
841             else
842             {
843                 string_rel_add_char(term_dict, term_component, &i);
844                 wrbuf_putc(term_dict, '-');
845             }
846             wrbuf_putc(term_dict, ']');
847             wrbuf_putc(term_dict, '.');
848             wrbuf_putc(term_dict, '*');
849         }
850         wrbuf_putc(term_dict, ')');
851         break;
852     case 3:
853     case 102:
854     case -1:
855         if (!**term_sub)
856             return 1;
857         yaz_log(log_level_rpn, "Relation =");
858         if (!term_100(zm, term_sub, term_component, space_split, display_term))
859         {
860             wrbuf_destroy(term_component);
861             return 0;
862         }
863         wrbuf_puts(term_dict, "(");
864         wrbuf_puts(term_dict, wrbuf_cstr(term_component));
865         wrbuf_puts(term_dict, ")");
866         break;
867     case 103:
868         yaz_log(log_level_rpn, "Relation always matches");
869         /* skip to end of term (we don't care what it is) */
870         while (**term_sub != '\0')
871             (*term_sub)++;
872         break;
873     default:
874         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
875         wrbuf_destroy(term_component);
876         return 0;
877     }
878     wrbuf_destroy(term_component);
879     return 1;
880 }
881
882 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
883                              const char **term_sub, 
884                              WRBUF term_dict,
885                              const Odr_oid *attributeSet, NMEM stream,
886                              struct grep_info *grep_info,
887                              const char *index_type, int complete_flag,
888                              WRBUF display_term,
889                              const char *xpath_use,
890                              struct ord_list **ol,
891                              zebra_map_t zm);
892
893 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
894                                 Z_AttributesPlusTerm *zapt,
895                                 zint *hits_limit_value,
896                                 const char **term_ref_id_str,
897                                 NMEM nmem)
898 {
899     AttrType term_ref_id_attr;
900     AttrType hits_limit_attr;
901     int term_ref_id_int;
902     zint hits_limit_from_attr;
903  
904     attr_init_APT(&hits_limit_attr, zapt, 11);
905     hits_limit_from_attr  = attr_find(&hits_limit_attr, NULL);
906
907     attr_init_APT(&term_ref_id_attr, zapt, 10);
908     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
909     if (term_ref_id_int >= 0)
910     {
911         char *res = nmem_malloc(nmem, 20);
912         sprintf(res, "%d", term_ref_id_int);
913         *term_ref_id_str = res;
914     }
915     if (hits_limit_from_attr != -1)
916         *hits_limit_value = hits_limit_from_attr;
917
918     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
919             *term_ref_id_str ? *term_ref_id_str : "none",
920             *hits_limit_value);
921     return ZEBRA_OK;
922 }
923
924 /** \brief search for term (which may be truncated)
925  */
926 static ZEBRA_RES search_term(ZebraHandle zh,
927                              Z_AttributesPlusTerm *zapt,
928                              const char **term_sub, 
929                              const Odr_oid *attributeSet,
930                              zint hits_limit, NMEM stream,
931                              struct grep_info *grep_info,
932                              const char *index_type, int complete_flag,
933                              const char *rank_type, 
934                              const char *xpath_use,
935                              NMEM rset_nmem,
936                              RSET *rset,
937                              struct rset_key_control *kc,
938                              zebra_map_t zm)
939 {
940     ZEBRA_RES res;
941     struct ord_list *ol;
942     zint hits_limit_value = hits_limit;
943     const char *term_ref_id_str = 0;
944     WRBUF term_dict = wrbuf_alloc();
945     WRBUF display_term = wrbuf_alloc();
946     *rset = 0;
947     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
948                           stream);
949     grep_info->isam_p_indx = 0;
950     res = string_term(zh, zapt, term_sub, term_dict,
951                       attributeSet, stream, grep_info,
952                       index_type, complete_flag,
953                       display_term, xpath_use, &ol, zm);
954     wrbuf_destroy(term_dict);
955     if (res == ZEBRA_OK && *term_sub)
956     {
957         yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
958         *rset = rset_trunc(zh, grep_info->isam_p_buf,
959                            grep_info->isam_p_indx, wrbuf_buf(display_term),
960                            wrbuf_len(display_term), rank_type, 
961                            1 /* preserve pos */,
962                            zapt->term->which, rset_nmem,
963                            kc, kc->scope, ol, index_type, hits_limit_value,
964                            term_ref_id_str);
965         if (!*rset)
966             res = ZEBRA_FAIL;
967     }
968     wrbuf_destroy(display_term);
969     return res;
970 }
971
972 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
973                              const char **term_sub, 
974                              WRBUF term_dict,
975                              const Odr_oid *attributeSet, NMEM stream,
976                              struct grep_info *grep_info,
977                              const char *index_type, int complete_flag,
978                              WRBUF display_term,
979                              const char *xpath_use,
980                              struct ord_list **ol,
981                              zebra_map_t zm)
982 {
983     int r;
984     AttrType truncation;
985     int truncation_value;
986     const char *termp;
987     struct rpn_char_map_info rcmi;
988
989     int space_split = complete_flag ? 0 : 1;
990     int ord = -1;
991     int regex_range = 0;
992     int max_pos, prefix_len = 0;
993     int relation_error;
994     char ord_buf[32];
995     int ord_len, i;
996
997     *ol = ord_list_create(stream);
998
999     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1000     attr_init_APT(&truncation, zapt, 5);
1001     truncation_value = attr_find(&truncation, NULL);
1002     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1003
1004     termp = *term_sub; /* start of term for each database */
1005     
1006     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1007                           attributeSet, &ord) != ZEBRA_OK)
1008     {
1009         *term_sub = 0;
1010         return ZEBRA_FAIL;
1011     }
1012     
1013     wrbuf_rewind(term_dict); /* new dictionary regexp term */
1014     
1015     *ol = ord_list_append(stream, *ol, ord);
1016     ord_len = key_SU_encode(ord, ord_buf);
1017     
1018     wrbuf_putc(term_dict, '(');
1019     
1020     for (i = 0; i<ord_len; i++)
1021     {
1022         wrbuf_putc(term_dict, 1);  /* our internal regexp escape char */
1023         wrbuf_putc(term_dict, ord_buf[i]);
1024     }
1025     wrbuf_putc(term_dict, ')');
1026     
1027     prefix_len = wrbuf_len(term_dict);
1028
1029     if (zebra_maps_is_icu(zm))
1030     {
1031         int relation_value;
1032         AttrType relation;
1033         
1034         attr_init_APT(&relation, zapt, 2);
1035         relation_value = attr_find(&relation, NULL);
1036         if (relation_value == 103) /* always matches */
1037             termp += strlen(termp); /* move to end of term */
1038         else if (relation_value == 3 || relation_value == 102 || relation_value == -1)
1039         {
1040             /* ICU case */
1041             switch (truncation_value)
1042             {
1043             case -1:         /* not specified */
1044             case 100:        /* do not truncate */
1045                 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0))
1046                 {
1047                     *term_sub = 0;
1048                     return ZEBRA_OK;
1049                 }
1050                 break;
1051             case 1:          /* right truncation */
1052                 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1))
1053                 {
1054                     *term_sub = 0;
1055                     return ZEBRA_OK;
1056                 }
1057                 break;
1058             default:
1059                 zebra_setError_zint(zh,
1060                                     YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1061                                     truncation_value);
1062                 return ZEBRA_FAIL;
1063             }
1064         }
1065         else
1066         {
1067             zebra_setError_zint(zh,
1068                                 YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE,
1069                                 relation_value);
1070             return ZEBRA_FAIL;
1071         }
1072     }
1073     else
1074     {
1075         /* non-ICU case. using string.chr and friends */
1076         switch (truncation_value)
1077         {
1078         case -1:         /* not specified */
1079         case 100:        /* do not truncate */
1080             if (!string_relation(zh, zapt, &termp, term_dict,
1081                                  attributeSet,
1082                                  zm, space_split, display_term,
1083                                  &relation_error))
1084             {
1085                 if (relation_error)
1086                 {
1087                     zebra_setError(zh, relation_error, 0);
1088                     return ZEBRA_FAIL;
1089                 }
1090                 *term_sub = 0;
1091                 return ZEBRA_OK;
1092             }
1093             break;
1094         case 1:          /* right truncation */
1095             wrbuf_putc(term_dict, '(');
1096             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1097             {
1098                 *term_sub = 0;
1099                 return ZEBRA_OK;
1100             }
1101             wrbuf_puts(term_dict, ".*)");
1102             break;
1103         case 2:          /* left truncation */
1104             wrbuf_puts(term_dict, "(.*");
1105             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1106             {
1107                 *term_sub = 0;
1108                 return ZEBRA_OK;
1109             }
1110             wrbuf_putc(term_dict, ')');
1111             break;
1112         case 3:          /* left&right truncation */
1113             wrbuf_puts(term_dict, "(.*");
1114             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1115             {
1116                 *term_sub = 0;
1117                 return ZEBRA_OK;
1118             }
1119             wrbuf_puts(term_dict, ".*)");
1120             break;
1121         case 101:        /* process # in term */
1122             wrbuf_putc(term_dict, '(');
1123             if (!term_101(zm, &termp, term_dict, space_split, display_term))
1124             {
1125                 *term_sub = 0;
1126                 return ZEBRA_OK;
1127             }
1128             wrbuf_puts(term_dict, ")");
1129             break;
1130         case 102:        /* Regexp-1 */
1131             wrbuf_putc(term_dict, '(');
1132             if (!term_102(zm, &termp, term_dict, space_split, display_term))
1133             {
1134                 *term_sub = 0;
1135                 return ZEBRA_OK;
1136             }
1137             wrbuf_putc(term_dict, ')');
1138             break;
1139         case 103:       /* Regexp-2 */
1140             regex_range = 1;
1141             wrbuf_putc(term_dict, '(');
1142             if (!term_103(zm, &termp, term_dict, &regex_range,
1143                           space_split, display_term))
1144             {
1145                 *term_sub = 0;
1146                 return ZEBRA_OK;
1147             }
1148             wrbuf_putc(term_dict, ')');
1149             break;
1150         case 104:        /* process # and ! in term */
1151             wrbuf_putc(term_dict, '(');
1152             if (!term_104(zm, &termp, term_dict, space_split, display_term))
1153             {
1154                 *term_sub = 0;
1155                 return ZEBRA_OK;
1156             }
1157             wrbuf_putc(term_dict, ')');
1158             break;
1159         case 105:        /* process * and ! in term */
1160             wrbuf_putc(term_dict, '(');
1161             if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1162             {
1163                 *term_sub = 0;
1164                 return ZEBRA_OK;
1165             }
1166             wrbuf_putc(term_dict, ')');
1167             break;
1168         case 106:        /* process * and ! in term */
1169             wrbuf_putc(term_dict, '(');
1170             if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1171             {
1172                 *term_sub = 0;
1173                 return ZEBRA_OK;
1174             }
1175             wrbuf_putc(term_dict, ')');
1176             break;
1177         default:
1178             zebra_setError_zint(zh,
1179                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1180                                 truncation_value);
1181             return ZEBRA_FAIL;
1182         }
1183     }
1184     if (1)
1185     {
1186         char buf[1000];
1187         const char *input = wrbuf_cstr(term_dict) + prefix_len;
1188         esc_str(buf, sizeof(buf), input, strlen(input));
1189     }
1190     {
1191         WRBUF pr_wr = wrbuf_alloc();
1192
1193         wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1194         yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1195         wrbuf_destroy(pr_wr);
1196     }
1197     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1198                          grep_info, &max_pos, 
1199                          ord_len /* number of "exact" chars */,
1200                          grep_handle);
1201     if (r == 1)
1202         zebra_set_partial_result(zh);
1203     else if (r)
1204         yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1205     *term_sub = termp;
1206     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1207     return ZEBRA_OK;
1208 }
1209
1210
1211
1212 static void grep_info_delete(struct grep_info *grep_info)
1213 {
1214 #ifdef TERM_COUNT
1215     xfree(grep_info->term_no);
1216 #endif
1217     xfree(grep_info->isam_p_buf);
1218 }
1219
1220 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1221                                    Z_AttributesPlusTerm *zapt,
1222                                    struct grep_info *grep_info,
1223                                    const char *index_type)
1224 {
1225 #ifdef TERM_COUNT
1226     grep_info->term_no = 0;
1227 #endif
1228     grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1229     grep_info->isam_p_size = 0;
1230     grep_info->isam_p_buf = NULL;
1231     grep_info->zh = zh;
1232     grep_info->index_type = index_type;
1233     grep_info->termset = 0;
1234     if (zapt)
1235     {
1236         AttrType truncmax;
1237         int truncmax_value;
1238
1239         attr_init_APT(&truncmax, zapt, 13);
1240         truncmax_value = attr_find(&truncmax, NULL);
1241         if (truncmax_value != -1)
1242             grep_info->trunc_max = truncmax_value;
1243     }
1244     if (zapt)
1245     {
1246         AttrType termset;
1247         int termset_value_numeric;
1248         const char *termset_value_string;
1249
1250         attr_init_APT(&termset, zapt, 8);
1251         termset_value_numeric =
1252             attr_find_ex(&termset, NULL, &termset_value_string);
1253         if (termset_value_numeric != -1)
1254         {
1255 #if TERMSET_DISABLE
1256             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1257             return ZEBRA_FAIL;
1258 #else
1259             char resname[32];
1260             const char *termset_name = 0;
1261             if (termset_value_numeric != -2)
1262             {
1263                 
1264                 sprintf(resname, "%d", termset_value_numeric);
1265                 termset_name = resname;
1266             }
1267             else
1268                 termset_name = termset_value_string;
1269             yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1270             grep_info->termset = resultSetAdd(zh, termset_name, 1);
1271             if (!grep_info->termset)
1272             {
1273                 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1274                 return ZEBRA_FAIL;
1275             }
1276 #endif
1277         }
1278     }
1279     return ZEBRA_OK;
1280 }
1281
1282 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1283                                      Z_AttributesPlusTerm *zapt,
1284                                      const char *termz,
1285                                      const Odr_oid *attributeSet,
1286                                      zint hits_limit,
1287                                      NMEM stream,
1288                                      const char *index_type, int complete_flag,
1289                                      const char *rank_type,
1290                                      const char *xpath_use,
1291                                      NMEM rset_nmem,
1292                                      RSET **result_sets, int *num_result_sets,
1293                                      struct rset_key_control *kc,
1294                                      zebra_map_t zm)
1295 {
1296     struct grep_info grep_info;
1297     const char *termp = termz;
1298     int alloc_sets = 0;
1299     
1300     *num_result_sets = 0;
1301     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1302         return ZEBRA_FAIL;
1303     while (1)
1304     { 
1305         ZEBRA_RES res;
1306
1307         if (alloc_sets == *num_result_sets)
1308         {
1309             int add = 10;
1310             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1311                                               sizeof(*rnew));
1312             if (alloc_sets)
1313                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1314             alloc_sets = alloc_sets + add;
1315             *result_sets = rnew;
1316         }
1317         res = search_term(zh, zapt, &termp, attributeSet, hits_limit,
1318                           stream, &grep_info,
1319                           index_type, complete_flag,
1320                           rank_type,
1321                           xpath_use, rset_nmem,
1322                           &(*result_sets)[*num_result_sets],
1323                           kc, zm);
1324         if (res != ZEBRA_OK)
1325         {
1326             int i;
1327             for (i = 0; i < *num_result_sets; i++)
1328                 rset_delete((*result_sets)[i]);
1329             grep_info_delete(&grep_info);
1330             return res;
1331         }
1332         if ((*result_sets)[*num_result_sets] == 0)
1333             break;
1334         (*num_result_sets)++;
1335
1336         if (!*termp)
1337             break;
1338     }
1339     grep_info_delete(&grep_info);
1340     return ZEBRA_OK;
1341 }
1342                                
1343 /**
1344    \brief Create result set(s) for list of terms
1345    \param zh Zebra Handle
1346    \param zapt Attributes Plust Term (RPN leaf)
1347    \param termz term as used in query but converted to UTF-8
1348    \param attributeSet default attribute set
1349    \param stream memory for result
1350    \param index_type register type ("w", "p",..)
1351    \param complete_flag whether it's phrases or not
1352    \param rank_type term flags for ranking
1353    \param xpath_use use attribute for X-Path (-1 for no X-path)
1354    \param rset_nmem memory for result sets
1355    \param result_sets output result set for each term in list (output)
1356    \param num_result_sets number of output result sets
1357    \param kc rset key control to be used for created result sets
1358 */
1359 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1360                                    Z_AttributesPlusTerm *zapt,
1361                                    const char *termz,
1362                                    const Odr_oid *attributeSet,
1363                                    zint hits_limit,
1364                                    NMEM stream,
1365                                    const char *index_type, int complete_flag,
1366                                    const char *rank_type,
1367                                    const char *xpath_use,
1368                                    NMEM rset_nmem,
1369                                    RSET **result_sets, int *num_result_sets,
1370                                    struct rset_key_control *kc)
1371 {
1372     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1373     if (zebra_maps_is_icu(zm))
1374         zebra_map_tokenize_start(zm, termz, strlen(termz));
1375     return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit,
1376                                stream, index_type, complete_flag,
1377                                rank_type, xpath_use,
1378                                rset_nmem, result_sets, num_result_sets,
1379                                kc, zm);
1380 }
1381
1382
1383 /** \brief limit a search by position - returns result set
1384  */
1385 static ZEBRA_RES search_position(ZebraHandle zh,
1386                                  Z_AttributesPlusTerm *zapt,
1387                                  const Odr_oid *attributeSet,
1388                                  const char *index_type,
1389                                  NMEM rset_nmem,
1390                                  RSET *rset,
1391                                  struct rset_key_control *kc)
1392 {
1393     int position_value;
1394     AttrType position;
1395     int ord = -1;
1396     char ord_buf[32];
1397     char term_dict[100];
1398     int ord_len;
1399     char *val;
1400     ISAM_P isam_p;
1401     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1402     
1403     attr_init_APT(&position, zapt, 3);
1404     position_value = attr_find(&position, NULL);
1405     switch(position_value)
1406     {
1407     case 3:
1408     case -1:
1409         return ZEBRA_OK;
1410     case 1:
1411     case 2:
1412         break;
1413     default:
1414         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1415                             position_value);
1416         return ZEBRA_FAIL;
1417     }
1418
1419
1420     if (!zebra_maps_is_first_in_field(zm))
1421     {
1422         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1423                             position_value);
1424         return ZEBRA_FAIL;
1425     }
1426
1427     if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1428                           attributeSet, &ord) != ZEBRA_OK)
1429     {
1430         return ZEBRA_FAIL;
1431     }
1432     ord_len = key_SU_encode(ord, ord_buf);
1433     memcpy(term_dict, ord_buf, ord_len);
1434     strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1435     val = dict_lookup(zh->reg->dict, term_dict);
1436     if (val)
1437     {
1438         assert(*val == sizeof(ISAM_P));
1439         memcpy(&isam_p, val+1, sizeof(isam_p));
1440
1441         *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope, 
1442                                        isam_p, 0);
1443     }
1444     return ZEBRA_OK;
1445 }
1446
1447 /** \brief returns result set for phrase search
1448  */
1449 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1450                                        Z_AttributesPlusTerm *zapt,
1451                                        const char *termz_org,
1452                                        const Odr_oid *attributeSet,
1453                                        zint hits_limit,
1454                                        NMEM stream,
1455                                        const char *index_type,
1456                                        int complete_flag,
1457                                        const char *rank_type,
1458                                        const char *xpath_use,
1459                                        NMEM rset_nmem,
1460                                        RSET *rset,
1461                                        struct rset_key_control *kc)
1462 {
1463     RSET *result_sets = 0;
1464     int num_result_sets = 0;
1465     ZEBRA_RES res =
1466         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1467                           stream, index_type, complete_flag,
1468                           rank_type, xpath_use,
1469                           rset_nmem,
1470                           &result_sets, &num_result_sets, kc);
1471     
1472     if (res != ZEBRA_OK)
1473         return res;
1474
1475     if (num_result_sets > 0)
1476     {
1477         RSET first_set = 0;
1478         res = search_position(zh, zapt, attributeSet, 
1479                               index_type,
1480                               rset_nmem, &first_set,
1481                               kc);
1482         if (res != ZEBRA_OK)
1483         {
1484             int i;
1485             for (i = 0; i<num_result_sets; i++)
1486                 rset_delete(result_sets[i]);
1487             return res;
1488         }
1489         if (first_set)
1490         {
1491             RSET *nsets = nmem_malloc(stream,
1492                                       sizeof(RSET) * (num_result_sets+1));
1493             nsets[0] = first_set;
1494             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1495             result_sets = nsets;
1496             num_result_sets++;
1497         }
1498     }
1499     if (num_result_sets == 0)
1500         *rset = rset_create_null(rset_nmem, kc, 0); 
1501     else if (num_result_sets == 1)
1502         *rset = result_sets[0];
1503     else
1504         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1505                                  num_result_sets, result_sets,
1506                                  1 /* ordered */, 0 /* exclusion */,
1507                                  3 /* relation */, 1 /* distance */);
1508     if (!*rset)
1509         return ZEBRA_FAIL;
1510     return ZEBRA_OK;
1511 }
1512
1513 /** \brief returns result set for or-list search
1514  */
1515 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1516                                         Z_AttributesPlusTerm *zapt,
1517                                         const char *termz_org,
1518                                         const Odr_oid *attributeSet,
1519                                         zint hits_limit,
1520                                         NMEM stream,
1521                                         const char *index_type, 
1522                                         int complete_flag,
1523                                         const char *rank_type,
1524                                         const char *xpath_use,
1525                                         NMEM rset_nmem,
1526                                         RSET *rset,
1527                                         struct rset_key_control *kc)
1528 {
1529     RSET *result_sets = 0;
1530     int num_result_sets = 0;
1531     int i;
1532     ZEBRA_RES res =
1533         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1534                           stream, index_type, complete_flag,
1535                           rank_type, xpath_use,
1536                           rset_nmem,
1537                           &result_sets, &num_result_sets, kc);
1538     if (res != ZEBRA_OK)
1539         return res;
1540
1541     for (i = 0; i<num_result_sets; i++)
1542     {
1543         RSET first_set = 0;
1544         res = search_position(zh, zapt, attributeSet, 
1545                               index_type,
1546                               rset_nmem, &first_set,
1547                               kc);
1548         if (res != ZEBRA_OK)
1549         {
1550             for (i = 0; i<num_result_sets; i++)
1551                 rset_delete(result_sets[i]);
1552             return res;
1553         }
1554
1555         if (first_set)
1556         {
1557             RSET tmp_set[2];
1558
1559             tmp_set[0] = first_set;
1560             tmp_set[1] = result_sets[i];
1561             
1562             result_sets[i] = rset_create_prox(
1563                 rset_nmem, kc, kc->scope,
1564                 2, tmp_set,
1565                 1 /* ordered */, 0 /* exclusion */,
1566                 3 /* relation */, 1 /* distance */);
1567         }
1568     }
1569     if (num_result_sets == 0)
1570         *rset = rset_create_null(rset_nmem, kc, 0); 
1571     else if (num_result_sets == 1)
1572         *rset = result_sets[0];
1573     else
1574         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1575                                num_result_sets, result_sets);
1576     if (!*rset)
1577         return ZEBRA_FAIL;
1578     return ZEBRA_OK;
1579 }
1580
1581 /** \brief returns result set for and-list search
1582  */
1583 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1584                                          Z_AttributesPlusTerm *zapt,
1585                                          const char *termz_org,
1586                                          const Odr_oid *attributeSet,
1587                                          zint hits_limit,
1588                                          NMEM stream,
1589                                          const char *index_type, 
1590                                          int complete_flag,
1591                                          const char *rank_type, 
1592                                          const char *xpath_use,
1593                                          NMEM rset_nmem,
1594                                          RSET *rset,
1595                                          struct rset_key_control *kc)
1596 {
1597     RSET *result_sets = 0;
1598     int num_result_sets = 0;
1599     int i;
1600     ZEBRA_RES res =
1601         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1602                           stream, index_type, complete_flag,
1603                           rank_type, xpath_use,
1604                           rset_nmem,
1605                           &result_sets, &num_result_sets,
1606                           kc);
1607     if (res != ZEBRA_OK)
1608         return res;
1609     for (i = 0; i<num_result_sets; i++)
1610     {
1611         RSET first_set = 0;
1612         res = search_position(zh, zapt, attributeSet, 
1613                               index_type,
1614                               rset_nmem, &first_set,
1615                               kc);
1616         if (res != ZEBRA_OK)
1617         {
1618             for (i = 0; i<num_result_sets; i++)
1619                 rset_delete(result_sets[i]);
1620             return res;
1621         }
1622
1623         if (first_set)
1624         {
1625             RSET tmp_set[2];
1626
1627             tmp_set[0] = first_set;
1628             tmp_set[1] = result_sets[i];
1629             
1630             result_sets[i] = rset_create_prox(
1631                 rset_nmem, kc, kc->scope,
1632                 2, tmp_set,
1633                 1 /* ordered */, 0 /* exclusion */,
1634                 3 /* relation */, 1 /* distance */);
1635         }
1636     }
1637
1638
1639     if (num_result_sets == 0)
1640         *rset = rset_create_null(rset_nmem, kc, 0); 
1641     else if (num_result_sets == 1)
1642         *rset = result_sets[0];
1643     else
1644         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1645                                 num_result_sets, result_sets);
1646     if (!*rset)
1647         return ZEBRA_FAIL;
1648     return ZEBRA_OK;
1649 }
1650
1651 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1652                             const char **term_sub,
1653                             WRBUF term_dict,
1654                             const Odr_oid *attributeSet,
1655                             struct grep_info *grep_info,
1656                             int *max_pos,
1657                             zebra_map_t zm,
1658                             WRBUF display_term,
1659                             int *error_code)
1660 {
1661     AttrType relation;
1662     int relation_value;
1663     int term_value;
1664     int r;
1665     WRBUF term_num = wrbuf_alloc();
1666
1667     *error_code = 0;
1668     attr_init_APT(&relation, zapt, 2);
1669     relation_value = attr_find(&relation, NULL);
1670
1671     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1672
1673     switch (relation_value)
1674     {
1675     case 1:
1676         yaz_log(log_level_rpn, "Relation <");
1677         if (!term_100(zm, term_sub, term_num, 1, display_term))
1678         { 
1679             wrbuf_destroy(term_num);
1680             return 0;
1681         }
1682         term_value = atoi(wrbuf_cstr(term_num));
1683         gen_regular_rel(term_dict, term_value-1, 1);
1684         break;
1685     case 2:
1686         yaz_log(log_level_rpn, "Relation <=");
1687         if (!term_100(zm, term_sub, term_num, 1, display_term))
1688         {
1689             wrbuf_destroy(term_num);
1690             return 0;
1691         }
1692         term_value = atoi(wrbuf_cstr(term_num));
1693         gen_regular_rel(term_dict, term_value, 1);
1694         break;
1695     case 4:
1696         yaz_log(log_level_rpn, "Relation >=");
1697         if (!term_100(zm, term_sub, term_num, 1, display_term))
1698         {
1699             wrbuf_destroy(term_num);
1700             return 0;
1701         }
1702         term_value = atoi(wrbuf_cstr(term_num));
1703         gen_regular_rel(term_dict, term_value, 0);
1704         break;
1705     case 5:
1706         yaz_log(log_level_rpn, "Relation >");
1707         if (!term_100(zm, term_sub, term_num, 1, display_term))
1708         {
1709             wrbuf_destroy(term_num);
1710             return 0;
1711         }
1712         term_value = atoi(wrbuf_cstr(term_num));
1713         gen_regular_rel(term_dict, term_value+1, 0);
1714         break;
1715     case -1:
1716     case 3:
1717         yaz_log(log_level_rpn, "Relation =");
1718         if (!term_100(zm, term_sub, term_num, 1, display_term))
1719         {
1720             wrbuf_destroy(term_num);
1721             return 0; 
1722         }
1723         term_value = atoi(wrbuf_cstr(term_num));
1724         wrbuf_printf(term_dict, "(0*%d)", term_value);
1725         break;
1726     case 103:
1727         /* term_tmp untouched.. */
1728         while (**term_sub != '\0')
1729             (*term_sub)++;
1730         break;
1731     default:
1732         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1733         wrbuf_destroy(term_num); 
1734         return 0;
1735     }
1736     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 
1737                          0, grep_info, max_pos, 0, grep_handle);
1738
1739     if (r == 1)
1740         zebra_set_partial_result(zh);
1741     else if (r)
1742         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1743     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1744     wrbuf_destroy(term_num);
1745     return 1;
1746 }
1747
1748 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1749                               const char **term_sub, 
1750                               WRBUF term_dict,
1751                               const Odr_oid *attributeSet, NMEM stream,
1752                               struct grep_info *grep_info,
1753                               const char *index_type, int complete_flag,
1754                               WRBUF display_term,
1755                               const char *xpath_use,
1756                               struct ord_list **ol)
1757 {
1758     const char *termp;
1759     struct rpn_char_map_info rcmi;
1760     int max_pos;
1761     int relation_error = 0;
1762     int ord, ord_len, i;
1763     char ord_buf[32];
1764     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1765     
1766     *ol = ord_list_create(stream);
1767
1768     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1769
1770     termp = *term_sub;
1771     
1772     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1773                           attributeSet, &ord) != ZEBRA_OK)
1774     {
1775         return ZEBRA_FAIL;
1776     }
1777     
1778     wrbuf_rewind(term_dict);
1779     
1780     *ol = ord_list_append(stream, *ol, ord);
1781     
1782     ord_len = key_SU_encode(ord, ord_buf);
1783     
1784     wrbuf_putc(term_dict, '(');
1785     for (i = 0; i < ord_len; i++)
1786     {
1787         wrbuf_putc(term_dict, 1);
1788         wrbuf_putc(term_dict, ord_buf[i]);
1789     }
1790     wrbuf_putc(term_dict, ')');
1791     
1792     if (!numeric_relation(zh, zapt, &termp, term_dict,
1793                           attributeSet, grep_info, &max_pos, zm,
1794                           display_term, &relation_error))
1795     {
1796         if (relation_error)
1797         {
1798             zebra_setError(zh, relation_error, 0);
1799             return ZEBRA_FAIL;
1800         }
1801         *term_sub = 0;
1802         return ZEBRA_OK;
1803     }
1804     *term_sub = termp;
1805     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1806     return ZEBRA_OK;
1807 }
1808
1809                                  
1810 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1811                                         Z_AttributesPlusTerm *zapt,
1812                                         const char *termz,
1813                                         const Odr_oid *attributeSet,
1814                                         NMEM stream,
1815                                         const char *index_type, 
1816                                         int complete_flag,
1817                                         const char *rank_type, 
1818                                         const char *xpath_use,
1819                                         NMEM rset_nmem,
1820                                         RSET *rset,
1821                                         struct rset_key_control *kc)
1822 {
1823     const char *termp = termz;
1824     RSET *result_sets = 0;
1825     int num_result_sets = 0;
1826     ZEBRA_RES res;
1827     struct grep_info grep_info;
1828     int alloc_sets = 0;
1829     zint hits_limit_value;
1830     const char *term_ref_id_str = 0;
1831
1832     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1833                           stream);
1834
1835     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1836     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1837         return ZEBRA_FAIL;
1838     while (1)
1839     { 
1840         struct ord_list *ol;
1841         WRBUF term_dict = wrbuf_alloc();
1842         WRBUF display_term = wrbuf_alloc();
1843         if (alloc_sets == num_result_sets)
1844         {
1845             int add = 10;
1846             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1847                                               sizeof(*rnew));
1848             if (alloc_sets)
1849                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1850             alloc_sets = alloc_sets + add;
1851             result_sets = rnew;
1852         }
1853         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1854         grep_info.isam_p_indx = 0;
1855         res = numeric_term(zh, zapt, &termp, term_dict,
1856                            attributeSet, stream, &grep_info,
1857                            index_type, complete_flag,
1858                            display_term, xpath_use, &ol);
1859         wrbuf_destroy(term_dict);
1860         if (res == ZEBRA_FAIL || termp == 0)
1861         {
1862             wrbuf_destroy(display_term);
1863             break;
1864         }
1865         yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
1866         result_sets[num_result_sets] =
1867             rset_trunc(zh, grep_info.isam_p_buf,
1868                        grep_info.isam_p_indx, wrbuf_buf(display_term),
1869                        wrbuf_len(display_term), rank_type,
1870                        0 /* preserve position */,
1871                        zapt->term->which, rset_nmem, 
1872                        kc, kc->scope, ol, index_type,
1873                        hits_limit_value,
1874                        term_ref_id_str);
1875         wrbuf_destroy(display_term);
1876         if (!result_sets[num_result_sets])
1877             break;
1878         num_result_sets++;
1879         if (!*termp)
1880             break;
1881     }
1882     grep_info_delete(&grep_info);
1883
1884     if (res != ZEBRA_OK)
1885         return res;
1886     if (num_result_sets == 0)
1887         *rset = rset_create_null(rset_nmem, kc, 0);
1888     else if (num_result_sets == 1)
1889         *rset = result_sets[0];
1890     else
1891         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1892                                 num_result_sets, result_sets);
1893     if (!*rset)
1894         return ZEBRA_FAIL;
1895     return ZEBRA_OK;
1896 }
1897
1898 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1899                                       Z_AttributesPlusTerm *zapt,
1900                                       const char *termz,
1901                                       const Odr_oid *attributeSet,
1902                                       NMEM stream,
1903                                       const char *rank_type, NMEM rset_nmem,
1904                                       RSET *rset,
1905                                       struct rset_key_control *kc)
1906 {
1907     Record rec;
1908     zint sysno = atozint(termz);
1909     
1910     if (sysno <= 0)
1911         sysno = 0;
1912     rec = rec_get(zh->reg->records, sysno);
1913     if (!rec)
1914         sysno = 0;
1915
1916     rec_free(&rec);
1917
1918     if (sysno <= 0)
1919     {
1920         *rset = rset_create_null(rset_nmem, kc, 0);
1921     }
1922     else
1923     {
1924         RSFD rsfd;
1925         struct it_key key;
1926         *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1927                                  res_get(zh->res, "setTmpDir"), 0);
1928         rsfd = rset_open(*rset, RSETF_WRITE);
1929         
1930         key.mem[0] = sysno;
1931         key.mem[1] = 1;
1932         key.len = 2;
1933         rset_write(rsfd, &key);
1934         rset_close(rsfd);
1935     }
1936     return ZEBRA_OK;
1937 }
1938
1939 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1940                                const Odr_oid *attributeSet, NMEM stream,
1941                                Z_SortKeySpecList *sort_sequence,
1942                                const char *rank_type,
1943                                NMEM rset_nmem,
1944                                RSET *rset,
1945                                struct rset_key_control *kc)
1946 {
1947     int i;
1948     int sort_relation_value;
1949     AttrType sort_relation_type;
1950     Z_SortKeySpec *sks;
1951     Z_SortKey *sk;
1952     char termz[20];
1953     
1954     attr_init_APT(&sort_relation_type, zapt, 7);
1955     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1956
1957     if (!sort_sequence->specs)
1958     {
1959         sort_sequence->num_specs = 10;
1960         sort_sequence->specs = (Z_SortKeySpec **)
1961             nmem_malloc(stream, sort_sequence->num_specs *
1962                         sizeof(*sort_sequence->specs));
1963         for (i = 0; i<sort_sequence->num_specs; i++)
1964             sort_sequence->specs[i] = 0;
1965     }
1966     if (zapt->term->which != Z_Term_general)
1967         i = 0;
1968     else
1969         i = atoi_n((char *) zapt->term->u.general->buf,
1970                    zapt->term->u.general->len);
1971     if (i >= sort_sequence->num_specs)
1972         i = 0;
1973     sprintf(termz, "%d", i);
1974
1975     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1976     sks->sortElement = (Z_SortElement *)
1977         nmem_malloc(stream, sizeof(*sks->sortElement));
1978     sks->sortElement->which = Z_SortElement_generic;
1979     sk = sks->sortElement->u.generic = (Z_SortKey *)
1980         nmem_malloc(stream, sizeof(*sk));
1981     sk->which = Z_SortKey_sortAttributes;
1982     sk->u.sortAttributes = (Z_SortAttributes *)
1983         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1984
1985     sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1986     sk->u.sortAttributes->list = zapt->attributes;
1987
1988     sks->sortRelation = (Odr_int *)
1989         nmem_malloc(stream, sizeof(*sks->sortRelation));
1990     if (sort_relation_value == 1)
1991         *sks->sortRelation = Z_SortKeySpec_ascending;
1992     else if (sort_relation_value == 2)
1993         *sks->sortRelation = Z_SortKeySpec_descending;
1994     else 
1995         *sks->sortRelation = Z_SortKeySpec_ascending;
1996
1997     sks->caseSensitivity = (Odr_int *)
1998         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1999     *sks->caseSensitivity = 0;
2000
2001     sks->which = Z_SortKeySpec_null;
2002     sks->u.null = odr_nullval ();
2003     sort_sequence->specs[i] = sks;
2004     *rset = rset_create_null(rset_nmem, kc, 0);
2005     return ZEBRA_OK;
2006 }
2007
2008
2009 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2010                            const Odr_oid *attributeSet,
2011                            struct xpath_location_step *xpath, int max,
2012                            NMEM mem)
2013 {
2014     const Odr_oid *curAttributeSet = attributeSet;
2015     AttrType use;
2016     const char *use_string = 0;
2017     
2018     attr_init_APT(&use, zapt, 1);
2019     attr_find_ex(&use, &curAttributeSet, &use_string);
2020
2021     if (!use_string || *use_string != '/')
2022         return -1;
2023
2024     return zebra_parse_xpath_str(use_string, xpath, max, mem);
2025 }
2026  
2027                
2028
2029 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2030                         const char *index_type, const char *term, 
2031                         const char *xpath_use,
2032                         NMEM rset_nmem,
2033                         struct rset_key_control *kc)
2034 {
2035     struct grep_info grep_info;
2036     int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
2037                                            zinfo_index_category_index,
2038                                            index_type, xpath_use);
2039     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2040         return rset_create_null(rset_nmem, kc, 0);
2041     
2042     if (ord < 0)
2043         return rset_create_null(rset_nmem, kc, 0);
2044     else
2045     {
2046         int i, r, max_pos;
2047         char ord_buf[32];
2048         RSET rset;
2049         WRBUF term_dict = wrbuf_alloc();
2050         int ord_len = key_SU_encode(ord, ord_buf);
2051         int term_type = Z_Term_characterString;
2052         const char *flags = "void";
2053
2054         wrbuf_putc(term_dict, '(');
2055         for (i = 0; i<ord_len; i++)
2056         {
2057             wrbuf_putc(term_dict, 1);
2058             wrbuf_putc(term_dict, ord_buf[i]);
2059         }
2060         wrbuf_putc(term_dict, ')');
2061         wrbuf_puts(term_dict, term);
2062         
2063         grep_info.isam_p_indx = 0;
2064         r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2065                              &grep_info, &max_pos, 0, grep_handle);
2066         yaz_log(YLOG_DEBUG, "%s %d positions", term,
2067                 grep_info.isam_p_indx);
2068         rset = rset_trunc(zh, grep_info.isam_p_buf,
2069                           grep_info.isam_p_indx, term, strlen(term),
2070                           flags, 1, term_type, rset_nmem,
2071                           kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2072                           0 /* term_ref_id_str */);
2073         grep_info_delete(&grep_info);
2074         wrbuf_destroy(term_dict);
2075         return rset;
2076     }
2077 }
2078
2079 static
2080 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2081                            NMEM stream, const char *rank_type, RSET rset,
2082                            int xpath_len, struct xpath_location_step *xpath,
2083                            NMEM rset_nmem,
2084                            RSET *rset_out,
2085                            struct rset_key_control *kc)
2086 {
2087     int i;
2088     int always_matches = rset ? 0 : 1;
2089
2090     if (xpath_len < 0)
2091     {
2092         *rset_out = rset;
2093         return ZEBRA_OK;
2094     }
2095
2096     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2097     for (i = 0; i<xpath_len; i++)
2098     {
2099         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2100
2101     }
2102
2103     /*
2104     //a    ->    a/.*
2105     //a/b  ->    b/a/.*
2106     /a     ->    a/
2107     /a/b   ->    b/a/
2108
2109     /      ->    none
2110
2111     a[@attr = value]/b[@other = othervalue]
2112
2113     /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2114     /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2115     /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2116     /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2117     /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2118     /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2119       
2120     */
2121
2122     dict_grep_cmap(zh->reg->dict, 0, 0);
2123     
2124     {
2125         int level = xpath_len;
2126         int first_path = 1;
2127         
2128         while (--level >= 0)
2129         {
2130             WRBUF xpath_rev = wrbuf_alloc();
2131             int i;
2132             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2133
2134             for (i = level; i >= 1; --i)
2135             {
2136                 const char *cp = xpath[i].part;
2137                 if (*cp)
2138                 {
2139                     for (; *cp; cp++)
2140                     {
2141                         if (*cp == '*')
2142                             wrbuf_puts(xpath_rev, "[^/]*");
2143                         else if (*cp == ' ')
2144                             wrbuf_puts(xpath_rev, "\001 ");
2145                         else
2146                             wrbuf_putc(xpath_rev, *cp);
2147
2148                         /* wrbuf_putc does not null-terminate , but
2149                            wrbuf_puts below ensures it does.. so xpath_rev
2150                            is OK iff length is > 0 */
2151                     }
2152                     wrbuf_puts(xpath_rev, "/");
2153                 }
2154                 else if (i == 1)  /* // case */
2155                     wrbuf_puts(xpath_rev, ".*");
2156             }
2157             if (xpath[level].predicate &&
2158                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2159                 xpath[level].predicate->u.relation.name[0])
2160             {
2161                 WRBUF wbuf = wrbuf_alloc();
2162                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2163                 if (xpath[level].predicate->u.relation.value)
2164                 {
2165                     const char *cp = xpath[level].predicate->u.relation.value;
2166                     wrbuf_putc(wbuf, '=');
2167                     
2168                     while (*cp)
2169                     {
2170                         if (strchr(REGEX_CHARS, *cp))
2171                             wrbuf_putc(wbuf, '\\');
2172                         wrbuf_putc(wbuf, *cp);
2173                         cp++;
2174                     }
2175                 }
2176                 rset_attr = xpath_trunc(
2177                     zh, stream, "0", wrbuf_cstr(wbuf), 
2178                     ZEBRA_XPATH_ATTR_NAME, 
2179                     rset_nmem, kc);
2180                 wrbuf_destroy(wbuf);
2181             } 
2182             else 
2183             {
2184                 if (!first_path)
2185                 {
2186                     wrbuf_destroy(xpath_rev);
2187                     continue;
2188                 }
2189             }
2190             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, 
2191                     wrbuf_cstr(xpath_rev));
2192             if (wrbuf_len(xpath_rev))
2193             {
2194                 rset_start_tag = xpath_trunc(zh, stream, "0", 
2195                                              wrbuf_cstr(xpath_rev),
2196                                              ZEBRA_XPATH_ELM_BEGIN, 
2197                                              rset_nmem, kc);
2198                 if (always_matches)
2199                     rset = rset_start_tag;
2200                 else
2201                 {
2202                     rset_end_tag = xpath_trunc(zh, stream, "0", 
2203                                                wrbuf_cstr(xpath_rev),
2204                                                ZEBRA_XPATH_ELM_END, 
2205                                                rset_nmem, kc);
2206                     
2207                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2208                                                rset_start_tag, rset,
2209                                                rset_end_tag, rset_attr);
2210                 }
2211             }
2212             wrbuf_destroy(xpath_rev);
2213             first_path = 0;
2214         }
2215     }
2216     *rset_out = rset;
2217     return ZEBRA_OK;
2218 }
2219
2220 #define MAX_XPATH_STEPS 10
2221
2222 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2223                                      Z_AttributesPlusTerm *zapt,
2224                                      const Odr_oid *attributeSet,
2225                                      zint hits_limit, NMEM stream,
2226                                      Z_SortKeySpecList *sort_sequence,
2227                                      NMEM rset_nmem,
2228                                      RSET *rset,
2229                                      struct rset_key_control *kc);
2230
2231 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2232                                 const Odr_oid *attributeSet,
2233                                 zint hits_limit, NMEM stream,
2234                                 Z_SortKeySpecList *sort_sequence,
2235                                 int num_bases, const char **basenames, 
2236                                 NMEM rset_nmem,
2237                                 RSET *rset,
2238                                 struct rset_key_control *kc)
2239 {
2240     RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2241     ZEBRA_RES res = ZEBRA_OK;
2242     int i;
2243     for (i = 0; i < num_bases; i++)
2244     {
2245
2246         if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2247         {
2248             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2249                            basenames[i]);
2250             res = ZEBRA_FAIL;
2251             break;
2252         }
2253         res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream,
2254                                   sort_sequence,
2255                                   rset_nmem, rsets+i, kc);
2256         if (res != ZEBRA_OK)
2257             break;
2258     }
2259     if (res != ZEBRA_OK)
2260     {   /* must clean up the already created sets */
2261         while (--i >= 0)
2262             rset_delete(rsets[i]);
2263         *rset = 0;
2264     }
2265     else 
2266     {
2267         if (num_bases == 1)
2268             *rset = rsets[0];
2269         else if (num_bases == 0)
2270             *rset = rset_create_null(rset_nmem, kc, 0); 
2271         else
2272             *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
2273                                    num_bases, rsets);
2274     }
2275     return res;
2276 }
2277
2278 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2279                                      Z_AttributesPlusTerm *zapt,
2280                                      const Odr_oid *attributeSet,
2281                                      zint hits_limit, NMEM stream,
2282                                      Z_SortKeySpecList *sort_sequence,
2283                                      NMEM rset_nmem,
2284                                      RSET *rset,
2285                                      struct rset_key_control *kc)
2286 {
2287     ZEBRA_RES res = ZEBRA_OK;
2288     const char *index_type;
2289     char *search_type = NULL;
2290     char rank_type[128];
2291     int complete_flag;
2292     int sort_flag;
2293     char termz[IT_MAX_WORD+1];
2294     int xpath_len;
2295     const char *xpath_use = 0;
2296     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2297
2298     if (!log_level_set)
2299     {
2300         log_level_rpn = yaz_log_module_level("rpn");
2301         log_level_set = 1;
2302     }
2303     zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2304                     rank_type, &complete_flag, &sort_flag);
2305     
2306     yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2307     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2308     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2309     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2310
2311     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2312         return ZEBRA_FAIL;
2313
2314     if (sort_flag)
2315         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2316                              rank_type, rset_nmem, rset, kc);
2317     /* consider if an X-Path query is used */
2318     xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
2319                                 xpath, MAX_XPATH_STEPS, stream);
2320     if (xpath_len >= 0)
2321     {
2322         if (xpath[xpath_len-1].part[0] == '@') 
2323             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2324         else
2325             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
2326
2327         if (1)
2328         {
2329             AttrType relation;
2330             int relation_value;
2331
2332             attr_init_APT(&relation, zapt, 2);
2333             relation_value = attr_find(&relation, NULL);
2334
2335             if (relation_value == 103) /* alwaysmatches */
2336             {
2337                 *rset = 0; /* signal no "term" set */
2338                 return rpn_search_xpath(zh, stream, rank_type, *rset, 
2339                                         xpath_len, xpath, rset_nmem, rset, kc);
2340             }
2341         }
2342     }
2343
2344     /* search using one of the various search type strategies
2345        termz is our UTF-8 search term
2346        attributeSet is top-level default attribute set 
2347        stream is ODR for search
2348        reg_id is the register type
2349        complete_flag is 1 for complete subfield, 0 for incomplete
2350        xpath_use is use-attribute to be used for X-Path search, 0 for none
2351     */
2352     if (!strcmp(search_type, "phrase"))
2353     {
2354         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit,
2355                                     stream,
2356                                     index_type, complete_flag, rank_type,
2357                                     xpath_use,
2358                                     rset_nmem,
2359                                     rset, kc);
2360     }
2361     else if (!strcmp(search_type, "and-list"))
2362     {
2363         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit,
2364                                       stream,
2365                                       index_type, complete_flag, rank_type,
2366                                       xpath_use,
2367                                       rset_nmem,
2368                                       rset, kc);
2369     }
2370     else if (!strcmp(search_type, "or-list"))
2371     {
2372         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit,
2373                                      stream,
2374                                      index_type, complete_flag, rank_type,
2375                                      xpath_use,
2376                                      rset_nmem,
2377                                      rset, kc);
2378     }
2379     else if (!strcmp(search_type, "local"))
2380     {
2381         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2382                                    rank_type, rset_nmem, rset, kc);
2383     }
2384     else if (!strcmp(search_type, "numeric"))
2385     {
2386         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2387                                      index_type, complete_flag, rank_type,
2388                                      xpath_use,
2389                                      rset_nmem,
2390                                      rset, kc);
2391     }
2392     else
2393     {
2394         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2395         res = ZEBRA_FAIL;
2396     }
2397     if (res != ZEBRA_OK)
2398         return res;
2399     if (!*rset)
2400         return ZEBRA_FAIL;
2401     return rpn_search_xpath(zh, stream, rank_type, *rset, 
2402                             xpath_len, xpath, rset_nmem, rset, kc);
2403 }
2404
2405 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2406                                       const Odr_oid *attributeSet,
2407                                       zint hits_limit,
2408                                       NMEM stream, NMEM rset_nmem,
2409                                       Z_SortKeySpecList *sort_sequence,
2410                                       int num_bases, const char **basenames,
2411                                       RSET **result_sets, int *num_result_sets,
2412                                       Z_Operator *parent_op,
2413                                       struct rset_key_control *kc);
2414
2415 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2416                                    zint *approx_limit)
2417 {
2418     ZEBRA_RES res = ZEBRA_OK;
2419     if (zs->which == Z_RPNStructure_complex)
2420     {
2421         if (res == ZEBRA_OK)
2422             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2423                                            approx_limit);
2424         if (res == ZEBRA_OK)
2425             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2426                                            approx_limit);
2427     }
2428     else if (zs->which == Z_RPNStructure_simple)
2429     {
2430         if (zs->u.simple->which == Z_Operand_APT)
2431         {
2432             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2433             AttrType global_hits_limit_attr;
2434             int l;
2435             
2436             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2437             
2438             l = attr_find(&global_hits_limit_attr, NULL);
2439             if (l != -1)
2440                 *approx_limit = l;
2441         }
2442     }
2443     return res;
2444 }
2445
2446 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2447                          const Odr_oid *attributeSet, 
2448                          zint hits_limit,
2449                          NMEM stream, NMEM rset_nmem,
2450                          Z_SortKeySpecList *sort_sequence,
2451                          int num_bases, const char **basenames,
2452                          RSET *result_set)
2453 {
2454     RSET *result_sets = 0;
2455     int num_result_sets = 0;
2456     ZEBRA_RES res;
2457     struct rset_key_control *kc = zebra_key_control_create(zh);
2458
2459     res = rpn_search_structure(zh, zs, attributeSet, hits_limit,
2460                                stream, rset_nmem,
2461                                sort_sequence, 
2462                                num_bases, basenames,
2463                                &result_sets, &num_result_sets,
2464                                0 /* no parent op */,
2465                                kc);
2466     if (res != ZEBRA_OK)
2467     {
2468         int i;
2469         for (i = 0; i<num_result_sets; i++)
2470             rset_delete(result_sets[i]);
2471         *result_set = 0;
2472     }
2473     else
2474     {
2475         assert(num_result_sets == 1);
2476         assert(result_sets);
2477         assert(*result_sets);
2478         *result_set = *result_sets;
2479     }
2480     (*kc->dec)(kc);
2481     return res;
2482 }
2483
2484 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2485                                const Odr_oid *attributeSet, zint hits_limit,
2486                                NMEM stream, NMEM rset_nmem,
2487                                Z_SortKeySpecList *sort_sequence,
2488                                int num_bases, const char **basenames,
2489                                RSET **result_sets, int *num_result_sets,
2490                                Z_Operator *parent_op,
2491                                struct rset_key_control *kc)
2492 {
2493     *num_result_sets = 0;
2494     if (zs->which == Z_RPNStructure_complex)
2495     {
2496         ZEBRA_RES res;
2497         Z_Operator *zop = zs->u.complex->roperator;
2498         RSET *result_sets_l = 0;
2499         int num_result_sets_l = 0;
2500         RSET *result_sets_r = 0;
2501         int num_result_sets_r = 0;
2502
2503         res = rpn_search_structure(zh, zs->u.complex->s1,
2504                                    attributeSet, hits_limit, stream, rset_nmem,
2505                                    sort_sequence,
2506                                    num_bases, basenames,
2507                                    &result_sets_l, &num_result_sets_l,
2508                                    zop, kc);
2509         if (res != ZEBRA_OK)
2510         {
2511             int i;
2512             for (i = 0; i<num_result_sets_l; i++)
2513                 rset_delete(result_sets_l[i]);
2514             return res;
2515         }
2516         res = rpn_search_structure(zh, zs->u.complex->s2,
2517                                    attributeSet, hits_limit, stream, rset_nmem,
2518                                    sort_sequence,
2519                                    num_bases, basenames,
2520                                    &result_sets_r, &num_result_sets_r,
2521                                    zop, kc);
2522         if (res != ZEBRA_OK)
2523         {
2524             int i;
2525             for (i = 0; i<num_result_sets_l; i++)
2526                 rset_delete(result_sets_l[i]);
2527             for (i = 0; i<num_result_sets_r; i++)
2528                 rset_delete(result_sets_r[i]);
2529             return res;
2530         }
2531
2532         /* make a new list of result for all children */
2533         *num_result_sets = num_result_sets_l + num_result_sets_r;
2534         *result_sets = nmem_malloc(stream, *num_result_sets * 
2535                                    sizeof(**result_sets));
2536         memcpy(*result_sets, result_sets_l, 
2537                num_result_sets_l * sizeof(**result_sets));
2538         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2539                num_result_sets_r * sizeof(**result_sets));
2540
2541         if (!parent_op || parent_op->which != zop->which
2542             || (zop->which != Z_Operator_and &&
2543                 zop->which != Z_Operator_or))
2544         {
2545             /* parent node different from this one (or non-present) */
2546             /* we must combine result sets now */
2547             RSET rset;
2548             switch (zop->which)
2549             {
2550             case Z_Operator_and:
2551                 rset = rset_create_and(rset_nmem, kc,
2552                                        kc->scope,
2553                                        *num_result_sets, *result_sets);
2554                 break;
2555             case Z_Operator_or:
2556                 rset = rset_create_or(rset_nmem, kc,
2557                                       kc->scope, 0, /* termid */
2558                                       *num_result_sets, *result_sets);
2559                 break;
2560             case Z_Operator_and_not:
2561                 rset = rset_create_not(rset_nmem, kc,
2562                                        kc->scope,
2563                                        (*result_sets)[0],
2564                                        (*result_sets)[1]);
2565                 break;
2566             case Z_Operator_prox:
2567                 if (zop->u.prox->which != Z_ProximityOperator_known)
2568                 {
2569                     zebra_setError(zh, 
2570                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2571                                    0);
2572                     return ZEBRA_FAIL;
2573                 }
2574                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2575                 {
2576                     zebra_setError_zint(zh,
2577                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2578                                         *zop->u.prox->u.known);
2579                     return ZEBRA_FAIL;
2580                 }
2581                 else
2582                 {
2583                     rset = rset_create_prox(rset_nmem, kc,
2584                                             kc->scope,
2585                                             *num_result_sets, *result_sets, 
2586                                             *zop->u.prox->ordered,
2587                                             (!zop->u.prox->exclusion ? 
2588                                              0 : *zop->u.prox->exclusion),
2589                                             *zop->u.prox->relationType,
2590                                             *zop->u.prox->distance );
2591                 }
2592                 break;
2593             default:
2594                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2595                 return ZEBRA_FAIL;
2596             }
2597             *num_result_sets = 1;
2598             *result_sets = nmem_malloc(stream, *num_result_sets * 
2599                                        sizeof(**result_sets));
2600             (*result_sets)[0] = rset;
2601         }
2602     }
2603     else if (zs->which == Z_RPNStructure_simple)
2604     {
2605         RSET rset;
2606         ZEBRA_RES res;
2607
2608         if (zs->u.simple->which == Z_Operand_APT)
2609         {
2610             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2611             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2612                                  attributeSet, hits_limit,
2613                                  stream, sort_sequence,
2614                                  num_bases, basenames, rset_nmem, &rset,
2615                                  kc);
2616             if (res != ZEBRA_OK)
2617                 return res;
2618         }
2619         else if (zs->u.simple->which == Z_Operand_resultSetId)
2620         {
2621             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2622             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2623             if (!rset)
2624             {
2625                 zebra_setError(zh, 
2626                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2627                                zs->u.simple->u.resultSetId);
2628                 return ZEBRA_FAIL;
2629             }
2630             rset_dup(rset);
2631         }
2632         else
2633         {
2634             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2635             return ZEBRA_FAIL;
2636         }
2637         *num_result_sets = 1;
2638         *result_sets = nmem_malloc(stream, *num_result_sets * 
2639                                    sizeof(**result_sets));
2640         (*result_sets)[0] = rset;
2641     }
2642     else
2643     {
2644         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2645         return ZEBRA_FAIL;
2646     }
2647     return ZEBRA_OK;
2648 }
2649
2650
2651
2652 /*
2653  * Local variables:
2654  * c-basic-offset: 4
2655  * c-file-style: "Stroustrup"
2656  * indent-tabs-mode: nil
2657  * End:
2658  * vim: shiftwidth=4 tabstop=8 expandtab
2659  */
2660