RPM: get version from IDMETA
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* This file is part of the Zebra server.
2    Copyright (C) 1994-2011 Index Data
3
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 */
19
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
/* Lazy-initialisation state for the "rpn" log level: log_level_set
   becomes 1 once log_level_rpn has been fetched with
   yaz_log_module_level("rpn") (done on first use in add_isam_p).
   Both start out as zero, as all objects with static storage do. */
static int log_level_set;
static int log_level_rpn;

/* NOTE(review): not referenced in the visible part of this file */
#define TERMSET_DISABLE 1
44
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
46 {
47     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48     const char **out = zebra_maps_input(p->zm, from, len, 0);
49 #if 0
50     if (out && *out)
51     {
52         const char *outp = *out;
53         yaz_log(YLOG_LOG, "---");
54         while (*outp)
55         {
56             yaz_log(YLOG_LOG, "%02X", *outp);
57             outp++;
58         }
59     }
60 #endif
61     return out;
62 }
63
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65                           struct rpn_char_map_info *map_info)
66 {
67     map_info->zm = zm;
68     if (zebra_maps_is_icu(zm))
69         dict_grep_cmap(reg->dict, 0, 0);
70     else
71         dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
72 }
73
74 #define TERM_COUNT        
75        
76 struct grep_info {        
77 #ifdef TERM_COUNT        
78     int *term_no;        
79 #endif        
80     ISAM_P *isam_p_buf;
81     int isam_p_size;        
82     int isam_p_indx;
83     int trunc_max;
84     ZebraHandle zh;
85     const char *index_type;
86     ZebraSet termset;
87 };        
88
89 static int add_isam_p(const char *name, const char *info,
90                       struct grep_info *p)
91 {
92     if (!log_level_set)
93     {
94         log_level_rpn = yaz_log_module_level("rpn");
95         log_level_set = 1;
96     }
97     /* we may have to stop this madness.. NOTE: -1 so that if
98        truncmax == trunxlimit we do *not* generate result sets */
99     if (p->isam_p_indx >= p->trunc_max - 1)
100         return 1;
101
102     if (p->isam_p_indx == p->isam_p_size)
103     {
104         ISAM_P *new_isam_p_buf;
105 #ifdef TERM_COUNT        
106         int *new_term_no;        
107 #endif
108         p->isam_p_size = 2*p->isam_p_size + 100;
109         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
110                                             p->isam_p_size);
111         if (p->isam_p_buf)
112         {
113             memcpy(new_isam_p_buf, p->isam_p_buf,
114                    p->isam_p_indx * sizeof(*p->isam_p_buf));
115             xfree(p->isam_p_buf);
116         }
117         p->isam_p_buf = new_isam_p_buf;
118
119 #ifdef TERM_COUNT
120         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
121         if (p->term_no)
122         {
123             memcpy(new_term_no, p->isam_p_buf,
124                    p->isam_p_indx * sizeof(*p->term_no));
125             xfree(p->term_no);
126         }
127         p->term_no = new_term_no;
128 #endif
129     }
130     assert(*info == sizeof(*p->isam_p_buf));
131     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
132
133     if (p->termset)
134     {
135         const char *db;
136         char term_tmp[IT_MAX_WORD];
137         int ord = 0;
138         const char *index_name;
139         int len = key_SU_decode(&ord, (const unsigned char *) name);
140         
141         zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143         zebraExplain_lookup_ord(p->zh->reg->zei,
144                                 ord, 0 /* index_type */, &db, &index_name);
145         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
146         
147         resultSetAddTerm(p->zh, p->termset, name[len], db,
148                          index_name, term_tmp);
149     }
150     (p->isam_p_indx)++;
151     return 0;
152 }
153
/* Callback adapter with untyped client data: p is the struct grep_info
   that add_isam_p() should fill. Returns add_isam_p()'s result. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *gi = p;

    return add_isam_p(name, info, gi);
}
158
159 static int term_pre(zebra_map_t zm, const char **src,
160                     const char *ct1, int first)
161 {
162     const char *s1, *s0 = *src;
163     const char **map;
164
165     /* skip white space */
166     while (*s0)
167     {
168         if (ct1 && strchr(ct1, *s0))
169             break;
170         s1 = s0;
171         map = zebra_maps_input(zm, &s1, strlen(s1), first);
172         if (**map != *CHR_SPACE)
173             break;
174         s0 = s1;
175     }
176     *src = s0;
177     return *s0;
178 }
179
180
/* esc_str - render bytes as a printable dump for diagnostics
 *  out_buf/out_size:  destination; out_size must exceed 20
 *  in_buf/in_size:    bytes to dump
 * Each byte becomes "HH:c  " (hex value, then the character itself or
 * '?' when it is outside 32..126). Once the output grows to within 20
 * bytes of out_size, ".." is appended and the rest is dropped.
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;   /* length of the string built so far */
    int idx = 0;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    while (idx < in_size)
    {
        int byte = in_buf[idx++] & 0xff;
        int shown = (byte >= 32 && byte <= 126) ? byte : '?';

        used += sprintf(out_buf + used, "%02X:%c  ", byte, shown);
        if (used > out_size-20)
        {
            strcpy(out_buf + used, "..");
            return;
        }
    }
}
206
207 #define REGEX_CHARS " ^[]()|.*+?!\"$"
208
209 static void add_non_space(const char *start, const char *end,
210                           WRBUF term_dict,
211                           WRBUF display_term,
212                           const char **map, int q_map_match)
213 {
214     size_t sz = end - start;
215
216     wrbuf_write(display_term, start, sz);
217     if (!q_map_match)
218     {
219         while (start < end)
220         {
221             if (strchr(REGEX_CHARS, *start))
222                 wrbuf_putc(term_dict, '\\');
223             wrbuf_putc(term_dict, *start);
224             start++;
225         }
226     }
227     else
228     {
229         char tmpbuf[80];
230         esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
231         
232         wrbuf_puts(term_dict, map[0]);
233     }
234 }
235
236
237 static int term_100_icu(zebra_map_t zm,
238                         const char **src, WRBUF term_dict, int space_split,
239                         WRBUF display_term,
240                         int right_trunc)
241 {
242     int i;
243     const char *res_buf = 0;
244     size_t res_len = 0;
245     const char *display_buf;
246     size_t display_len;
247     if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
248                                  &display_buf, &display_len))
249     {
250         *src += strlen(*src);
251         return 0;
252     }
253     wrbuf_write(display_term, display_buf, display_len);
254     if (right_trunc)
255     {
256         /* ICU sort keys seem to be of the form
257            basechars \x01 accents \x01 length
258            For now we'll just right truncate from basechars . This 
259            may give false hits due to accents not being used.
260         */
261         i = res_len;
262         while (--i >= 0 && res_buf[i] != '\x01')
263             ;
264         if (i > 0)
265         {
266             while (--i >= 0 && res_buf[i] != '\x01')
267                 ;
268         }
269         if (i == 0)
270         {  /* did not find base chars at all. Throw error */
271             return -1;
272         }
273         res_len = i; /* reduce res_len */
274     }
275     for (i = 0; i < res_len; i++)
276     {
277         if (strchr(REGEX_CHARS "\\", res_buf[i]))
278             wrbuf_putc(term_dict, '\\');
279         if (res_buf[i] < 32)
280             wrbuf_putc(term_dict, 1);
281             
282         wrbuf_putc(term_dict, res_buf[i]);
283     }
284     if (right_trunc)
285         wrbuf_puts(term_dict, ".*");
286     return 1;
287 }
288
289 /* term_100: handle term, where trunc = none(no operators at all) */
290 static int term_100(zebra_map_t zm,
291                     const char **src, WRBUF term_dict, int space_split,
292                     WRBUF display_term)
293 {
294     const char *s0;
295     const char **map;
296     int i = 0;
297
298     const char *space_start = 0;
299     const char *space_end = 0;
300
301     if (!term_pre(zm, src, 0, !space_split))
302         return 0;
303     s0 = *src;
304     while (*s0)
305     {
306         const char *s1 = s0;
307         int q_map_match = 0;
308         map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
309         if (space_split)
310         {
311             if (**map == *CHR_SPACE)
312                 break;
313         }
314         else  /* complete subfield only. */
315         {
316             if (**map == *CHR_SPACE)
317             {   /* save space mapping for later  .. */
318                 space_start = s1;
319                 space_end = s0;
320                 continue;
321             }
322             else if (space_start)
323             {   /* reload last space */
324                 while (space_start < space_end)
325                 {
326                     if (strchr(REGEX_CHARS, *space_start))
327                         wrbuf_putc(term_dict, '\\');
328                     wrbuf_putc(display_term, *space_start);
329                     wrbuf_putc(term_dict, *space_start);
330                     space_start++;
331                                
332                 }
333                 /* and reset */
334                 space_start = space_end = 0;
335             }
336         }
337         i++;
338
339         add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
340     }
341     *src = s0;
342     return i;
343 }
344
345 /* term_101: handle term, where trunc = Process # */
346 static int term_101(zebra_map_t zm,
347                     const char **src, WRBUF term_dict, int space_split,
348                     WRBUF display_term)
349 {
350     const char *s0;
351     const char **map;
352     int i = 0;
353
354     if (!term_pre(zm, src, "#", !space_split))
355         return 0;
356     s0 = *src;
357     while (*s0)
358     {
359         if (*s0 == '#')
360         {
361             i++;
362             wrbuf_puts(term_dict, ".*");
363             wrbuf_putc(display_term, *s0);
364             s0++;
365         }
366         else
367         {
368             const char *s1 = s0;
369             int q_map_match = 0;
370             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
371             if (space_split && **map == *CHR_SPACE)
372                 break;
373
374             i++;
375             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
376         }
377     }
378     *src = s0;
379     return i;
380 }
381
382 /* term_103: handle term, where trunc = re-2 (regular expressions) */
383 static int term_103(zebra_map_t zm, const char **src,
384                     WRBUF term_dict, int *errors, int space_split,
385                     WRBUF display_term)
386 {
387     int i = 0;
388     const char *s0;
389     const char **map;
390
391     if (!term_pre(zm, src, "^\\()[].*+?|", !space_split))
392         return 0;
393     s0 = *src;
394     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
395         isdigit(((const unsigned char *)s0)[1]))
396     {
397         *errors = s0[1] - '0';
398         s0 += 3;
399         if (*errors > 3)
400             *errors = 3;
401     }
402     while (*s0)
403     {
404         if (strchr("^\\()[].*+?|-", *s0))
405         {
406             wrbuf_putc(display_term, *s0);
407             wrbuf_putc(term_dict, *s0);
408             s0++;
409             i++;
410         }
411         else
412         {
413             const char *s1 = s0;
414             int q_map_match = 0;
415             map = zebra_maps_search(zm, &s0, strlen(s0),  &q_map_match);
416             if (space_split && **map == *CHR_SPACE)
417                 break;
418
419             i++;
420             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
421         }
422     }
423     *src = s0;
424     
425     return i;
426 }
427
428 /* term_103: handle term, where trunc = re-1 (regular expressions) */
429 static int term_102(zebra_map_t zm, const char **src,
430                     WRBUF term_dict, int space_split, WRBUF display_term)
431 {
432     return term_103(zm, src, term_dict, NULL, space_split, display_term);
433 }
434
435
436 /* term_104: handle term, process ?n * # */
437 static int term_104(zebra_map_t zm, const char **src, 
438                     WRBUF term_dict, int space_split, WRBUF display_term)
439 {
440     const char *s0;
441     const char **map;
442     int i = 0;
443
444     if (!term_pre(zm, src, "?*#", !space_split))
445         return 0;
446     s0 = *src;
447     while (*s0)
448     {
449         if (*s0 == '?')
450         {
451             i++;
452             wrbuf_putc(display_term, *s0);
453             s0++;
454             if (*s0 >= '0' && *s0 <= '9')
455             {
456                 int limit = 0;
457                 while (*s0 >= '0' && *s0 <= '9')
458                 {
459                     limit = limit * 10 + (*s0 - '0');
460                     wrbuf_putc(display_term, *s0);
461                     s0++;
462                 }
463                 if (limit > 20)
464                     limit = 20;
465                 while (--limit >= 0)
466                 {
467                     wrbuf_puts(term_dict, ".?");
468                 }
469             }
470             else
471             {
472                 wrbuf_puts(term_dict, ".*");
473             }
474         }
475         else if (*s0 == '*')
476         {
477             i++;
478             wrbuf_puts(term_dict, ".*");
479             wrbuf_putc(display_term, *s0);
480             s0++;
481         }
482         else if (*s0 == '#')
483         {
484             i++;
485             wrbuf_puts(term_dict, ".");
486             wrbuf_putc(display_term, *s0);
487             s0++;
488         }
489         else
490         {
491             const char *s1 = s0;
492             int q_map_match = 0;
493             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
494             if (space_split && **map == *CHR_SPACE)
495                 break;
496
497             i++;
498             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
499         }
500     }
501     *src = s0;
502     return i;
503 }
504
505 /* term_105/106: handle term, process * ! and possibly right_truncate */
506 static int term_105(zebra_map_t zm, const char **src, 
507                     WRBUF term_dict, int space_split,
508                     WRBUF display_term, int right_truncate)
509 {
510     const char *s0;
511     const char **map;
512     int i = 0;
513
514     if (!term_pre(zm, src, "\\*!", !space_split))
515         return 0;
516     s0 = *src;
517     while (*s0)
518     {
519         if (*s0 == '*')
520         {
521             i++;
522             wrbuf_puts(term_dict, ".*");
523             wrbuf_putc(display_term, *s0);
524             s0++;
525         }
526         else if (*s0 == '!')
527         {
528             i++;
529             wrbuf_putc(term_dict, '.');
530             wrbuf_putc(display_term, *s0);
531             s0++;
532         }
533         else if (*s0 == '\\')
534         {
535             i++;
536             wrbuf_puts(term_dict, "\\\\");
537             wrbuf_putc(display_term, *s0);
538             s0++;
539         }
540         else
541         {
542             const char *s1 = s0;
543             int q_map_match = 0;
544             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
545             if (space_split && **map == *CHR_SPACE)
546                 break;
547
548             i++;
549             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
550         }
551     }
552     if (right_truncate)
553         wrbuf_puts(term_dict, ".*");
554     *src = s0;
555     return i;
556 }
557
558
559 /* gen_regular_rel - generate regular expression from relation
560  *  val:     border value (inclusive)
561  *  islt:    1 if <=; 0 if >=.
562  */
563 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
564 {
565     char dst_buf[20*5*20]; /* assuming enough for expansion */
566     char *dst = dst_buf;
567     int dst_p;
568     int w, d, i;
569     int pos = 0;
570     char numstr[20];
571
572     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
573     if (val >= 0)
574     {
575         if (islt)
576             strcpy(dst, "(-[0-9]+|(");
577         else
578             strcpy(dst, "((");
579     } 
580     else
581     {
582         if (!islt)
583         {
584             strcpy(dst, "([0-9]+|-(");
585             islt = 1;
586         }
587         else
588         {
589             strcpy(dst, "(-(");
590             islt = 0;
591         }
592         val = -val;
593     }
594     dst_p = strlen(dst);
595     sprintf(numstr, "%d", val);
596     for (w = strlen(numstr); --w >= 0; pos++)
597     {
598         d = numstr[w];
599         if (pos > 0)
600         {
601             if (islt)
602             {
603                 if (d == '0')
604                     continue;
605                 d--;
606             } 
607             else
608             {
609                 if (d == '9')
610                     continue;
611                 d++;
612             }
613         }
614         
615         strcpy(dst + dst_p, numstr);
616         dst_p = strlen(dst) - pos - 1;
617
618         if (islt)
619         {
620             if (d != '0')
621             {
622                 dst[dst_p++] = '[';
623                 dst[dst_p++] = '0';
624                 dst[dst_p++] = '-';
625                 dst[dst_p++] = d;
626                 dst[dst_p++] = ']';
627             }
628             else
629                 dst[dst_p++] = d;
630         }
631         else
632         {
633             if (d != '9')
634             { 
635                 dst[dst_p++] = '[';
636                 dst[dst_p++] = d;
637                 dst[dst_p++] = '-';
638                 dst[dst_p++] = '9';
639                 dst[dst_p++] = ']';
640             }
641             else
642                 dst[dst_p++] = d;
643         }
644         for (i = 0; i<pos; i++)
645         {
646             dst[dst_p++] = '[';
647             dst[dst_p++] = '0';
648             dst[dst_p++] = '-';
649             dst[dst_p++] = '9';
650             dst[dst_p++] = ']';
651         }
652         dst[dst_p++] = '|';
653     }
654     dst[dst_p] = '\0';
655     if (islt)
656     {
657         /* match everything less than 10^(pos-1) */
658         strcat(dst, "0*");
659         for (i = 1; i<pos; i++)
660             strcat(dst, "[0-9]?");
661     }
662     else
663     {
664         /* match everything greater than 10^pos */
665         for (i = 0; i <= pos; i++)
666             strcat(dst, "[0-9]");
667         strcat(dst, "[0-9]*");
668     }
669     strcat(dst, "))");
670     wrbuf_puts(term_dict, dst);
671 }
672
673 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
674 {
675     const char *src = wrbuf_cstr(wsrc);
676     if (src[*indx] == '\\')
677     {
678         wrbuf_putc(term_p, src[*indx]);
679         (*indx)++;
680     }
681     wrbuf_putc(term_p, src[*indx]);
682     (*indx)++;
683 }
684
685 /*
686  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
687  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
688  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
689  *              ([^-a].*|a[^-b].*|ab[c-].*)
690  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
691  *              ([^a-].*|a[^b-].*|ab[^c-].*)
692  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
693  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
694  */
695 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
696                            const char **term_sub, WRBUF term_dict,
697                            const Odr_oid *attributeSet,
698                            zebra_map_t zm, int space_split, 
699                            WRBUF display_term,
700                            int *error_code)
701 {
702     AttrType relation;
703     int relation_value;
704     int i;
705     WRBUF term_component = wrbuf_alloc();
706
707     attr_init_APT(&relation, zapt, 2);
708     relation_value = attr_find(&relation, NULL);
709
710     *error_code = 0;
711     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
712     switch (relation_value)
713     {
714     case 1:
715         if (!term_100(zm, term_sub, term_component, space_split, display_term))
716         {
717             wrbuf_destroy(term_component);
718             return 0;
719         }
720         yaz_log(log_level_rpn, "Relation <");
721         
722         wrbuf_putc(term_dict, '(');
723         for (i = 0; i < wrbuf_len(term_component); )
724         {
725             int j = 0;
726             
727             if (i)
728                 wrbuf_putc(term_dict, '|');
729             while (j < i)
730                 string_rel_add_char(term_dict, term_component, &j);
731
732             wrbuf_putc(term_dict, '[');
733
734             wrbuf_putc(term_dict, '^');
735             
736             wrbuf_putc(term_dict, 1);
737             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
738             
739             string_rel_add_char(term_dict, term_component, &i);
740             wrbuf_putc(term_dict, '-');
741             
742             wrbuf_putc(term_dict, ']');
743             wrbuf_putc(term_dict, '.');
744             wrbuf_putc(term_dict, '*');
745         }
746         wrbuf_putc(term_dict, ')');
747         break;
748     case 2:
749         if (!term_100(zm, term_sub, term_component, space_split, display_term))
750         {
751             wrbuf_destroy(term_component);
752             return 0;
753         }
754         yaz_log(log_level_rpn, "Relation <=");
755
756         wrbuf_putc(term_dict, '(');
757         for (i = 0; i < wrbuf_len(term_component); )
758         {
759             int j = 0;
760
761             while (j < i)
762                 string_rel_add_char(term_dict, term_component, &j);
763             wrbuf_putc(term_dict, '[');
764
765             wrbuf_putc(term_dict, '^');
766
767             wrbuf_putc(term_dict, 1);
768             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
769
770             string_rel_add_char(term_dict, term_component, &i);
771             wrbuf_putc(term_dict, '-');
772
773             wrbuf_putc(term_dict, ']');
774             wrbuf_putc(term_dict, '.');
775             wrbuf_putc(term_dict, '*');
776
777             wrbuf_putc(term_dict, '|');
778         }
779         for (i = 0; i < wrbuf_len(term_component); )
780             string_rel_add_char(term_dict, term_component, &i);
781         wrbuf_putc(term_dict, ')');
782         break;
783     case 5:
784         if (!term_100(zm, term_sub, term_component, space_split, display_term))
785         {
786             wrbuf_destroy(term_component);
787             return 0;
788         }
789         yaz_log(log_level_rpn, "Relation >");
790
791         wrbuf_putc(term_dict, '(');
792         for (i = 0; i < wrbuf_len(term_component); )
793         {
794             int j = 0;
795
796             while (j < i)
797                 string_rel_add_char(term_dict, term_component, &j);
798             wrbuf_putc(term_dict, '[');
799             
800             wrbuf_putc(term_dict, '^');
801             wrbuf_putc(term_dict, '-');
802             string_rel_add_char(term_dict, term_component, &i);
803
804             wrbuf_putc(term_dict, ']');
805             wrbuf_putc(term_dict, '.');
806             wrbuf_putc(term_dict, '*');
807
808             wrbuf_putc(term_dict, '|');
809         }
810         for (i = 0; i < wrbuf_len(term_component); )
811             string_rel_add_char(term_dict, term_component, &i);
812         wrbuf_putc(term_dict, '.');
813         wrbuf_putc(term_dict, '+');
814         wrbuf_putc(term_dict, ')');
815         break;
816     case 4:
817         if (!term_100(zm, term_sub, term_component, space_split, display_term))
818         {
819             wrbuf_destroy(term_component);
820             return 0;
821         }
822         yaz_log(log_level_rpn, "Relation >=");
823
824         wrbuf_putc(term_dict, '(');
825         for (i = 0; i < wrbuf_len(term_component); )
826         {
827             int j = 0;
828
829             if (i)
830                 wrbuf_putc(term_dict, '|');
831             while (j < i)
832                 string_rel_add_char(term_dict, term_component, &j);
833             wrbuf_putc(term_dict, '[');
834
835             if (i < wrbuf_len(term_component)-1)
836             {
837                 wrbuf_putc(term_dict, '^');
838                 wrbuf_putc(term_dict, '-');
839                 string_rel_add_char(term_dict, term_component, &i);
840             }
841             else
842             {
843                 string_rel_add_char(term_dict, term_component, &i);
844                 wrbuf_putc(term_dict, '-');
845             }
846             wrbuf_putc(term_dict, ']');
847             wrbuf_putc(term_dict, '.');
848             wrbuf_putc(term_dict, '*');
849         }
850         wrbuf_putc(term_dict, ')');
851         break;
852     case 3:
853     case 102:
854     case -1:
855         if (!**term_sub)
856             return 1;
857         yaz_log(log_level_rpn, "Relation =");
858         if (!term_100(zm, term_sub, term_component, space_split, display_term))
859         {
860             wrbuf_destroy(term_component);
861             return 0;
862         }
863         wrbuf_puts(term_dict, "(");
864         wrbuf_puts(term_dict, wrbuf_cstr(term_component));
865         wrbuf_puts(term_dict, ")");
866         break;
867     case 103:
868         yaz_log(log_level_rpn, "Relation always matches");
869         /* skip to end of term (we don't care what it is) */
870         while (**term_sub != '\0')
871             (*term_sub)++;
872         break;
873     default:
874         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
875         wrbuf_destroy(term_component);
876         return 0;
877     }
878     wrbuf_destroy(term_component);
879     return 1;
880 }
881
882 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
883                              const char **term_sub, 
884                              WRBUF term_dict,
885                              const Odr_oid *attributeSet, NMEM stream,
886                              struct grep_info *grep_info,
887                              const char *index_type, int complete_flag,
888                              WRBUF display_term,
889                              const char *xpath_use,
890                              struct ord_list **ol,
891                              zebra_map_t zm);
892
893 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
894                                 Z_AttributesPlusTerm *zapt,
895                                 zint *hits_limit_value,
896                                 const char **term_ref_id_str,
897                                 NMEM nmem)
898 {
899     AttrType term_ref_id_attr;
900     AttrType hits_limit_attr;
901     int term_ref_id_int;
902     zint hits_limit_from_attr;
903  
904     attr_init_APT(&hits_limit_attr, zapt, 11);
905     hits_limit_from_attr  = attr_find(&hits_limit_attr, NULL);
906
907     attr_init_APT(&term_ref_id_attr, zapt, 10);
908     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
909     if (term_ref_id_int >= 0)
910     {
911         char *res = nmem_malloc(nmem, 20);
912         sprintf(res, "%d", term_ref_id_int);
913         *term_ref_id_str = res;
914     }
915     if (hits_limit_from_attr != -1)
916         *hits_limit_value = hits_limit_from_attr;
917
918     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
919             *term_ref_id_str ? *term_ref_id_str : "none",
920             *hits_limit_value);
921     return ZEBRA_OK;
922 }
923
924 /** \brief search for term (which may be truncated)
925  */
926 static ZEBRA_RES search_term(ZebraHandle zh,
927                              Z_AttributesPlusTerm *zapt,
928                              const char **term_sub, 
929                              const Odr_oid *attributeSet,
930                              zint hits_limit, NMEM stream,
931                              struct grep_info *grep_info,
932                              const char *index_type, int complete_flag,
933                              const char *rank_type, 
934                              const char *xpath_use,
935                              NMEM rset_nmem,
936                              RSET *rset,
937                              struct rset_key_control *kc,
938                              zebra_map_t zm)
939 {
940     ZEBRA_RES res;
941     struct ord_list *ol;
942     zint hits_limit_value = hits_limit;
943     const char *term_ref_id_str = 0;
944     WRBUF term_dict = wrbuf_alloc();
945     WRBUF display_term = wrbuf_alloc();
946     *rset = 0;
947     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
948                           stream);
949     grep_info->isam_p_indx = 0;
950     res = string_term(zh, zapt, term_sub, term_dict,
951                       attributeSet, stream, grep_info,
952                       index_type, complete_flag,
953                       display_term, xpath_use, &ol, zm);
954     wrbuf_destroy(term_dict);
955     if (res == ZEBRA_OK && *term_sub)
956     {
957         yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
958         *rset = rset_trunc(zh, grep_info->isam_p_buf,
959                            grep_info->isam_p_indx, wrbuf_buf(display_term),
960                            wrbuf_len(display_term), rank_type, 
961                            1 /* preserve pos */,
962                            zapt->term->which, rset_nmem,
963                            kc, kc->scope, ol, index_type, hits_limit_value,
964                            term_ref_id_str);
965         if (!*rset)
966             res = ZEBRA_FAIL;
967     }
968     wrbuf_destroy(display_term);
969     return res;
970 }
971
972 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
973                              const char **term_sub, 
974                              WRBUF term_dict,
975                              const Odr_oid *attributeSet, NMEM stream,
976                              struct grep_info *grep_info,
977                              const char *index_type, int complete_flag,
978                              WRBUF display_term,
979                              const char *xpath_use,
980                              struct ord_list **ol,
981                              zebra_map_t zm)
982 {
983     int r;
984     AttrType truncation;
985     int truncation_value;
986     const char *termp;
987     struct rpn_char_map_info rcmi;
988
989     int space_split = complete_flag ? 0 : 1;
990     int ord = -1;
991     int regex_range = 0;
992     int max_pos, prefix_len = 0;
993     int relation_error;
994     char ord_buf[32];
995     int ord_len, i;
996
997     *ol = ord_list_create(stream);
998
999     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1000     attr_init_APT(&truncation, zapt, 5);
1001     truncation_value = attr_find(&truncation, NULL);
1002     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1003
1004     termp = *term_sub; /* start of term for each database */
1005     
1006     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1007                           attributeSet, &ord) != ZEBRA_OK)
1008     {
1009         *term_sub = 0;
1010         return ZEBRA_FAIL;
1011     }
1012     
1013     wrbuf_rewind(term_dict); /* new dictionary regexp term */
1014     
1015     *ol = ord_list_append(stream, *ol, ord);
1016     ord_len = key_SU_encode(ord, ord_buf);
1017     
1018     wrbuf_putc(term_dict, '(');
1019     
1020     for (i = 0; i<ord_len; i++)
1021     {
1022         wrbuf_putc(term_dict, 1);  /* our internal regexp escape char */
1023         wrbuf_putc(term_dict, ord_buf[i]);
1024     }
1025     wrbuf_putc(term_dict, ')');
1026     
1027     prefix_len = wrbuf_len(term_dict);
1028
1029     if (zebra_maps_is_icu(zm))
1030     {
1031         int relation_value;
1032         AttrType relation;
1033         
1034         attr_init_APT(&relation, zapt, 2);
1035         relation_value = attr_find(&relation, NULL);
1036         if (relation_value == 103) /* always matches */
1037             termp += strlen(termp); /* move to end of term */
1038         else if (relation_value == 3 || relation_value == 102 || relation_value == -1)
1039         {
1040             /* ICU case */
1041             switch (truncation_value)
1042             {
1043             case -1:         /* not specified */
1044             case 100:        /* do not truncate */
1045                 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0))
1046                 {
1047                     *term_sub = 0;
1048                     return ZEBRA_OK;
1049                 }
1050                 break;
1051             case 1:          /* right truncation */
1052                 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1))
1053                 {
1054                     *term_sub = 0;
1055                     return ZEBRA_OK;
1056                 }
1057                 break;
1058             default:
1059                 zebra_setError_zint(zh,
1060                                     YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1061                                     truncation_value);
1062                 return ZEBRA_FAIL;
1063             }
1064         }
1065         else
1066         {
1067             zebra_setError_zint(zh,
1068                                 YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE,
1069                                 relation_value);
1070             return ZEBRA_FAIL;
1071         }
1072     }
1073     else
1074     {
1075         /* non-ICU case. using string.chr and friends */
1076         switch (truncation_value)
1077         {
1078         case -1:         /* not specified */
1079         case 100:        /* do not truncate */
1080             if (!string_relation(zh, zapt, &termp, term_dict,
1081                                  attributeSet,
1082                                  zm, space_split, display_term,
1083                                  &relation_error))
1084             {
1085                 if (relation_error)
1086                 {
1087                     zebra_setError(zh, relation_error, 0);
1088                     return ZEBRA_FAIL;
1089                 }
1090                 *term_sub = 0;
1091                 return ZEBRA_OK;
1092             }
1093             break;
1094         case 1:          /* right truncation */
1095             wrbuf_putc(term_dict, '(');
1096             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1097             {
1098                 *term_sub = 0;
1099                 return ZEBRA_OK;
1100             }
1101             wrbuf_puts(term_dict, ".*)");
1102             break;
1103         case 2:          /* left truncation */
1104             wrbuf_puts(term_dict, "(.*");
1105             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1106             {
1107                 *term_sub = 0;
1108                 return ZEBRA_OK;
1109             }
1110             wrbuf_putc(term_dict, ')');
1111             break;
1112         case 3:          /* left&right truncation */
1113             wrbuf_puts(term_dict, "(.*");
1114             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1115             {
1116                 *term_sub = 0;
1117                 return ZEBRA_OK;
1118             }
1119             wrbuf_puts(term_dict, ".*)");
1120             break;
1121         case 101:        /* process # in term */
1122             wrbuf_putc(term_dict, '(');
1123             if (!term_101(zm, &termp, term_dict, space_split, display_term))
1124             {
1125                 *term_sub = 0;
1126                 return ZEBRA_OK;
1127             }
1128             wrbuf_puts(term_dict, ")");
1129             break;
1130         case 102:        /* Regexp-1 */
1131             wrbuf_putc(term_dict, '(');
1132             if (!term_102(zm, &termp, term_dict, space_split, display_term))
1133             {
1134                 *term_sub = 0;
1135                 return ZEBRA_OK;
1136             }
1137             wrbuf_putc(term_dict, ')');
1138             break;
1139         case 103:       /* Regexp-2 */
1140             regex_range = 1;
1141             wrbuf_putc(term_dict, '(');
1142             if (!term_103(zm, &termp, term_dict, &regex_range,
1143                           space_split, display_term))
1144             {
1145                 *term_sub = 0;
1146                 return ZEBRA_OK;
1147             }
1148             wrbuf_putc(term_dict, ')');
1149             break;
1150         case 104:        /* process ?n * # term */
1151             wrbuf_putc(term_dict, '(');
1152             if (!term_104(zm, &termp, term_dict, space_split, display_term))
1153             {
1154                 *term_sub = 0;
1155                 return ZEBRA_OK;
1156             }
1157             wrbuf_putc(term_dict, ')');
1158             break;
1159         case 105:        /* process * ! in term and right truncate */
1160             wrbuf_putc(term_dict, '(');
1161             if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1162             {
1163                 *term_sub = 0;
1164                 return ZEBRA_OK;
1165             }
1166             wrbuf_putc(term_dict, ')');
1167             break;
1168         case 106:        /* process * ! in term */
1169             wrbuf_putc(term_dict, '(');
1170             if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1171             {
1172                 *term_sub = 0;
1173                 return ZEBRA_OK;
1174             }
1175             wrbuf_putc(term_dict, ')');
1176             break;
1177         default:
1178             zebra_setError_zint(zh,
1179                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1180                                 truncation_value);
1181             return ZEBRA_FAIL;
1182         }
1183     }
1184     if (1)
1185     {
1186         char buf[1000];
1187         const char *input = wrbuf_cstr(term_dict) + prefix_len;
1188         esc_str(buf, sizeof(buf), input, strlen(input));
1189     }
1190     {
1191         WRBUF pr_wr = wrbuf_alloc();
1192
1193         wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1194         yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1195         wrbuf_destroy(pr_wr);
1196     }
1197     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1198                          grep_info, &max_pos, 
1199                          ord_len /* number of "exact" chars */,
1200                          grep_handle);
1201     if (r == 1)
1202         zebra_set_partial_result(zh);
1203     else if (r)
1204         yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1205     *term_sub = termp;
1206     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1207     return ZEBRA_OK;
1208 }
1209
1210
1211
1212 static void grep_info_delete(struct grep_info *grep_info)
1213 {
1214 #ifdef TERM_COUNT
1215     xfree(grep_info->term_no);
1216 #endif
1217     xfree(grep_info->isam_p_buf);
1218 }
1219
1220 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1221                                    Z_AttributesPlusTerm *zapt,
1222                                    struct grep_info *grep_info,
1223                                    const char *index_type)
1224 {
1225 #ifdef TERM_COUNT
1226     grep_info->term_no = 0;
1227 #endif
1228     grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1229     grep_info->isam_p_size = 0;
1230     grep_info->isam_p_buf = NULL;
1231     grep_info->zh = zh;
1232     grep_info->index_type = index_type;
1233     grep_info->termset = 0;
1234     if (zapt)
1235     {
1236         AttrType truncmax;
1237         int truncmax_value;
1238
1239         attr_init_APT(&truncmax, zapt, 13);
1240         truncmax_value = attr_find(&truncmax, NULL);
1241         if (truncmax_value != -1)
1242             grep_info->trunc_max = truncmax_value;
1243     }
1244     if (zapt)
1245     {
1246         AttrType termset;
1247         int termset_value_numeric;
1248         const char *termset_value_string;
1249
1250         attr_init_APT(&termset, zapt, 8);
1251         termset_value_numeric =
1252             attr_find_ex(&termset, NULL, &termset_value_string);
1253         if (termset_value_numeric != -1)
1254         {
1255 #if TERMSET_DISABLE
1256             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1257             return ZEBRA_FAIL;
1258 #else
1259             char resname[32];
1260             const char *termset_name = 0;
1261             if (termset_value_numeric != -2)
1262             {
1263                 
1264                 sprintf(resname, "%d", termset_value_numeric);
1265                 termset_name = resname;
1266             }
1267             else
1268                 termset_name = termset_value_string;
1269             yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1270             grep_info->termset = resultSetAdd(zh, termset_name, 1);
1271             if (!grep_info->termset)
1272             {
1273                 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1274                 return ZEBRA_FAIL;
1275             }
1276 #endif
1277         }
1278     }
1279     return ZEBRA_OK;
1280 }
1281
1282 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1283                                      Z_AttributesPlusTerm *zapt,
1284                                      const char *termz,
1285                                      const Odr_oid *attributeSet,
1286                                      zint hits_limit,
1287                                      NMEM stream,
1288                                      const char *index_type, int complete_flag,
1289                                      const char *rank_type,
1290                                      const char *xpath_use,
1291                                      NMEM rset_nmem,
1292                                      RSET **result_sets, int *num_result_sets,
1293                                      struct rset_key_control *kc,
1294                                      zebra_map_t zm)
1295 {
1296     struct grep_info grep_info;
1297     const char *termp = termz;
1298     int alloc_sets = 0;
1299     
1300     *num_result_sets = 0;
1301     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1302         return ZEBRA_FAIL;
1303     while (1)
1304     { 
1305         ZEBRA_RES res;
1306
1307         if (alloc_sets == *num_result_sets)
1308         {
1309             int add = 10;
1310             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1311                                               sizeof(*rnew));
1312             if (alloc_sets)
1313                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1314             alloc_sets = alloc_sets + add;
1315             *result_sets = rnew;
1316         }
1317         res = search_term(zh, zapt, &termp, attributeSet, hits_limit,
1318                           stream, &grep_info,
1319                           index_type, complete_flag,
1320                           rank_type,
1321                           xpath_use, rset_nmem,
1322                           &(*result_sets)[*num_result_sets],
1323                           kc, zm);
1324         if (res != ZEBRA_OK)
1325         {
1326             int i;
1327             for (i = 0; i < *num_result_sets; i++)
1328                 rset_delete((*result_sets)[i]);
1329             grep_info_delete(&grep_info);
1330             return res;
1331         }
1332         if ((*result_sets)[*num_result_sets] == 0)
1333             break;
1334         (*num_result_sets)++;
1335
1336         if (!*termp)
1337             break;
1338     }
1339     grep_info_delete(&grep_info);
1340     return ZEBRA_OK;
1341 }
1342                                
1343 /**
1344    \brief Create result set(s) for list of terms
1345    \param zh Zebra Handle
1346    \param zapt Attributes Plust Term (RPN leaf)
1347    \param termz term as used in query but converted to UTF-8
1348    \param attributeSet default attribute set
1349    \param stream memory for result
1350    \param index_type register type ("w", "p",..)
1351    \param complete_flag whether it's phrases or not
1352    \param rank_type term flags for ranking
1353    \param xpath_use use attribute for X-Path (-1 for no X-path)
1354    \param rset_nmem memory for result sets
1355    \param result_sets output result set for each term in list (output)
1356    \param num_result_sets number of output result sets
1357    \param kc rset key control to be used for created result sets
1358 */
1359 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1360                                    Z_AttributesPlusTerm *zapt,
1361                                    const char *termz,
1362                                    const Odr_oid *attributeSet,
1363                                    zint hits_limit,
1364                                    NMEM stream,
1365                                    const char *index_type, int complete_flag,
1366                                    const char *rank_type,
1367                                    const char *xpath_use,
1368                                    NMEM rset_nmem,
1369                                    RSET **result_sets, int *num_result_sets,
1370                                    struct rset_key_control *kc)
1371 {
1372     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1373     if (zebra_maps_is_icu(zm))
1374         zebra_map_tokenize_start(zm, termz, strlen(termz));
1375     return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit,
1376                                stream, index_type, complete_flag,
1377                                rank_type, xpath_use,
1378                                rset_nmem, result_sets, num_result_sets,
1379                                kc, zm);
1380 }
1381
1382
1383 /** \brief limit a search by position - returns result set
1384  */
1385 static ZEBRA_RES search_position(ZebraHandle zh,
1386                                  Z_AttributesPlusTerm *zapt,
1387                                  const Odr_oid *attributeSet,
1388                                  const char *index_type,
1389                                  NMEM rset_nmem,
1390                                  RSET *rset,
1391                                  struct rset_key_control *kc)
1392 {
1393     int position_value;
1394     AttrType position;
1395     int ord = -1;
1396     char ord_buf[32];
1397     char term_dict[100];
1398     int ord_len;
1399     char *val;
1400     ISAM_P isam_p;
1401     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1402     
1403     attr_init_APT(&position, zapt, 3);
1404     position_value = attr_find(&position, NULL);
1405     switch(position_value)
1406     {
1407     case 3:
1408     case -1:
1409         return ZEBRA_OK;
1410     case 1:
1411     case 2:
1412         break;
1413     default:
1414         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1415                             position_value);
1416         return ZEBRA_FAIL;
1417     }
1418
1419
1420     if (!zebra_maps_is_first_in_field(zm))
1421     {
1422         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1423                             position_value);
1424         return ZEBRA_FAIL;
1425     }
1426
1427     if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1428                           attributeSet, &ord) != ZEBRA_OK)
1429     {
1430         return ZEBRA_FAIL;
1431     }
1432     ord_len = key_SU_encode(ord, ord_buf);
1433     memcpy(term_dict, ord_buf, ord_len);
1434     strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1435     val = dict_lookup(zh->reg->dict, term_dict);
1436     if (val)
1437     {
1438         assert(*val == sizeof(ISAM_P));
1439         memcpy(&isam_p, val+1, sizeof(isam_p));
1440
1441         *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope, 
1442                                        isam_p, 0);
1443     }
1444     return ZEBRA_OK;
1445 }
1446
1447 /** \brief returns result set for phrase search
1448  */
1449 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1450                                        Z_AttributesPlusTerm *zapt,
1451                                        const char *termz_org,
1452                                        const Odr_oid *attributeSet,
1453                                        zint hits_limit,
1454                                        NMEM stream,
1455                                        const char *index_type,
1456                                        int complete_flag,
1457                                        const char *rank_type,
1458                                        const char *xpath_use,
1459                                        NMEM rset_nmem,
1460                                        RSET *rset,
1461                                        struct rset_key_control *kc)
1462 {
1463     RSET *result_sets = 0;
1464     int num_result_sets = 0;
1465     ZEBRA_RES res =
1466         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1467                           stream, index_type, complete_flag,
1468                           rank_type, xpath_use,
1469                           rset_nmem,
1470                           &result_sets, &num_result_sets, kc);
1471     
1472     if (res != ZEBRA_OK)
1473         return res;
1474
1475     if (num_result_sets > 0)
1476     {
1477         RSET first_set = 0;
1478         res = search_position(zh, zapt, attributeSet, 
1479                               index_type,
1480                               rset_nmem, &first_set,
1481                               kc);
1482         if (res != ZEBRA_OK)
1483         {
1484             int i;
1485             for (i = 0; i<num_result_sets; i++)
1486                 rset_delete(result_sets[i]);
1487             return res;
1488         }
1489         if (first_set)
1490         {
1491             RSET *nsets = nmem_malloc(stream,
1492                                       sizeof(RSET) * (num_result_sets+1));
1493             nsets[0] = first_set;
1494             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1495             result_sets = nsets;
1496             num_result_sets++;
1497         }
1498     }
1499     if (num_result_sets == 0)
1500         *rset = rset_create_null(rset_nmem, kc, 0); 
1501     else if (num_result_sets == 1)
1502         *rset = result_sets[0];
1503     else
1504         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1505                                  num_result_sets, result_sets,
1506                                  1 /* ordered */, 0 /* exclusion */,
1507                                  3 /* relation */, 1 /* distance */);
1508     if (!*rset)
1509         return ZEBRA_FAIL;
1510     return ZEBRA_OK;
1511 }
1512
1513 /** \brief returns result set for or-list search
1514  */
1515 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1516                                         Z_AttributesPlusTerm *zapt,
1517                                         const char *termz_org,
1518                                         const Odr_oid *attributeSet,
1519                                         zint hits_limit,
1520                                         NMEM stream,
1521                                         const char *index_type, 
1522                                         int complete_flag,
1523                                         const char *rank_type,
1524                                         const char *xpath_use,
1525                                         NMEM rset_nmem,
1526                                         RSET *rset,
1527                                         struct rset_key_control *kc)
1528 {
1529     RSET *result_sets = 0;
1530     int num_result_sets = 0;
1531     int i;
1532     ZEBRA_RES res =
1533         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1534                           stream, index_type, complete_flag,
1535                           rank_type, xpath_use,
1536                           rset_nmem,
1537                           &result_sets, &num_result_sets, kc);
1538     if (res != ZEBRA_OK)
1539         return res;
1540
1541     for (i = 0; i<num_result_sets; i++)
1542     {
1543         RSET first_set = 0;
1544         res = search_position(zh, zapt, attributeSet, 
1545                               index_type,
1546                               rset_nmem, &first_set,
1547                               kc);
1548         if (res != ZEBRA_OK)
1549         {
1550             for (i = 0; i<num_result_sets; i++)
1551                 rset_delete(result_sets[i]);
1552             return res;
1553         }
1554
1555         if (first_set)
1556         {
1557             RSET tmp_set[2];
1558
1559             tmp_set[0] = first_set;
1560             tmp_set[1] = result_sets[i];
1561             
1562             result_sets[i] = rset_create_prox(
1563                 rset_nmem, kc, kc->scope,
1564                 2, tmp_set,
1565                 1 /* ordered */, 0 /* exclusion */,
1566                 3 /* relation */, 1 /* distance */);
1567         }
1568     }
1569     if (num_result_sets == 0)
1570         *rset = rset_create_null(rset_nmem, kc, 0); 
1571     else if (num_result_sets == 1)
1572         *rset = result_sets[0];
1573     else
1574         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1575                                num_result_sets, result_sets);
1576     if (!*rset)
1577         return ZEBRA_FAIL;
1578     return ZEBRA_OK;
1579 }
1580
1581 /** \brief returns result set for and-list search
1582  */
1583 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1584                                          Z_AttributesPlusTerm *zapt,
1585                                          const char *termz_org,
1586                                          const Odr_oid *attributeSet,
1587                                          zint hits_limit,
1588                                          NMEM stream,
1589                                          const char *index_type, 
1590                                          int complete_flag,
1591                                          const char *rank_type, 
1592                                          const char *xpath_use,
1593                                          NMEM rset_nmem,
1594                                          RSET *rset,
1595                                          struct rset_key_control *kc)
1596 {
1597     RSET *result_sets = 0;
1598     int num_result_sets = 0;
1599     int i;
1600     ZEBRA_RES res =
1601         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1602                           stream, index_type, complete_flag,
1603                           rank_type, xpath_use,
1604                           rset_nmem,
1605                           &result_sets, &num_result_sets,
1606                           kc);
1607     if (res != ZEBRA_OK)
1608         return res;
1609     for (i = 0; i<num_result_sets; i++)
1610     {
1611         RSET first_set = 0;
1612         res = search_position(zh, zapt, attributeSet, 
1613                               index_type,
1614                               rset_nmem, &first_set,
1615                               kc);
1616         if (res != ZEBRA_OK)
1617         {
1618             for (i = 0; i<num_result_sets; i++)
1619                 rset_delete(result_sets[i]);
1620             return res;
1621         }
1622
1623         if (first_set)
1624         {
1625             RSET tmp_set[2];
1626
1627             tmp_set[0] = first_set;
1628             tmp_set[1] = result_sets[i];
1629             
1630             result_sets[i] = rset_create_prox(
1631                 rset_nmem, kc, kc->scope,
1632                 2, tmp_set,
1633                 1 /* ordered */, 0 /* exclusion */,
1634                 3 /* relation */, 1 /* distance */);
1635         }
1636     }
1637
1638
1639     if (num_result_sets == 0)
1640         *rset = rset_create_null(rset_nmem, kc, 0); 
1641     else if (num_result_sets == 1)
1642         *rset = result_sets[0];
1643     else
1644         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1645                                 num_result_sets, result_sets);
1646     if (!*rset)
1647         return ZEBRA_FAIL;
1648     return ZEBRA_OK;
1649 }
1650
1651 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1652                             const char **term_sub,
1653                             WRBUF term_dict,
1654                             const Odr_oid *attributeSet,
1655                             struct grep_info *grep_info,
1656                             int *max_pos,
1657                             zebra_map_t zm,
1658                             WRBUF display_term,
1659                             int *error_code)
1660 {
1661     AttrType relation;
1662     int relation_value;
1663     int term_value;
1664     int r;
1665     WRBUF term_num = wrbuf_alloc();
1666
1667     *error_code = 0;
1668     attr_init_APT(&relation, zapt, 2);
1669     relation_value = attr_find(&relation, NULL);
1670
1671     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1672
1673     switch (relation_value)
1674     {
1675     case 1:
1676         yaz_log(log_level_rpn, "Relation <");
1677         if (!term_100(zm, term_sub, term_num, 1, display_term))
1678         { 
1679             wrbuf_destroy(term_num);
1680             return 0;
1681         }
1682         term_value = atoi(wrbuf_cstr(term_num));
1683         gen_regular_rel(term_dict, term_value-1, 1);
1684         break;
1685     case 2:
1686         yaz_log(log_level_rpn, "Relation <=");
1687         if (!term_100(zm, term_sub, term_num, 1, display_term))
1688         {
1689             wrbuf_destroy(term_num);
1690             return 0;
1691         }
1692         term_value = atoi(wrbuf_cstr(term_num));
1693         gen_regular_rel(term_dict, term_value, 1);
1694         break;
1695     case 4:
1696         yaz_log(log_level_rpn, "Relation >=");
1697         if (!term_100(zm, term_sub, term_num, 1, display_term))
1698         {
1699             wrbuf_destroy(term_num);
1700             return 0;
1701         }
1702         term_value = atoi(wrbuf_cstr(term_num));
1703         gen_regular_rel(term_dict, term_value, 0);
1704         break;
1705     case 5:
1706         yaz_log(log_level_rpn, "Relation >");
1707         if (!term_100(zm, term_sub, term_num, 1, display_term))
1708         {
1709             wrbuf_destroy(term_num);
1710             return 0;
1711         }
1712         term_value = atoi(wrbuf_cstr(term_num));
1713         gen_regular_rel(term_dict, term_value+1, 0);
1714         break;
1715     case -1:
1716     case 3:
1717         yaz_log(log_level_rpn, "Relation =");
1718         if (!term_100(zm, term_sub, term_num, 1, display_term))
1719         {
1720             wrbuf_destroy(term_num);
1721             return 0; 
1722         }
1723         term_value = atoi(wrbuf_cstr(term_num));
1724         wrbuf_printf(term_dict, "(0*%d)", term_value);
1725         break;
1726     case 103:
1727         /* term_tmp untouched.. */
1728         while (**term_sub != '\0')
1729             (*term_sub)++;
1730         break;
1731     default:
1732         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1733         wrbuf_destroy(term_num); 
1734         return 0;
1735     }
1736     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 
1737                          0, grep_info, max_pos, 0, grep_handle);
1738
1739     if (r == 1)
1740         zebra_set_partial_result(zh);
1741     else if (r)
1742         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1743     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1744     wrbuf_destroy(term_num);
1745     return 1;
1746 }
1747
1748 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1749                               const char **term_sub, 
1750                               WRBUF term_dict,
1751                               const Odr_oid *attributeSet, NMEM stream,
1752                               struct grep_info *grep_info,
1753                               const char *index_type, int complete_flag,
1754                               WRBUF display_term,
1755                               const char *xpath_use,
1756                               struct ord_list **ol)
1757 {
1758     const char *termp;
1759     struct rpn_char_map_info rcmi;
1760     int max_pos;
1761     int relation_error = 0;
1762     int ord, ord_len, i;
1763     char ord_buf[32];
1764     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1765     
1766     *ol = ord_list_create(stream);
1767
1768     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1769
1770     termp = *term_sub;
1771     
1772     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1773                           attributeSet, &ord) != ZEBRA_OK)
1774     {
1775         return ZEBRA_FAIL;
1776     }
1777     
1778     wrbuf_rewind(term_dict);
1779     
1780     *ol = ord_list_append(stream, *ol, ord);
1781     
1782     ord_len = key_SU_encode(ord, ord_buf);
1783     
1784     wrbuf_putc(term_dict, '(');
1785     for (i = 0; i < ord_len; i++)
1786     {
1787         wrbuf_putc(term_dict, 1);
1788         wrbuf_putc(term_dict, ord_buf[i]);
1789     }
1790     wrbuf_putc(term_dict, ')');
1791     
1792     if (!numeric_relation(zh, zapt, &termp, term_dict,
1793                           attributeSet, grep_info, &max_pos, zm,
1794                           display_term, &relation_error))
1795     {
1796         if (relation_error)
1797         {
1798             zebra_setError(zh, relation_error, 0);
1799             return ZEBRA_FAIL;
1800         }
1801         *term_sub = 0;
1802         return ZEBRA_OK;
1803     }
1804     *term_sub = termp;
1805     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1806     return ZEBRA_OK;
1807 }
1808
1809                                  
1810 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1811                                         Z_AttributesPlusTerm *zapt,
1812                                         const char *termz,
1813                                         const Odr_oid *attributeSet,
1814                                         zint hits_limit,
1815                                         NMEM stream,
1816                                         const char *index_type, 
1817                                         int complete_flag,
1818                                         const char *rank_type, 
1819                                         const char *xpath_use,
1820                                         NMEM rset_nmem,
1821                                         RSET *rset,
1822                                         struct rset_key_control *kc)
1823 {
1824     const char *termp = termz;
1825     RSET *result_sets = 0;
1826     int num_result_sets = 0;
1827     ZEBRA_RES res;
1828     struct grep_info grep_info;
1829     int alloc_sets = 0;
1830     zint hits_limit_value = hits_limit;
1831     const char *term_ref_id_str = 0;
1832
1833     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1834                           stream);
1835
1836     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1837     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1838         return ZEBRA_FAIL;
1839     while (1)
1840     { 
1841         struct ord_list *ol;
1842         WRBUF term_dict = wrbuf_alloc();
1843         WRBUF display_term = wrbuf_alloc();
1844         if (alloc_sets == num_result_sets)
1845         {
1846             int add = 10;
1847             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1848                                               sizeof(*rnew));
1849             if (alloc_sets)
1850                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1851             alloc_sets = alloc_sets + add;
1852             result_sets = rnew;
1853         }
1854         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1855         grep_info.isam_p_indx = 0;
1856         res = numeric_term(zh, zapt, &termp, term_dict,
1857                            attributeSet, stream, &grep_info,
1858                            index_type, complete_flag,
1859                            display_term, xpath_use, &ol);
1860         wrbuf_destroy(term_dict);
1861         if (res == ZEBRA_FAIL || termp == 0)
1862         {
1863             wrbuf_destroy(display_term);
1864             break;
1865         }
1866         yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
1867         result_sets[num_result_sets] =
1868             rset_trunc(zh, grep_info.isam_p_buf,
1869                        grep_info.isam_p_indx, wrbuf_buf(display_term),
1870                        wrbuf_len(display_term), rank_type,
1871                        0 /* preserve position */,
1872                        zapt->term->which, rset_nmem, 
1873                        kc, kc->scope, ol, index_type,
1874                        hits_limit_value,
1875                        term_ref_id_str);
1876         wrbuf_destroy(display_term);
1877         if (!result_sets[num_result_sets])
1878             break;
1879         num_result_sets++;
1880         if (!*termp)
1881             break;
1882     }
1883     grep_info_delete(&grep_info);
1884
1885     if (res != ZEBRA_OK)
1886         return res;
1887     if (num_result_sets == 0)
1888         *rset = rset_create_null(rset_nmem, kc, 0);
1889     else if (num_result_sets == 1)
1890         *rset = result_sets[0];
1891     else
1892         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1893                                 num_result_sets, result_sets);
1894     if (!*rset)
1895         return ZEBRA_FAIL;
1896     return ZEBRA_OK;
1897 }
1898
1899 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1900                                       Z_AttributesPlusTerm *zapt,
1901                                       const char *termz,
1902                                       const Odr_oid *attributeSet,
1903                                       NMEM stream,
1904                                       const char *rank_type, NMEM rset_nmem,
1905                                       RSET *rset,
1906                                       struct rset_key_control *kc)
1907 {
1908     Record rec;
1909     zint sysno = atozint(termz);
1910     
1911     if (sysno <= 0)
1912         sysno = 0;
1913     rec = rec_get(zh->reg->records, sysno);
1914     if (!rec)
1915         sysno = 0;
1916
1917     rec_free(&rec);
1918
1919     if (sysno <= 0)
1920     {
1921         *rset = rset_create_null(rset_nmem, kc, 0);
1922     }
1923     else
1924     {
1925         RSFD rsfd;
1926         struct it_key key;
1927         *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1928                                  res_get(zh->res, "setTmpDir"), 0);
1929         rsfd = rset_open(*rset, RSETF_WRITE);
1930         
1931         key.mem[0] = sysno;
1932         key.mem[1] = 1;
1933         key.len = 2;
1934         rset_write(rsfd, &key);
1935         rset_close(rsfd);
1936     }
1937     return ZEBRA_OK;
1938 }
1939
1940 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1941                                const Odr_oid *attributeSet, NMEM stream,
1942                                Z_SortKeySpecList *sort_sequence,
1943                                const char *rank_type,
1944                                NMEM rset_nmem,
1945                                RSET *rset,
1946                                struct rset_key_control *kc)
1947 {
1948     int i;
1949     int sort_relation_value;
1950     AttrType sort_relation_type;
1951     Z_SortKeySpec *sks;
1952     Z_SortKey *sk;
1953     char termz[20];
1954     
1955     attr_init_APT(&sort_relation_type, zapt, 7);
1956     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1957
1958     if (!sort_sequence->specs)
1959     {
1960         sort_sequence->num_specs = 10;
1961         sort_sequence->specs = (Z_SortKeySpec **)
1962             nmem_malloc(stream, sort_sequence->num_specs *
1963                         sizeof(*sort_sequence->specs));
1964         for (i = 0; i<sort_sequence->num_specs; i++)
1965             sort_sequence->specs[i] = 0;
1966     }
1967     if (zapt->term->which != Z_Term_general)
1968         i = 0;
1969     else
1970         i = atoi_n((char *) zapt->term->u.general->buf,
1971                    zapt->term->u.general->len);
1972     if (i >= sort_sequence->num_specs)
1973         i = 0;
1974     sprintf(termz, "%d", i);
1975
1976     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1977     sks->sortElement = (Z_SortElement *)
1978         nmem_malloc(stream, sizeof(*sks->sortElement));
1979     sks->sortElement->which = Z_SortElement_generic;
1980     sk = sks->sortElement->u.generic = (Z_SortKey *)
1981         nmem_malloc(stream, sizeof(*sk));
1982     sk->which = Z_SortKey_sortAttributes;
1983     sk->u.sortAttributes = (Z_SortAttributes *)
1984         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1985
1986     sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1987     sk->u.sortAttributes->list = zapt->attributes;
1988
1989     sks->sortRelation = (Odr_int *)
1990         nmem_malloc(stream, sizeof(*sks->sortRelation));
1991     if (sort_relation_value == 1)
1992         *sks->sortRelation = Z_SortKeySpec_ascending;
1993     else if (sort_relation_value == 2)
1994         *sks->sortRelation = Z_SortKeySpec_descending;
1995     else 
1996         *sks->sortRelation = Z_SortKeySpec_ascending;
1997
1998     sks->caseSensitivity = (Odr_int *)
1999         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2000     *sks->caseSensitivity = 0;
2001
2002     sks->which = Z_SortKeySpec_null;
2003     sks->u.null = odr_nullval ();
2004     sort_sequence->specs[i] = sks;
2005     *rset = rset_create_null(rset_nmem, kc, 0);
2006     return ZEBRA_OK;
2007 }
2008
2009
2010 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2011                            const Odr_oid *attributeSet,
2012                            struct xpath_location_step *xpath, int max,
2013                            NMEM mem)
2014 {
2015     const Odr_oid *curAttributeSet = attributeSet;
2016     AttrType use;
2017     const char *use_string = 0;
2018     
2019     attr_init_APT(&use, zapt, 1);
2020     attr_find_ex(&use, &curAttributeSet, &use_string);
2021
2022     if (!use_string || *use_string != '/')
2023         return -1;
2024
2025     return zebra_parse_xpath_str(use_string, xpath, max, mem);
2026 }
2027  
2028                
2029
2030 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2031                         const char *index_type, const char *term, 
2032                         const char *xpath_use,
2033                         NMEM rset_nmem,
2034                         struct rset_key_control *kc)
2035 {
2036     struct grep_info grep_info;
2037     int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
2038                                            zinfo_index_category_index,
2039                                            index_type, xpath_use);
2040     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2041         return rset_create_null(rset_nmem, kc, 0);
2042     
2043     if (ord < 0)
2044         return rset_create_null(rset_nmem, kc, 0);
2045     else
2046     {
2047         int i, r, max_pos;
2048         char ord_buf[32];
2049         RSET rset;
2050         WRBUF term_dict = wrbuf_alloc();
2051         int ord_len = key_SU_encode(ord, ord_buf);
2052         int term_type = Z_Term_characterString;
2053         const char *flags = "void";
2054
2055         wrbuf_putc(term_dict, '(');
2056         for (i = 0; i<ord_len; i++)
2057         {
2058             wrbuf_putc(term_dict, 1);
2059             wrbuf_putc(term_dict, ord_buf[i]);
2060         }
2061         wrbuf_putc(term_dict, ')');
2062         wrbuf_puts(term_dict, term);
2063         
2064         grep_info.isam_p_indx = 0;
2065         r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2066                              &grep_info, &max_pos, 0, grep_handle);
2067         yaz_log(YLOG_DEBUG, "%s %d positions", term,
2068                 grep_info.isam_p_indx);
2069         rset = rset_trunc(zh, grep_info.isam_p_buf,
2070                           grep_info.isam_p_indx, term, strlen(term),
2071                           flags, 1, term_type, rset_nmem,
2072                           kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2073                           0 /* term_ref_id_str */);
2074         grep_info_delete(&grep_info);
2075         wrbuf_destroy(term_dict);
2076         return rset;
2077     }
2078 }
2079
2080 static
2081 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2082                            NMEM stream, const char *rank_type, RSET rset,
2083                            int xpath_len, struct xpath_location_step *xpath,
2084                            NMEM rset_nmem,
2085                            RSET *rset_out,
2086                            struct rset_key_control *kc)
2087 {
2088     int i;
2089     int always_matches = rset ? 0 : 1;
2090
2091     if (xpath_len < 0)
2092     {
2093         *rset_out = rset;
2094         return ZEBRA_OK;
2095     }
2096
2097     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2098     for (i = 0; i<xpath_len; i++)
2099     {
2100         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2101
2102     }
2103
2104     /*
2105     //a    ->    a/.*
2106     //a/b  ->    b/a/.*
2107     /a     ->    a/
2108     /a/b   ->    b/a/
2109
2110     /      ->    none
2111
2112     a[@attr = value]/b[@other = othervalue]
2113
2114     /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2115     /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2116     /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2117     /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2118     /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2119     /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2120       
2121     */
2122
2123     dict_grep_cmap(zh->reg->dict, 0, 0);
2124     
2125     {
2126         int level = xpath_len;
2127         int first_path = 1;
2128         
2129         while (--level >= 0)
2130         {
2131             WRBUF xpath_rev = wrbuf_alloc();
2132             int i;
2133             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2134
2135             for (i = level; i >= 1; --i)
2136             {
2137                 const char *cp = xpath[i].part;
2138                 if (*cp)
2139                 {
2140                     for (; *cp; cp++)
2141                     {
2142                         if (*cp == '*')
2143                             wrbuf_puts(xpath_rev, "[^/]*");
2144                         else if (*cp == ' ')
2145                             wrbuf_puts(xpath_rev, "\001 ");
2146                         else
2147                             wrbuf_putc(xpath_rev, *cp);
2148
2149                         /* wrbuf_putc does not null-terminate , but
2150                            wrbuf_puts below ensures it does.. so xpath_rev
2151                            is OK iff length is > 0 */
2152                     }
2153                     wrbuf_puts(xpath_rev, "/");
2154                 }
2155                 else if (i == 1)  /* // case */
2156                     wrbuf_puts(xpath_rev, ".*");
2157             }
2158             if (xpath[level].predicate &&
2159                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2160                 xpath[level].predicate->u.relation.name[0])
2161             {
2162                 WRBUF wbuf = wrbuf_alloc();
2163                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2164                 if (xpath[level].predicate->u.relation.value)
2165                 {
2166                     const char *cp = xpath[level].predicate->u.relation.value;
2167                     wrbuf_putc(wbuf, '=');
2168                     
2169                     while (*cp)
2170                     {
2171                         if (strchr(REGEX_CHARS, *cp))
2172                             wrbuf_putc(wbuf, '\\');
2173                         wrbuf_putc(wbuf, *cp);
2174                         cp++;
2175                     }
2176                 }
2177                 rset_attr = xpath_trunc(
2178                     zh, stream, "0", wrbuf_cstr(wbuf), 
2179                     ZEBRA_XPATH_ATTR_NAME, 
2180                     rset_nmem, kc);
2181                 wrbuf_destroy(wbuf);
2182             } 
2183             else 
2184             {
2185                 if (!first_path)
2186                 {
2187                     wrbuf_destroy(xpath_rev);
2188                     continue;
2189                 }
2190             }
2191             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, 
2192                     wrbuf_cstr(xpath_rev));
2193             if (wrbuf_len(xpath_rev))
2194             {
2195                 rset_start_tag = xpath_trunc(zh, stream, "0", 
2196                                              wrbuf_cstr(xpath_rev),
2197                                              ZEBRA_XPATH_ELM_BEGIN, 
2198                                              rset_nmem, kc);
2199                 if (always_matches)
2200                     rset = rset_start_tag;
2201                 else
2202                 {
2203                     rset_end_tag = xpath_trunc(zh, stream, "0", 
2204                                                wrbuf_cstr(xpath_rev),
2205                                                ZEBRA_XPATH_ELM_END, 
2206                                                rset_nmem, kc);
2207                     
2208                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2209                                                rset_start_tag, rset,
2210                                                rset_end_tag, rset_attr);
2211                 }
2212             }
2213             wrbuf_destroy(xpath_rev);
2214             first_path = 0;
2215         }
2216     }
2217     *rset_out = rset;
2218     return ZEBRA_OK;
2219 }
2220
2221 #define MAX_XPATH_STEPS 10
2222
2223 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2224                                      Z_AttributesPlusTerm *zapt,
2225                                      const Odr_oid *attributeSet,
2226                                      zint hits_limit, NMEM stream,
2227                                      Z_SortKeySpecList *sort_sequence,
2228                                      NMEM rset_nmem,
2229                                      RSET *rset,
2230                                      struct rset_key_control *kc);
2231
2232 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2233                                 const Odr_oid *attributeSet,
2234                                 zint hits_limit, NMEM stream,
2235                                 Z_SortKeySpecList *sort_sequence,
2236                                 int num_bases, const char **basenames, 
2237                                 NMEM rset_nmem,
2238                                 RSET *rset,
2239                                 struct rset_key_control *kc)
2240 {
2241     RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2242     ZEBRA_RES res = ZEBRA_OK;
2243     int i;
2244     for (i = 0; i < num_bases; i++)
2245     {
2246
2247         if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2248         {
2249             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2250                            basenames[i]);
2251             res = ZEBRA_FAIL;
2252             break;
2253         }
2254         res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream,
2255                                   sort_sequence,
2256                                   rset_nmem, rsets+i, kc);
2257         if (res != ZEBRA_OK)
2258             break;
2259     }
2260     if (res != ZEBRA_OK)
2261     {   /* must clean up the already created sets */
2262         while (--i >= 0)
2263             rset_delete(rsets[i]);
2264         *rset = 0;
2265     }
2266     else 
2267     {
2268         if (num_bases == 1)
2269             *rset = rsets[0];
2270         else if (num_bases == 0)
2271             *rset = rset_create_null(rset_nmem, kc, 0); 
2272         else
2273             *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
2274                                    num_bases, rsets);
2275     }
2276     return res;
2277 }
2278
2279 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2280                                      Z_AttributesPlusTerm *zapt,
2281                                      const Odr_oid *attributeSet,
2282                                      zint hits_limit, NMEM stream,
2283                                      Z_SortKeySpecList *sort_sequence,
2284                                      NMEM rset_nmem,
2285                                      RSET *rset,
2286                                      struct rset_key_control *kc)
2287 {
2288     ZEBRA_RES res = ZEBRA_OK;
2289     const char *index_type;
2290     char *search_type = NULL;
2291     char rank_type[128];
2292     int complete_flag;
2293     int sort_flag;
2294     char termz[IT_MAX_WORD+1];
2295     int xpath_len;
2296     const char *xpath_use = 0;
2297     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2298
2299     if (!log_level_set)
2300     {
2301         log_level_rpn = yaz_log_module_level("rpn");
2302         log_level_set = 1;
2303     }
2304     zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2305                     rank_type, &complete_flag, &sort_flag);
2306     
2307     yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2308     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2309     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2310     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2311
2312     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2313         return ZEBRA_FAIL;
2314
2315     if (sort_flag)
2316         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2317                              rank_type, rset_nmem, rset, kc);
2318     /* consider if an X-Path query is used */
2319     xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
2320                                 xpath, MAX_XPATH_STEPS, stream);
2321     if (xpath_len >= 0)
2322     {
2323         if (xpath[xpath_len-1].part[0] == '@') 
2324             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2325         else
2326             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
2327
2328         if (1)
2329         {
2330             AttrType relation;
2331             int relation_value;
2332
2333             attr_init_APT(&relation, zapt, 2);
2334             relation_value = attr_find(&relation, NULL);
2335
2336             if (relation_value == 103) /* alwaysmatches */
2337             {
2338                 *rset = 0; /* signal no "term" set */
2339                 return rpn_search_xpath(zh, stream, rank_type, *rset, 
2340                                         xpath_len, xpath, rset_nmem, rset, kc);
2341             }
2342         }
2343     }
2344
2345     /* search using one of the various search type strategies
2346        termz is our UTF-8 search term
2347        attributeSet is top-level default attribute set 
2348        stream is ODR for search
2349        reg_id is the register type
2350        complete_flag is 1 for complete subfield, 0 for incomplete
2351        xpath_use is use-attribute to be used for X-Path search, 0 for none
2352     */
2353     if (!strcmp(search_type, "phrase"))
2354     {
2355         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit,
2356                                     stream,
2357                                     index_type, complete_flag, rank_type,
2358                                     xpath_use,
2359                                     rset_nmem,
2360                                     rset, kc);
2361     }
2362     else if (!strcmp(search_type, "and-list"))
2363     {
2364         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit,
2365                                       stream,
2366                                       index_type, complete_flag, rank_type,
2367                                       xpath_use,
2368                                       rset_nmem,
2369                                       rset, kc);
2370     }
2371     else if (!strcmp(search_type, "or-list"))
2372     {
2373         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit,
2374                                      stream,
2375                                      index_type, complete_flag, rank_type,
2376                                      xpath_use,
2377                                      rset_nmem,
2378                                      rset, kc);
2379     }
2380     else if (!strcmp(search_type, "local"))
2381     {
2382         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2383                                    rank_type, rset_nmem, rset, kc);
2384     }
2385     else if (!strcmp(search_type, "numeric"))
2386     {
2387         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, hits_limit,
2388                                      stream,
2389                                      index_type, complete_flag, rank_type,
2390                                      xpath_use,
2391                                      rset_nmem,
2392                                      rset, kc);
2393     }
2394     else
2395     {
2396         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2397         res = ZEBRA_FAIL;
2398     }
2399     if (res != ZEBRA_OK)
2400         return res;
2401     if (!*rset)
2402         return ZEBRA_FAIL;
2403     return rpn_search_xpath(zh, stream, rank_type, *rset, 
2404                             xpath_len, xpath, rset_nmem, rset, kc);
2405 }
2406
2407 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2408                                       const Odr_oid *attributeSet,
2409                                       zint hits_limit,
2410                                       NMEM stream, NMEM rset_nmem,
2411                                       Z_SortKeySpecList *sort_sequence,
2412                                       int num_bases, const char **basenames,
2413                                       RSET **result_sets, int *num_result_sets,
2414                                       Z_Operator *parent_op,
2415                                       struct rset_key_control *kc);
2416
2417 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2418                                    zint *approx_limit)
2419 {
2420     ZEBRA_RES res = ZEBRA_OK;
2421     if (zs->which == Z_RPNStructure_complex)
2422     {
2423         if (res == ZEBRA_OK)
2424             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2425                                            approx_limit);
2426         if (res == ZEBRA_OK)
2427             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2428                                            approx_limit);
2429     }
2430     else if (zs->which == Z_RPNStructure_simple)
2431     {
2432         if (zs->u.simple->which == Z_Operand_APT)
2433         {
2434             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2435             AttrType global_hits_limit_attr;
2436             int l;
2437             
2438             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2439             
2440             l = attr_find(&global_hits_limit_attr, NULL);
2441             if (l != -1)
2442                 *approx_limit = l;
2443         }
2444     }
2445     return res;
2446 }
2447
2448 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2449                          const Odr_oid *attributeSet, 
2450                          zint hits_limit,
2451                          NMEM stream, NMEM rset_nmem,
2452                          Z_SortKeySpecList *sort_sequence,
2453                          int num_bases, const char **basenames,
2454                          RSET *result_set)
2455 {
2456     RSET *result_sets = 0;
2457     int num_result_sets = 0;
2458     ZEBRA_RES res;
2459     struct rset_key_control *kc = zebra_key_control_create(zh);
2460
2461     res = rpn_search_structure(zh, zs, attributeSet, hits_limit,
2462                                stream, rset_nmem,
2463                                sort_sequence, 
2464                                num_bases, basenames,
2465                                &result_sets, &num_result_sets,
2466                                0 /* no parent op */,
2467                                kc);
2468     if (res != ZEBRA_OK)
2469     {
2470         int i;
2471         for (i = 0; i<num_result_sets; i++)
2472             rset_delete(result_sets[i]);
2473         *result_set = 0;
2474     }
2475     else
2476     {
2477         assert(num_result_sets == 1);
2478         assert(result_sets);
2479         assert(*result_sets);
2480         *result_set = *result_sets;
2481     }
2482     (*kc->dec)(kc);
2483     return res;
2484 }
2485
2486 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2487                                const Odr_oid *attributeSet, zint hits_limit,
2488                                NMEM stream, NMEM rset_nmem,
2489                                Z_SortKeySpecList *sort_sequence,
2490                                int num_bases, const char **basenames,
2491                                RSET **result_sets, int *num_result_sets,
2492                                Z_Operator *parent_op,
2493                                struct rset_key_control *kc)
2494 {
2495     *num_result_sets = 0;
2496     if (zs->which == Z_RPNStructure_complex)
2497     {
2498         ZEBRA_RES res;
2499         Z_Operator *zop = zs->u.complex->roperator;
2500         RSET *result_sets_l = 0;
2501         int num_result_sets_l = 0;
2502         RSET *result_sets_r = 0;
2503         int num_result_sets_r = 0;
2504
2505         res = rpn_search_structure(zh, zs->u.complex->s1,
2506                                    attributeSet, hits_limit, stream, rset_nmem,
2507                                    sort_sequence,
2508                                    num_bases, basenames,
2509                                    &result_sets_l, &num_result_sets_l,
2510                                    zop, kc);
2511         if (res != ZEBRA_OK)
2512         {
2513             int i;
2514             for (i = 0; i<num_result_sets_l; i++)
2515                 rset_delete(result_sets_l[i]);
2516             return res;
2517         }
2518         res = rpn_search_structure(zh, zs->u.complex->s2,
2519                                    attributeSet, hits_limit, stream, rset_nmem,
2520                                    sort_sequence,
2521                                    num_bases, basenames,
2522                                    &result_sets_r, &num_result_sets_r,
2523                                    zop, kc);
2524         if (res != ZEBRA_OK)
2525         {
2526             int i;
2527             for (i = 0; i<num_result_sets_l; i++)
2528                 rset_delete(result_sets_l[i]);
2529             for (i = 0; i<num_result_sets_r; i++)
2530                 rset_delete(result_sets_r[i]);
2531             return res;
2532         }
2533
2534         /* make a new list of result for all children */
2535         *num_result_sets = num_result_sets_l + num_result_sets_r;
2536         *result_sets = nmem_malloc(stream, *num_result_sets * 
2537                                    sizeof(**result_sets));
2538         memcpy(*result_sets, result_sets_l, 
2539                num_result_sets_l * sizeof(**result_sets));
2540         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2541                num_result_sets_r * sizeof(**result_sets));
2542
2543         if (!parent_op || parent_op->which != zop->which
2544             || (zop->which != Z_Operator_and &&
2545                 zop->which != Z_Operator_or))
2546         {
2547             /* parent node different from this one (or non-present) */
2548             /* we must combine result sets now */
2549             RSET rset;
2550             switch (zop->which)
2551             {
2552             case Z_Operator_and:
2553                 rset = rset_create_and(rset_nmem, kc,
2554                                        kc->scope,
2555                                        *num_result_sets, *result_sets);
2556                 break;
2557             case Z_Operator_or:
2558                 rset = rset_create_or(rset_nmem, kc,
2559                                       kc->scope, 0, /* termid */
2560                                       *num_result_sets, *result_sets);
2561                 break;
2562             case Z_Operator_and_not:
2563                 rset = rset_create_not(rset_nmem, kc,
2564                                        kc->scope,
2565                                        (*result_sets)[0],
2566                                        (*result_sets)[1]);
2567                 break;
2568             case Z_Operator_prox:
2569                 if (zop->u.prox->which != Z_ProximityOperator_known)
2570                 {
2571                     zebra_setError(zh, 
2572                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2573                                    0);
2574                     return ZEBRA_FAIL;
2575                 }
2576                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2577                 {
2578                     zebra_setError_zint(zh,
2579                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2580                                         *zop->u.prox->u.known);
2581                     return ZEBRA_FAIL;
2582                 }
2583                 else
2584                 {
2585                     rset = rset_create_prox(rset_nmem, kc,
2586                                             kc->scope,
2587                                             *num_result_sets, *result_sets, 
2588                                             *zop->u.prox->ordered,
2589                                             (!zop->u.prox->exclusion ? 
2590                                              0 : *zop->u.prox->exclusion),
2591                                             *zop->u.prox->relationType,
2592                                             *zop->u.prox->distance );
2593                 }
2594                 break;
2595             default:
2596                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2597                 return ZEBRA_FAIL;
2598             }
2599             *num_result_sets = 1;
2600             *result_sets = nmem_malloc(stream, *num_result_sets * 
2601                                        sizeof(**result_sets));
2602             (*result_sets)[0] = rset;
2603         }
2604     }
2605     else if (zs->which == Z_RPNStructure_simple)
2606     {
2607         RSET rset;
2608         ZEBRA_RES res;
2609
2610         if (zs->u.simple->which == Z_Operand_APT)
2611         {
2612             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2613             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2614                                  attributeSet, hits_limit,
2615                                  stream, sort_sequence,
2616                                  num_bases, basenames, rset_nmem, &rset,
2617                                  kc);
2618             if (res != ZEBRA_OK)
2619                 return res;
2620         }
2621         else if (zs->u.simple->which == Z_Operand_resultSetId)
2622         {
2623             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2624             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2625             if (!rset)
2626             {
2627                 zebra_setError(zh, 
2628                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2629                                zs->u.simple->u.resultSetId);
2630                 return ZEBRA_FAIL;
2631             }
2632             rset_dup(rset);
2633         }
2634         else
2635         {
2636             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2637             return ZEBRA_FAIL;
2638         }
2639         *num_result_sets = 1;
2640         *result_sets = nmem_malloc(stream, *num_result_sets * 
2641                                    sizeof(**result_sets));
2642         (*result_sets)[0] = rset;
2643     }
2644     else
2645     {
2646         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2647         return ZEBRA_FAIL;
2648     }
2649     return ZEBRA_OK;
2650 }
2651
2652
2653
2654 /*
2655  * Local variables:
2656  * c-basic-offset: 4
2657  * c-file-style: "Stroustrup"
2658  * indent-tabs-mode: nil
2659  * End:
2660  * vim: shiftwidth=4 tabstop=8 expandtab
2661  */
2662