Fixed bug #1121: Crash for some searches with customized string.chr.
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* $Id: rpnsearch.c,v 1.13 2007-05-14 12:33:33 adam Exp $
2    Copyright (C) 1995-2007
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
42
43 #define TERMSET_DISABLE 1
44
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
46 {
47     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
49 #if 0
50     if (out && *out)
51     {
52         const char *outp = *out;
53         yaz_log(YLOG_LOG, "---");
54         while (*outp)
55         {
56             yaz_log(YLOG_LOG, "%02X", *outp);
57             outp++;
58         }
59     }
60 #endif
61     return out;
62 }
63
64 void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
65                           struct rpn_char_map_info *map_info)
66 {
67     map_info->zm = reg->zebra_maps;
68     map_info->reg_type = reg_type;
69     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
70 }
71
72 #define TERM_COUNT        
73        
74 struct grep_info {        
75 #ifdef TERM_COUNT        
76     int *term_no;        
77 #endif        
78     ISAM_P *isam_p_buf;
79     int isam_p_size;        
80     int isam_p_indx;
81     int trunc_max;
82     ZebraHandle zh;
83     int reg_type;
84     ZebraSet termset;
85 };        
86
87 static int add_isam_p(const char *name, const char *info,
88                       struct grep_info *p)
89 {
90     if (!log_level_set)
91     {
92         log_level_rpn = yaz_log_module_level("rpn");
93         log_level_set = 1;
94     }
95     /* we may have to stop this madness.. NOTE: -1 so that if
96        truncmax == trunxlimit we do *not* generate result sets */
97     if (p->isam_p_indx >= p->trunc_max - 1)
98         return 1;
99
100     if (p->isam_p_indx == p->isam_p_size)
101     {
102         ISAM_P *new_isam_p_buf;
103 #ifdef TERM_COUNT        
104         int *new_term_no;        
105 #endif
106         p->isam_p_size = 2*p->isam_p_size + 100;
107         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
108                                             p->isam_p_size);
109         if (p->isam_p_buf)
110         {
111             memcpy(new_isam_p_buf, p->isam_p_buf,
112                     p->isam_p_indx * sizeof(*p->isam_p_buf));
113             xfree(p->isam_p_buf);
114         }
115         p->isam_p_buf = new_isam_p_buf;
116
117 #ifdef TERM_COUNT
118         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
119         if (p->term_no)
120         {
121             memcpy(new_term_no, p->isam_p_buf,
122                     p->isam_p_indx * sizeof(*p->term_no));
123             xfree(p->term_no);
124         }
125         p->term_no = new_term_no;
126 #endif
127     }
128     assert(*info == sizeof(*p->isam_p_buf));
129     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
130
131     if (p->termset)
132     {
133         const char *db;
134         char term_tmp[IT_MAX_WORD];
135         int ord = 0;
136         const char *index_name;
137         int len = key_SU_decode (&ord, (const unsigned char *) name);
138         
139         zebra_term_untrans  (p->zh, p->reg_type, term_tmp, name+len);
140         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
141         zebraExplain_lookup_ord(p->zh->reg->zei,
142                                 ord, 0 /* index_type */, &db, &index_name);
143         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
144         
145         resultSetAddTerm(p->zh, p->termset, name[len], db,
146                          index_name, term_tmp);
147     }
148     (p->isam_p_indx)++;
149     return 0;
150 }
151
/* Grep callback with an untyped client pointer: p is the struct grep_info
   that add_isam_p() should collect into. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *collector = p;

    return add_isam_p(name, info, collector);
}
156
157 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
158                     const char *ct1, const char *ct2, int first)
159 {
160     const char *s1, *s0 = *src;
161     const char **map;
162
163     /* skip white space */
164     while (*s0)
165     {
166         if (ct1 && strchr(ct1, *s0))
167             break;
168         if (ct2 && strchr(ct2, *s0))
169             break;
170         s1 = s0;
171         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
172         if (**map != *CHR_SPACE)
173             break;
174         s0 = s1;
175     }
176     *src = s0;
177     return *s0;
178 }
179
180
/* esc_str: render in_buf[0..in_size-1] into out_buf as a sequence of
 * "HH:c  " items (hex value, then the character itself or '?' when it
 * is outside the printable ASCII range 32..126).  Once the output gets
 * within 20 bytes of out_size, ".." is appended and rendering stops.
 * out_size must be greater than 20.
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;    /* current length of the string in out_buf */
    int pos;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    for (pos = 0; pos < in_size; pos++)
    {
        int byte = in_buf[pos] & 0xff;
        int shown = (byte >= 32 && byte <= 126) ? byte : '?';

        used += (size_t) sprintf(out_buf + used, "%02X:%c  ", byte, shown);
        if (used > out_size-20)
        {
            strcpy(out_buf + used, "..");
            break;
        }
    }
}
206
207 #define REGEX_CHARS " []()|.*+?!"
208
209 /* term_100: handle term, where trunc = none(no operators at all) */
210 static int term_100(ZebraMaps zebra_maps, int reg_type,
211                     const char **src, WRBUF term_dict, int space_split,
212                     char *dst_term)
213 {
214     const char *s0;
215     const char **map;
216     int i = 0;
217     int j = 0;
218
219     const char *space_start = 0;
220     const char *space_end = 0;
221
222     if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
223         return 0;
224     s0 = *src;
225     while (*s0)
226     {
227         const char *s1 = s0;
228         int q_map_match = 0;
229         map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
230                                 &q_map_match);
231         if (space_split)
232         {
233             if (**map == *CHR_SPACE)
234                 break;
235         }
236         else  /* complete subfield only. */
237         {
238             if (**map == *CHR_SPACE)
239             {   /* save space mapping for later  .. */
240                 space_start = s1;
241                 space_end = s0;
242                 continue;
243             }
244             else if (space_start)
245             {   /* reload last space */
246                 while (space_start < space_end)
247                 {
248                     if (strchr(REGEX_CHARS, *space_start))
249                         wrbuf_putc(term_dict, '\\');
250                     dst_term[j++] = *space_start;
251                     wrbuf_putc(term_dict, *space_start);
252                     space_start++;
253                                
254                 }
255                 /* and reset */
256                 space_start = space_end = 0;
257             }
258         }
259         /* add non-space char */
260         i++;
261         memcpy(dst_term+j, s1, s0 - s1);
262         j += (s0 - s1);
263         if (!q_map_match)
264         {
265             while (s1 < s0)
266             {
267                 if (strchr(REGEX_CHARS, *s1))
268                     wrbuf_putc(term_dict, '\\');
269                 wrbuf_putc(term_dict, *s1);
270                 s1++;
271             }
272         }
273         else
274         {
275             char tmpbuf[80];
276             esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
277
278             wrbuf_puts(term_dict, map[0]);
279         }
280     }
281     dst_term[j] = '\0';
282     *src = s0;
283     return i;
284 }
285
286 /* term_101: handle term, where trunc = Process # */
287 static int term_101(ZebraMaps zebra_maps, int reg_type,
288                     const char **src, WRBUF term_dict, int space_split,
289                     char *dst_term)
290 {
291     const char *s0;
292     const char **map;
293     int i = 0;
294     int j = 0;
295
296     if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
297         return 0;
298     s0 = *src;
299     while (*s0)
300     {
301         if (*s0 == '#')
302         {
303             i++;
304             wrbuf_puts(term_dict, ".*");
305             dst_term[j++] = *s0++;
306         }
307         else
308         {
309             const char *s1 = s0;
310             int q_map_match = 0;
311             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
312                                     &q_map_match);
313             if (space_split && **map == *CHR_SPACE)
314                 break;
315
316             i++;
317             /* add non-space char */
318             memcpy(dst_term+j, s1, s0 - s1);
319             j += (s0 - s1);
320             if (!q_map_match)
321             {
322                 while (s1 < s0)
323                 {
324                     if (strchr(REGEX_CHARS, *s1))
325                         wrbuf_putc(term_dict, '\\');
326                     wrbuf_putc(term_dict, *s1);
327                     s1++;
328                 }
329             }
330             else
331             {
332                 char tmpbuf[80];
333                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
334
335                 wrbuf_puts(term_dict, map[0]);
336             }
337         }
338     }
339     dst_term[j++] = '\0';
340     *src = s0;
341     return i;
342 }
343
344 /* term_103: handle term, where trunc = re-2 (regular expressions) */
345 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
346                     WRBUF term_dict, int *errors, int space_split,
347                     char *dst_term)
348 {
349     int i = 0;
350     int j = 0;
351     const char *s0;
352     const char **map;
353
354     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
355         return 0;
356     s0 = *src;
357     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
358         isdigit(((const unsigned char *)s0)[1]))
359     {
360         *errors = s0[1] - '0';
361         s0 += 3;
362         if (*errors > 3)
363             *errors = 3;
364     }
365     while (*s0)
366     {
367         if (strchr("^\\()[].*+?|-", *s0))
368         {
369             dst_term[j++] = *s0;
370             wrbuf_putc(term_dict, *s0);
371             s0++;
372             i++;
373         }
374         else
375         {
376             const char *s1 = s0;
377             int q_map_match = 0;
378             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
379                                     &q_map_match);
380             if (space_split && **map == *CHR_SPACE)
381                 break;
382
383             /* add non-space char */
384             memcpy(dst_term+j, s1, s0 - s1);
385             j += (s0 - s1);
386             i++;
387             if (!q_map_match)
388             {
389                 while (s1 < s0)
390                 {
391                     if (strchr(REGEX_CHARS, *s1))
392                         wrbuf_putc(term_dict, '\\');
393                     wrbuf_putc(term_dict, *s1);
394                     s1++;
395                 }
396             }
397             else
398             {
399                 char tmpbuf[80];
400                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
401
402                 wrbuf_puts(term_dict, map[0]);
403             }
404         }
405     }
406     dst_term[j] = '\0';
407     *src = s0;
408     
409     return i;
410 }
411
412 /* term_103: handle term, where trunc = re-1 (regular expressions) */
413 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
414                     WRBUF term_dict, int space_split, char *dst_term)
415 {
416     return term_103(zebra_maps, reg_type, src, term_dict, NULL, space_split,
417                     dst_term);
418 }
419
420
421 /* term_104: handle term, process # and ! */
422 static int term_104(ZebraMaps zebra_maps, int reg_type,
423                     const char **src, WRBUF term_dict, int space_split,
424                     char *dst_term)
425 {
426     const char *s0;
427     const char **map;
428     int i = 0;
429     int j = 0;
430
431     if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
432         return 0;
433     s0 = *src;
434     while (*s0)
435     {
436         if (*s0 == '?')
437         {
438             i++;
439             dst_term[j++] = *s0++;
440             if (*s0 >= '0' && *s0 <= '9')
441             {
442                 int limit = 0;
443                 while (*s0 >= '0' && *s0 <= '9')
444                 {
445                     limit = limit * 10 + (*s0 - '0');
446                     dst_term[j++] = *s0++;
447                 }
448                 if (limit > 20)
449                     limit = 20;
450                 while (--limit >= 0)
451                 {
452                     wrbuf_puts(term_dict, ".?");
453                 }
454             }
455             else
456             {
457                 wrbuf_puts(term_dict, ".*");
458             }
459         }
460         else if (*s0 == '*')
461         {
462             i++;
463             wrbuf_puts(term_dict, ".*");
464             dst_term[j++] = *s0++;
465         }
466         else if (*s0 == '#')
467         {
468             i++;
469             wrbuf_puts(term_dict, ".");
470             dst_term[j++] = *s0++;
471         }
472         else
473         {
474             const char *s1 = s0;
475             int q_map_match = 0;
476             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
477                                     &q_map_match);
478             if (space_split && **map == *CHR_SPACE)
479                 break;
480
481             i++;
482             /* add non-space char */
483             memcpy(dst_term+j, s1, s0 - s1);
484             j += (s0 - s1);
485             if (!q_map_match)
486             {
487                 while (s1 < s0)
488                 {
489                     if (strchr(REGEX_CHARS, *s1))
490                         wrbuf_putc(term_dict, '\\');
491                     wrbuf_putc(term_dict, *s1);
492                     s1++;
493                 }
494             }
495             else
496             {
497                 char tmpbuf[80];
498                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
499                 
500                 wrbuf_puts(term_dict, map[0]);
501             }
502         }
503     }
504     dst_term[j++] = '\0';
505     *src = s0;
506     return i;
507 }
508
509 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
510 static int term_105(ZebraMaps zebra_maps, int reg_type,
511                     const char **src, WRBUF term_dict, int space_split,
512                     char *dst_term, int right_truncate)
513 {
514     const char *s0;
515     const char **map;
516     int i = 0;
517     int j = 0;
518
519     if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
520         return 0;
521     s0 = *src;
522     while (*s0)
523     {
524         if (*s0 == '*')
525         {
526             i++;
527             wrbuf_puts(term_dict, ".*");
528             dst_term[j++] = *s0++;
529         }
530         else if (*s0 == '!')
531         {
532             i++;
533             wrbuf_putc(term_dict, '.');
534             dst_term[j++] = *s0++;
535         }
536         else
537         {
538             const char *s1 = s0;
539             int q_map_match = 0;
540             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
541                                     &q_map_match);
542             if (space_split && **map == *CHR_SPACE)
543                 break;
544
545             i++;
546             /* add non-space char */
547             memcpy(dst_term+j, s1, s0 - s1);
548             j += (s0 - s1);
549             if (!q_map_match)
550             {
551                 while (s1 < s0)
552                 {
553                     if (strchr(REGEX_CHARS, *s1))
554                         wrbuf_putc(term_dict, '\\');
555                     wrbuf_putc(term_dict, *s1);
556                     s1++;
557                 }
558             }
559             else
560             {
561                 char tmpbuf[80];
562                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
563                 
564                 wrbuf_puts(term_dict, map[0]);
565             }
566         }
567     }
568     if (right_truncate)
569         wrbuf_puts(term_dict, ".*");
570     dst_term[j++] = '\0';
571     *src = s0;
572     return i;
573 }
574
575
576 /* gen_regular_rel - generate regular expression from relation
577  *  val:     border value (inclusive)
578  *  islt:    1 if <=; 0 if >=.
579  */
580 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
581 {
582     char dst_buf[20*5*20]; /* assuming enough for expansion */
583     char *dst = dst_buf;
584     int dst_p;
585     int w, d, i;
586     int pos = 0;
587     char numstr[20];
588
589     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
590     if (val >= 0)
591     {
592         if (islt)
593             strcpy(dst, "(-[0-9]+|(");
594         else
595             strcpy(dst, "((");
596     } 
597     else
598     {
599         if (!islt)
600         {
601             strcpy(dst, "([0-9]+|-(");
602             islt = 1;
603         }
604         else
605         {
606             strcpy(dst, "(-(");
607             islt = 0;
608         }
609         val = -val;
610     }
611     dst_p = strlen(dst);
612     sprintf(numstr, "%d", val);
613     for (w = strlen(numstr); --w >= 0; pos++)
614     {
615         d = numstr[w];
616         if (pos > 0)
617         {
618             if (islt)
619             {
620                 if (d == '0')
621                     continue;
622                 d--;
623             } 
624             else
625             {
626                 if (d == '9')
627                     continue;
628                 d++;
629             }
630         }
631         
632         strcpy(dst + dst_p, numstr);
633         dst_p = strlen(dst) - pos - 1;
634
635         if (islt)
636         {
637             if (d != '0')
638             {
639                 dst[dst_p++] = '[';
640                 dst[dst_p++] = '0';
641                 dst[dst_p++] = '-';
642                 dst[dst_p++] = d;
643                 dst[dst_p++] = ']';
644             }
645             else
646                 dst[dst_p++] = d;
647         }
648         else
649         {
650             if (d != '9')
651             { 
652                 dst[dst_p++] = '[';
653                 dst[dst_p++] = d;
654                 dst[dst_p++] = '-';
655                 dst[dst_p++] = '9';
656                 dst[dst_p++] = ']';
657             }
658             else
659                 dst[dst_p++] = d;
660         }
661         for (i = 0; i<pos; i++)
662         {
663             dst[dst_p++] = '[';
664             dst[dst_p++] = '0';
665             dst[dst_p++] = '-';
666             dst[dst_p++] = '9';
667             dst[dst_p++] = ']';
668         }
669         dst[dst_p++] = '|';
670     }
671     dst[dst_p] = '\0';
672     if (islt)
673     {
674         /* match everything less than 10^(pos-1) */
675         strcat(dst, "0*");
676         for (i = 1; i<pos; i++)
677             strcat(dst, "[0-9]?");
678     }
679     else
680     {
681         /* match everything greater than 10^pos */
682         for (i = 0; i <= pos; i++)
683             strcat(dst, "[0-9]");
684         strcat(dst, "[0-9]*");
685     }
686     strcat(dst, "))");
687     wrbuf_puts(term_dict, dst);
688 }
689
690 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
691 {
692     const char *src = wrbuf_cstr(wsrc);
693     if (src[*indx] == '\\')
694     {
695         wrbuf_putc(term_p, src[*indx]);
696         (*indx)++;
697     }
698     wrbuf_putc(term_p, src[*indx]);
699     (*indx)++;
700 }
701
702 /*
703  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
704  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
705  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
706  *              ([^-a].*|a[^-b].*|ab[c-].*)
707  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
708  *              ([^a-].*|a[^b-].*|ab[^c-].*)
709  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
710  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
711  */
712 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
713                            const char **term_sub, WRBUF term_dict,
714                            const Odr_oid *attributeSet,
715                            int reg_type, int space_split, char *term_dst,
716                            int *error_code)
717 {
718     AttrType relation;
719     int relation_value;
720     int i;
721     WRBUF term_component = wrbuf_alloc();
722
723     attr_init_APT(&relation, zapt, 2);
724     relation_value = attr_find(&relation, NULL);
725
726     *error_code = 0;
727     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
728     switch (relation_value)
729     {
730     case 1:
731         if (!term_100(zh->reg->zebra_maps, reg_type,
732                       term_sub, term_component,
733                       space_split, term_dst))
734         {
735             wrbuf_destroy(term_component);
736             return 0;
737         }
738         yaz_log(log_level_rpn, "Relation <");
739         
740         wrbuf_putc(term_dict, '(');
741         for (i = 0; i < wrbuf_len(term_component); )
742         {
743             int j = 0;
744             
745             if (i)
746                 wrbuf_putc(term_dict, '|');
747             while (j < i)
748                 string_rel_add_char(term_dict, term_component, &j);
749
750             wrbuf_putc(term_dict, '[');
751
752             wrbuf_putc(term_dict, '^');
753             
754             wrbuf_putc(term_dict, 1);
755             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
756             
757             string_rel_add_char(term_dict, term_component, &i);
758             wrbuf_putc(term_dict, '-');
759             
760             wrbuf_putc(term_dict, ']');
761             wrbuf_putc(term_dict, '.');
762             wrbuf_putc(term_dict, '*');
763         }
764         wrbuf_putc(term_dict, ')');
765         break;
766     case 2:
767         if (!term_100(zh->reg->zebra_maps, reg_type,
768                       term_sub, term_component,
769                       space_split, term_dst))
770         {
771             wrbuf_destroy(term_component);
772             return 0;
773         }
774         yaz_log(log_level_rpn, "Relation <=");
775
776         wrbuf_putc(term_dict, '(');
777         for (i = 0; i < wrbuf_len(term_component); )
778         {
779             int j = 0;
780
781             while (j < i)
782                 string_rel_add_char(term_dict, term_component, &j);
783             wrbuf_putc(term_dict, '[');
784
785             wrbuf_putc(term_dict, '^');
786
787             wrbuf_putc(term_dict, 1);
788             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
789
790             string_rel_add_char(term_dict, term_component, &i);
791             wrbuf_putc(term_dict, '-');
792
793             wrbuf_putc(term_dict, ']');
794             wrbuf_putc(term_dict, '.');
795             wrbuf_putc(term_dict, '*');
796
797             wrbuf_putc(term_dict, '|');
798         }
799         for (i = 0; i < wrbuf_len(term_component); )
800             string_rel_add_char(term_dict, term_component, &i);
801         wrbuf_putc(term_dict, ')');
802         break;
803     case 5:
804         if (!term_100 (zh->reg->zebra_maps, reg_type,
805                        term_sub, term_component, space_split, term_dst))
806         {
807             wrbuf_destroy(term_component);
808             return 0;
809         }
810         yaz_log(log_level_rpn, "Relation >");
811
812         wrbuf_putc(term_dict, '(');
813         for (i = 0; i < wrbuf_len(term_component); )
814         {
815             int j = 0;
816
817             while (j < i)
818                 string_rel_add_char(term_dict, term_component, &j);
819             wrbuf_putc(term_dict, '[');
820             
821             wrbuf_putc(term_dict, '^');
822             wrbuf_putc(term_dict, '-');
823             string_rel_add_char(term_dict, term_component, &i);
824
825             wrbuf_putc(term_dict, ']');
826             wrbuf_putc(term_dict, '.');
827             wrbuf_putc(term_dict, '*');
828
829             wrbuf_putc(term_dict, '|');
830         }
831         for (i = 0; i < wrbuf_len(term_component); )
832             string_rel_add_char(term_dict, term_component, &i);
833         wrbuf_putc(term_dict, '.');
834         wrbuf_putc(term_dict, '+');
835         wrbuf_putc(term_dict, ')');
836         break;
837     case 4:
838         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
839                       term_component, space_split, term_dst))
840         {
841             wrbuf_destroy(term_component);
842             return 0;
843         }
844         yaz_log(log_level_rpn, "Relation >=");
845
846         wrbuf_putc(term_dict, '(');
847         for (i = 0; i < wrbuf_len(term_component); )
848         {
849             int j = 0;
850
851             if (i)
852                 wrbuf_putc(term_dict, '|');
853             while (j < i)
854                 string_rel_add_char(term_dict, term_component, &j);
855             wrbuf_putc(term_dict, '[');
856
857             if (i < wrbuf_len(term_component)-1)
858             {
859                 wrbuf_putc(term_dict, '^');
860                 wrbuf_putc(term_dict, '-');
861                 string_rel_add_char(term_dict, term_component, &i);
862             }
863             else
864             {
865                 string_rel_add_char(term_dict, term_component, &i);
866                 wrbuf_putc(term_dict, '-');
867             }
868             wrbuf_putc(term_dict, ']');
869             wrbuf_putc(term_dict, '.');
870             wrbuf_putc(term_dict, '*');
871         }
872         wrbuf_putc(term_dict, ')');
873         break;
874     case 3:
875     case 102:
876     case -1:
877         if (!**term_sub)
878             return 1;
879         yaz_log(log_level_rpn, "Relation =");
880         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
881                       term_component, space_split, term_dst))
882         {
883             wrbuf_destroy(term_component);
884             return 0;
885         }
886         wrbuf_puts(term_dict, "(");
887         wrbuf_puts(term_dict, wrbuf_cstr(term_component));
888         wrbuf_puts(term_dict, ")");
889         break;
890     case 103:
891         yaz_log(log_level_rpn, "Relation always matches");
892         /* skip to end of term (we don't care what it is) */
893         while (**term_sub != '\0')
894             (*term_sub)++;
895         break;
896     default:
897         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
898         wrbuf_destroy(term_component);
899         return 0;
900     }
901     wrbuf_destroy(term_component);
902     return 1;
903 }
904
905 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
906                              const char **term_sub, 
907                              WRBUF term_dict,
908                              const Odr_oid *attributeSet, NMEM stream,
909                              struct grep_info *grep_info,
910                              int reg_type, int complete_flag,
911                              int num_bases, char **basenames,
912                              char *term_dst,
913                              const char *xpath_use,
914                              struct ord_list **ol);
915
916 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
917                                  Z_AttributesPlusTerm *zapt,
918                                  zint *hits_limit_value,
919                                  const char **term_ref_id_str,
920                                  NMEM nmem)
921 {
922     AttrType term_ref_id_attr;
923     AttrType hits_limit_attr;
924     int term_ref_id_int;
925  
926     attr_init_APT(&hits_limit_attr, zapt, 11);
927     *hits_limit_value  = attr_find(&hits_limit_attr, NULL);
928
929     attr_init_APT(&term_ref_id_attr, zapt, 10);
930     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
931     if (term_ref_id_int >= 0)
932     {
933         char *res = nmem_malloc(nmem, 20);
934         sprintf(res, "%d", term_ref_id_int);
935         *term_ref_id_str = res;
936     }
937
938     /* no limit given ? */
939     if (*hits_limit_value == -1)
940     {
941         if (*term_ref_id_str)
942         {
943             /* use global if term_ref is present */
944             *hits_limit_value = zh->approx_limit;
945         }
946         else
947         {
948             /* no counting if term_ref is not present */
949             *hits_limit_value = 0;
950         }
951     }
952     else if (*hits_limit_value == 0)
953     {
954         /* 0 is the same as global limit */
955         *hits_limit_value = zh->approx_limit;
956     }
957     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
958             *term_ref_id_str ? *term_ref_id_str : "none",
959             *hits_limit_value);
960     return ZEBRA_OK;
961 }
962
963 static ZEBRA_RES term_trunc(ZebraHandle zh,
964                             Z_AttributesPlusTerm *zapt,
965                             const char **term_sub, 
966                             const Odr_oid *attributeSet, NMEM stream,
967                             struct grep_info *grep_info,
968                             int reg_type, int complete_flag,
969                             int num_bases, char **basenames,
970                             char *term_dst,
971                             const char *rank_type, 
972                             const char *xpath_use,
973                             NMEM rset_nmem,
974                             RSET *rset,
975                             struct rset_key_control *kc)
976 {
977     ZEBRA_RES res;
978     struct ord_list *ol;
979     zint hits_limit_value;
980     const char *term_ref_id_str = 0;
981     WRBUF term_dict = wrbuf_alloc();
982
983     *rset = 0;
984     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
985     grep_info->isam_p_indx = 0;
986     res = string_term(zh, zapt, term_sub, term_dict,
987                       attributeSet, stream, grep_info,
988                       reg_type, complete_flag, num_bases, basenames,
989                       term_dst, xpath_use, &ol);
990     wrbuf_destroy(term_dict);
991     if (res != ZEBRA_OK)
992         return res;
993     if (!*term_sub)  /* no more terms ? */
994         return res;
995     yaz_log(log_level_rpn, "term: %s", term_dst);
996     *rset = rset_trunc(zh, grep_info->isam_p_buf,
997                        grep_info->isam_p_indx, term_dst,
998                        strlen(term_dst), rank_type, 1 /* preserve pos */,
999                        zapt->term->which, rset_nmem,
1000                        kc, kc->scope, ol, reg_type, hits_limit_value,
1001                        term_ref_id_str);
1002     if (!*rset)
1003         return ZEBRA_FAIL;
1004     return ZEBRA_OK;
1005 }
1006
1007 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1008                              const char **term_sub, 
1009                              WRBUF term_dict,
1010                              const Odr_oid *attributeSet, NMEM stream,
1011                              struct grep_info *grep_info,
1012                              int reg_type, int complete_flag,
1013                              int num_bases, char **basenames,
1014                              char *term_dst,
1015                              const char *xpath_use,
1016                              struct ord_list **ol)
1017 {
1018     int r, base_no;
1019     AttrType truncation;
1020     int truncation_value;
1021     const char *termp;
1022     struct rpn_char_map_info rcmi;
1023     int space_split = complete_flag ? 0 : 1;
1024
1025     int bases_ok = 0;     /* no of databases with OK attribute */
1026
1027     *ol = ord_list_create(stream);
1028
1029     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1030     attr_init_APT(&truncation, zapt, 5);
1031     truncation_value = attr_find(&truncation, NULL);
1032     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1033
1034     for (base_no = 0; base_no < num_bases; base_no++)
1035     {
1036         int ord = -1;
1037         int regex_range = 0;
1038         int max_pos, prefix_len = 0;
1039         int relation_error;
1040         char ord_buf[32];
1041         int ord_len, i;
1042
1043         termp = *term_sub; /* start of term for each database */
1044
1045
1046         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1047         {
1048             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1049                            basenames[base_no]);
1050             return ZEBRA_FAIL;
1051         }
1052         
1053         if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1054                               attributeSet, &ord) != ZEBRA_OK)
1055             continue;
1056
1057
1058         wrbuf_rewind(term_dict); /* new dictionary regexp term */
1059
1060         bases_ok++;
1061
1062         *ol = ord_list_append(stream, *ol, ord);
1063         ord_len = key_SU_encode (ord, ord_buf);
1064         
1065         wrbuf_putc(term_dict, '(');
1066
1067         for (i = 0; i<ord_len; i++)
1068         {
1069             wrbuf_putc(term_dict, 1);  /* our internal regexp escape char */
1070             wrbuf_putc(term_dict, ord_buf[i]);
1071         }
1072         wrbuf_putc(term_dict, ')');
1073
1074         prefix_len = wrbuf_len(term_dict);
1075
1076         switch (truncation_value)
1077         {
1078         case -1:         /* not specified */
1079         case 100:        /* do not truncate */
1080             if (!string_relation(zh, zapt, &termp, term_dict,
1081                                  attributeSet,
1082                                  reg_type, space_split, term_dst,
1083                                  &relation_error))
1084             {
1085                 if (relation_error)
1086                 {
1087                     zebra_setError(zh, relation_error, 0);
1088                     return ZEBRA_FAIL;
1089                 }
1090                 *term_sub = 0;
1091                 return ZEBRA_OK;
1092             }
1093             break;
1094         case 1:          /* right truncation */
1095             wrbuf_putc(term_dict, '(');
1096             if (!term_100(zh->reg->zebra_maps, reg_type,
1097                           &termp, term_dict, space_split, term_dst))
1098             {
1099                 *term_sub = 0;
1100                 return ZEBRA_OK;
1101             }
1102             wrbuf_puts(term_dict, ".*)");
1103             break;
1104         case 2:          /* keft truncation */
1105             wrbuf_puts(term_dict, "(.*");
1106             if (!term_100(zh->reg->zebra_maps, reg_type,
1107                           &termp, term_dict, space_split, term_dst))
1108             {
1109                 *term_sub = 0;
1110                 return ZEBRA_OK;
1111             }
1112             wrbuf_putc(term_dict, ')');
1113             break;
1114         case 3:          /* left&right truncation */
1115             wrbuf_puts(term_dict, "(.*");
1116             if (!term_100(zh->reg->zebra_maps, reg_type,
1117                           &termp, term_dict, space_split, term_dst))
1118             {
1119                 *term_sub = 0;
1120                 return ZEBRA_OK;
1121             }
1122             wrbuf_puts(term_dict, ".*)");
1123             break;
1124         case 101:        /* process # in term */
1125             wrbuf_putc(term_dict, '(');
1126             if (!term_101(zh->reg->zebra_maps, reg_type,
1127                           &termp, term_dict, space_split, term_dst))
1128             {
1129                 *term_sub = 0;
1130                 return ZEBRA_OK;
1131             }
1132             wrbuf_puts(term_dict, ")");
1133             break;
1134         case 102:        /* Regexp-1 */
1135             wrbuf_putc(term_dict, '(');
1136             if (!term_102(zh->reg->zebra_maps, reg_type,
1137                           &termp, term_dict, space_split, term_dst))
1138             {
1139                 *term_sub = 0;
1140                 return ZEBRA_OK;
1141             }
1142             wrbuf_putc(term_dict, ')');
1143             break;
1144         case 103:       /* Regexp-2 */
1145             regex_range = 1;
1146             wrbuf_putc(term_dict, '(');
1147             if (!term_103(zh->reg->zebra_maps, reg_type,
1148                           &termp, term_dict, &regex_range,
1149                           space_split, term_dst))
1150             {
1151                 *term_sub = 0;
1152                 return ZEBRA_OK;
1153             }
1154             wrbuf_putc(term_dict, ')');
1155             break;
1156         case 104:        /* process # and ! in term */
1157             wrbuf_putc(term_dict, '(');
1158             if (!term_104(zh->reg->zebra_maps, reg_type,
1159                           &termp, term_dict, space_split, term_dst))
1160             {
1161                 *term_sub = 0;
1162                 return ZEBRA_OK;
1163             }
1164             wrbuf_putc(term_dict, ')');
1165             break;
1166         case 105:        /* process * and ! in term */
1167             wrbuf_putc(term_dict, '(');
1168             if (!term_105(zh->reg->zebra_maps, reg_type,
1169                           &termp, term_dict, space_split, term_dst, 1))
1170             {
1171                 *term_sub = 0;
1172                 return ZEBRA_OK;
1173             }
1174             wrbuf_putc(term_dict, ')');
1175             break;
1176         case 106:        /* process * and ! in term */
1177             wrbuf_putc(term_dict, '(');
1178             if (!term_105(zh->reg->zebra_maps, reg_type,
1179                           &termp, term_dict, space_split, term_dst, 0))
1180             {
1181                 *term_sub = 0;
1182                 return ZEBRA_OK;
1183             }
1184             wrbuf_putc(term_dict, ')');
1185             break;
1186         default:
1187             zebra_setError_zint(zh,
1188                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1189                                 truncation_value);
1190             return ZEBRA_FAIL;
1191         }
1192         if (1)
1193         {
1194             char buf[1000];
1195             const char *input = wrbuf_cstr(term_dict) + prefix_len;
1196             esc_str(buf, sizeof(buf), input, strlen(input));
1197         }
1198         yaz_log(log_level_rpn, "dict_lookup_grep: %s",
1199                 wrbuf_cstr(term_dict) + prefix_len);
1200         r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1201                              grep_info, &max_pos, 
1202                              ord_len /* number of "exact" chars */,
1203                              grep_handle);
1204         if (r == 1)
1205             zebra_set_partial_result(zh);
1206         else if (r)
1207             yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1208     }
1209     if (!bases_ok)
1210         return ZEBRA_FAIL;
1211     *term_sub = termp;
1212     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1213     return ZEBRA_OK;
1214 }
1215
1216
1217
1218 static void grep_info_delete(struct grep_info *grep_info)
1219 {
1220 #ifdef TERM_COUNT
1221     xfree(grep_info->term_no);
1222 #endif
1223     xfree(grep_info->isam_p_buf);
1224 }
1225
1226 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1227                                    Z_AttributesPlusTerm *zapt,
1228                                    struct grep_info *grep_info,
1229                                    int reg_type)
1230 {
1231 #ifdef TERM_COUNT
1232     grep_info->term_no = 0;
1233 #endif
1234     grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1235     grep_info->isam_p_size = 0;
1236     grep_info->isam_p_buf = NULL;
1237     grep_info->zh = zh;
1238     grep_info->reg_type = reg_type;
1239     grep_info->termset = 0;
1240     if (zapt)
1241     {
1242         AttrType truncmax;
1243         int truncmax_value;
1244
1245         attr_init_APT(&truncmax, zapt, 13);
1246         truncmax_value = attr_find(&truncmax, NULL);
1247         if (truncmax_value != -1)
1248             grep_info->trunc_max = truncmax_value;
1249     }
1250     if (zapt)
1251     {
1252         AttrType termset;
1253         int termset_value_numeric;
1254         const char *termset_value_string;
1255
1256         attr_init_APT(&termset, zapt, 8);
1257         termset_value_numeric =
1258             attr_find_ex(&termset, NULL, &termset_value_string);
1259         if (termset_value_numeric != -1)
1260         {
1261 #if TERMSET_DISABLE
1262             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1263             return ZEBRA_FAIL;
1264 #else
1265             char resname[32];
1266             const char *termset_name = 0;
1267             if (termset_value_numeric != -2)
1268             {
1269                 
1270                 sprintf(resname, "%d", termset_value_numeric);
1271                 termset_name = resname;
1272             }
1273             else
1274             termset_name = termset_value_string;
1275             yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1276             grep_info->termset = resultSetAdd(zh, termset_name, 1);
1277             if (!grep_info->termset)
1278             {
1279                 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1280                 return ZEBRA_FAIL;
1281             }
1282 #endif
1283         }
1284     }
1285     return ZEBRA_OK;
1286 }
1287                                
1288 /**
1289   \brief Create result set(s) for list of terms
1290   \param zh Zebra Handle
1291   \param zapt Attributes Plust Term (RPN leaf)
1292   \param termz term as used in query but converted to UTF-8
1293   \param attributeSet default attribute set
1294   \param stream memory for result
1295   \param reg_type register type ('w', 'p',..)
1296   \param complete_flag whether it's phrases or not
1297   \param rank_type term flags for ranking
1298   \param xpath_use use attribute for X-Path (-1 for no X-path)
1299   \param num_bases number of databases
1300   \param basenames array of databases
1301   \param rset_nmem memory for result sets
1302   \param result_sets output result set for each term in list (output)
1303   \param num_result_sets number of output result sets
1304   \param kc rset key control to be used for created result sets
1305 */
1306 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1307                                  Z_AttributesPlusTerm *zapt,
1308                                  const char *termz,
1309                                  const Odr_oid *attributeSet,
1310                                  NMEM stream,
1311                                  int reg_type, int complete_flag,
1312                                  const char *rank_type,
1313                                  const char *xpath_use,
1314                                  int num_bases, char **basenames, 
1315                                  NMEM rset_nmem,
1316                                  RSET **result_sets, int *num_result_sets,
1317                                  struct rset_key_control *kc)
1318 {
1319     char term_dst[IT_MAX_WORD+1];
1320     struct grep_info grep_info;
1321     const char *termp = termz;
1322     int alloc_sets = 0;
1323
1324     *num_result_sets = 0;
1325     *term_dst = 0;
1326     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1327         return ZEBRA_FAIL;
1328     while(1)
1329     { 
1330         ZEBRA_RES res;
1331
1332         if (alloc_sets == *num_result_sets)
1333         {
1334             int add = 10;
1335             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1336                                               sizeof(*rnew));
1337             if (alloc_sets)
1338                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1339             alloc_sets = alloc_sets + add;
1340             *result_sets = rnew;
1341         }
1342         res = term_trunc(zh, zapt, &termp, attributeSet,
1343                          stream, &grep_info,
1344                          reg_type, complete_flag,
1345                          num_bases, basenames,
1346                          term_dst, rank_type,
1347                          xpath_use, rset_nmem,
1348                          &(*result_sets)[*num_result_sets],
1349                          kc);
1350         if (res != ZEBRA_OK)
1351         {
1352             int i;
1353             for (i = 0; i < *num_result_sets; i++)
1354                 rset_delete((*result_sets)[i]);
1355             grep_info_delete (&grep_info);
1356             return res;
1357         }
1358         if ((*result_sets)[*num_result_sets] == 0)
1359             break;
1360         (*num_result_sets)++;
1361
1362         if (!*termp)
1363             break;
1364     }
1365     grep_info_delete(&grep_info);
1366     return ZEBRA_OK;
1367 }
1368
1369 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1370                                          Z_AttributesPlusTerm *zapt,
1371                                          const Odr_oid *attributeSet,
1372                                          int reg_type,
1373                                          int num_bases, char **basenames,
1374                                          NMEM rset_nmem,
1375                                          RSET *rset,
1376                                          struct rset_key_control *kc)
1377 {
1378     RSET *f_set;
1379     int base_no;
1380     int position_value;
1381     int num_sets = 0;
1382     AttrType position;
1383
1384     attr_init_APT(&position, zapt, 3);
1385     position_value = attr_find(&position, NULL);
1386     switch(position_value)
1387     {
1388     case 3:
1389     case -1:
1390         return ZEBRA_OK;
1391     case 1:
1392     case 2:
1393         break;
1394     default:
1395         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1396                             position_value);
1397         return ZEBRA_FAIL;
1398     }
1399
1400     if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, reg_type))
1401     {
1402         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1403                             position_value);
1404         return ZEBRA_FAIL;
1405     }
1406
1407     if (!zh->reg->isamb && !zh->reg->isamc)
1408     {
1409         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1410                             position_value);
1411         return ZEBRA_FAIL;
1412     }
1413     f_set = xmalloc(sizeof(RSET) * num_bases);
1414     for (base_no = 0; base_no < num_bases; base_no++)
1415     {
1416         int ord = -1;
1417         char ord_buf[32];
1418         char term_dict[100];
1419         int ord_len;
1420         char *val;
1421         ISAM_P isam_p;
1422
1423         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1424         {
1425             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1426                            basenames[base_no]);
1427             return ZEBRA_FAIL;
1428         }
1429         
1430         if (zebra_apt_get_ord(zh, zapt, reg_type, 0,
1431                               attributeSet, &ord) != ZEBRA_OK)
1432             continue;
1433
1434         ord_len = key_SU_encode (ord, ord_buf);
1435         memcpy(term_dict, ord_buf, ord_len);
1436         strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1437         val = dict_lookup(zh->reg->dict, term_dict);
1438         if (!val)
1439             continue;
1440         assert(*val == sizeof(ISAM_P));
1441         memcpy(&isam_p, val+1, sizeof(isam_p));
1442         
1443
1444         if (zh->reg->isamb)
1445             f_set[num_sets++] = rsisamb_create(rset_nmem, kc, kc->scope,
1446                                                zh->reg->isamb, isam_p, 0);
1447         else if (zh->reg->isamc)
1448             f_set[num_sets++] = rsisamc_create(rset_nmem, kc, kc->scope,
1449                                                zh->reg->isamc, isam_p, 0);
1450     }
1451     if (num_sets)
1452     {
1453         *rset = rset_create_or(rset_nmem, kc, kc->scope,
1454                                0 /* termid */, num_sets, f_set);
1455     }
1456     xfree(f_set);
1457     return ZEBRA_OK;
1458 }
1459                                          
1460 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1461                                        Z_AttributesPlusTerm *zapt,
1462                                        const char *termz_org,
1463                                        const Odr_oid *attributeSet,
1464                                        NMEM stream,
1465                                        int reg_type, int complete_flag,
1466                                        const char *rank_type,
1467                                        const char *xpath_use,
1468                                        int num_bases, char **basenames, 
1469                                        NMEM rset_nmem,
1470                                        RSET *rset,
1471                                        struct rset_key_control *kc)
1472 {
1473     RSET *result_sets = 0;
1474     int num_result_sets = 0;
1475     ZEBRA_RES res =
1476         term_list_trunc(zh, zapt, termz_org, attributeSet,
1477                         stream, reg_type, complete_flag,
1478                         rank_type, xpath_use,
1479                         num_bases, basenames,
1480                         rset_nmem,
1481                         &result_sets, &num_result_sets, kc);
1482
1483     if (res != ZEBRA_OK)
1484         return res;
1485
1486     if (num_result_sets > 0)
1487     {
1488         RSET first_set = 0;
1489         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1490                                       reg_type,
1491                                       num_bases, basenames,
1492                                       rset_nmem, &first_set,
1493                                       kc);
1494         if (res != ZEBRA_OK)
1495             return res;
1496         if (first_set)
1497         {
1498             RSET *nsets = nmem_malloc(stream,
1499                                       sizeof(RSET) * (num_result_sets+1));
1500             nsets[0] = first_set;
1501             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1502             result_sets = nsets;
1503             num_result_sets++;
1504         }
1505     }
1506     if (num_result_sets == 0)
1507         *rset = rset_create_null(rset_nmem, kc, 0); 
1508     else if (num_result_sets == 1)
1509         *rset = result_sets[0];
1510     else
1511         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1512                                  num_result_sets, result_sets,
1513                                  1 /* ordered */, 0 /* exclusion */,
1514                                  3 /* relation */, 1 /* distance */);
1515     if (!*rset)
1516         return ZEBRA_FAIL;
1517     return ZEBRA_OK;
1518 }
1519
1520 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1521                                         Z_AttributesPlusTerm *zapt,
1522                                         const char *termz_org,
1523                                         const Odr_oid *attributeSet,
1524                                         NMEM stream,
1525                                         int reg_type, int complete_flag,
1526                                         const char *rank_type,
1527                                         const char *xpath_use,
1528                                         int num_bases, char **basenames,
1529                                         NMEM rset_nmem,
1530                                         RSET *rset,
1531                                         struct rset_key_control *kc)
1532 {
1533     RSET *result_sets = 0;
1534     int num_result_sets = 0;
1535     int i;
1536     ZEBRA_RES res =
1537         term_list_trunc(zh, zapt, termz_org, attributeSet,
1538                         stream, reg_type, complete_flag,
1539                         rank_type, xpath_use,
1540                         num_bases, basenames,
1541                         rset_nmem,
1542                         &result_sets, &num_result_sets, kc);
1543     if (res != ZEBRA_OK)
1544         return res;
1545
1546     for (i = 0; i<num_result_sets; i++)
1547     {
1548         RSET first_set = 0;
1549         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1550                                       reg_type,
1551                                       num_bases, basenames,
1552                                       rset_nmem, &first_set,
1553                                       kc);
1554         if (res != ZEBRA_OK)
1555         {
1556             for (i = 0; i<num_result_sets; i++)
1557                 rset_delete(result_sets[i]);
1558             return res;
1559         }
1560
1561         if (first_set)
1562         {
1563             RSET tmp_set[2];
1564
1565             tmp_set[0] = first_set;
1566             tmp_set[1] = result_sets[i];
1567             
1568             result_sets[i] = rset_create_prox(
1569                 rset_nmem, kc, kc->scope,
1570                 2, tmp_set,
1571                 1 /* ordered */, 0 /* exclusion */,
1572                 3 /* relation */, 1 /* distance */);
1573         }
1574     }
1575     if (num_result_sets == 0)
1576         *rset = rset_create_null(rset_nmem, kc, 0); 
1577     else if (num_result_sets == 1)
1578         *rset = result_sets[0];
1579     else
1580         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1581                                num_result_sets, result_sets);
1582     if (!*rset)
1583         return ZEBRA_FAIL;
1584     return ZEBRA_OK;
1585 }
1586
1587 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1588                                          Z_AttributesPlusTerm *zapt,
1589                                          const char *termz_org,
1590                                          const Odr_oid *attributeSet,
1591                                          NMEM stream,
1592                                          int reg_type, int complete_flag,
1593                                          const char *rank_type, 
1594                                          const char *xpath_use,
1595                                          int num_bases, char **basenames,
1596                                          NMEM rset_nmem,
1597                                          RSET *rset,
1598                                          struct rset_key_control *kc)
1599 {
1600     RSET *result_sets = 0;
1601     int num_result_sets = 0;
1602     int i;
1603     ZEBRA_RES res =
1604         term_list_trunc(zh, zapt, termz_org, attributeSet,
1605                         stream, reg_type, complete_flag,
1606                         rank_type, xpath_use,
1607                         num_bases, basenames,
1608                         rset_nmem,
1609                         &result_sets, &num_result_sets,
1610                         kc);
1611     if (res != ZEBRA_OK)
1612         return res;
1613     for (i = 0; i<num_result_sets; i++)
1614     {
1615         RSET first_set = 0;
1616         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1617                                       reg_type,
1618                                       num_bases, basenames,
1619                                       rset_nmem, &first_set,
1620                                       kc);
1621         if (res != ZEBRA_OK)
1622         {
1623             for (i = 0; i<num_result_sets; i++)
1624                 rset_delete(result_sets[i]);
1625             return res;
1626         }
1627
1628         if (first_set)
1629         {
1630             RSET tmp_set[2];
1631
1632             tmp_set[0] = first_set;
1633             tmp_set[1] = result_sets[i];
1634             
1635             result_sets[i] = rset_create_prox(
1636                 rset_nmem, kc, kc->scope,
1637                 2, tmp_set,
1638                 1 /* ordered */, 0 /* exclusion */,
1639                 3 /* relation */, 1 /* distance */);
1640         }
1641     }
1642
1643
1644     if (num_result_sets == 0)
1645         *rset = rset_create_null(rset_nmem, kc, 0); 
1646     else if (num_result_sets == 1)
1647         *rset = result_sets[0];
1648     else
1649         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1650                                 num_result_sets, result_sets);
1651     if (!*rset)
1652         return ZEBRA_FAIL;
1653     return ZEBRA_OK;
1654 }
1655
1656 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1657                             const char **term_sub,
1658                             WRBUF term_dict,
1659                             const Odr_oid *attributeSet,
1660                             struct grep_info *grep_info,
1661                             int *max_pos,
1662                             int reg_type,
1663                             char *term_dst,
1664                             int *error_code)
1665 {
1666     AttrType relation;
1667     int relation_value;
1668     int term_value;
1669     int r;
1670     WRBUF term_num = wrbuf_alloc();
1671
1672     *error_code = 0;
1673     attr_init_APT(&relation, zapt, 2);
1674     relation_value = attr_find(&relation, NULL);
1675
1676     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1677
1678     switch (relation_value)
1679     {
1680     case 1:
1681         yaz_log(log_level_rpn, "Relation <");
1682         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1,
1683                       term_dst))
1684         { 
1685             wrbuf_destroy(term_num);
1686             return 0;
1687         }
1688         term_value = atoi (wrbuf_cstr(term_num));
1689         gen_regular_rel(term_dict, term_value-1, 1);
1690         break;
1691     case 2:
1692         yaz_log(log_level_rpn, "Relation <=");
1693         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1,
1694                       term_dst))
1695         {
1696             wrbuf_destroy(term_num);
1697             return 0;
1698         }
1699         term_value = atoi (wrbuf_cstr(term_num));
1700         gen_regular_rel(term_dict, term_value, 1);
1701         break;
1702     case 4:
1703         yaz_log(log_level_rpn, "Relation >=");
1704         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1,
1705                       term_dst))
1706         {
1707             wrbuf_destroy(term_num);
1708             return 0;
1709         }
1710         term_value = atoi (wrbuf_cstr(term_num));
1711         gen_regular_rel(term_dict, term_value, 0);
1712         break;
1713     case 5:
1714         yaz_log(log_level_rpn, "Relation >");
1715         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1,
1716                       term_dst))
1717         {
1718             wrbuf_destroy(term_num);
1719             return 0;
1720         }
1721         term_value = atoi (wrbuf_cstr(term_num));
1722         gen_regular_rel(term_dict, term_value+1, 0);
1723         break;
1724     case -1:
1725     case 3:
1726         yaz_log(log_level_rpn, "Relation =");
1727         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1,
1728                       term_dst))
1729         {
1730             wrbuf_destroy(term_num);
1731             return 0; 
1732         }
1733         term_value = atoi (wrbuf_cstr(term_num));
1734         wrbuf_printf(term_dict, "(0*%d)", term_value);
1735         break;
1736     case 103:
1737         /* term_tmp untouched.. */
1738         while (**term_sub != '\0')
1739             (*term_sub)++;
1740         break;
1741     default:
1742         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1743         wrbuf_destroy(term_num); 
1744         return 0;
1745     }
1746     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 
1747                          0, grep_info, max_pos, 0, grep_handle);
1748
1749     if (r == 1)
1750         zebra_set_partial_result(zh);
1751     else if (r)
1752         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1753     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1754     wrbuf_destroy(term_num);
1755     return 1;
1756 }
1757
1758 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1759                               const char **term_sub, 
1760                               WRBUF term_dict,
1761                               const Odr_oid *attributeSet, NMEM stream,
1762                               struct grep_info *grep_info,
1763                               int reg_type, int complete_flag,
1764                               int num_bases, char **basenames,
1765                               char *term_dst, 
1766                               const char *xpath_use,
1767                               struct ord_list **ol)
1768 {
1769     int base_no;
1770     const char *termp;
1771     struct rpn_char_map_info rcmi;
1772
1773     int bases_ok = 0;     /* no of databases with OK attribute */
1774
1775     *ol = ord_list_create(stream);
1776
1777     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1778
1779     for (base_no = 0; base_no < num_bases; base_no++)
1780     {
1781         int max_pos;
1782         int relation_error = 0;
1783         int ord, ord_len, i;
1784         char ord_buf[32];
1785
1786         termp = *term_sub;
1787
1788         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1789         {
1790             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1791                            basenames[base_no]);
1792             return ZEBRA_FAIL;
1793         }
1794
1795         if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1796                               attributeSet, &ord) != ZEBRA_OK)
1797             continue;
1798         bases_ok++;
1799
1800         wrbuf_rewind(term_dict);
1801
1802         *ol = ord_list_append(stream, *ol, ord);
1803
1804         ord_len = key_SU_encode (ord, ord_buf);
1805
1806         wrbuf_putc(term_dict, '(');
1807         for (i = 0; i < ord_len; i++)
1808         {
1809             wrbuf_putc(term_dict, 1);
1810             wrbuf_putc(term_dict, ord_buf[i]);
1811         }
1812         wrbuf_putc(term_dict, ')');
1813
1814         if (!numeric_relation(zh, zapt, &termp, term_dict,
1815                               attributeSet, grep_info, &max_pos, reg_type,
1816                               term_dst, &relation_error))
1817         {
1818             if (relation_error)
1819             {
1820                 zebra_setError(zh, relation_error, 0);
1821                 return ZEBRA_FAIL;
1822             }
1823             *term_sub = 0;
1824             return ZEBRA_OK;
1825         }
1826     }
1827     if (!bases_ok)
1828         return ZEBRA_FAIL;
1829     *term_sub = termp;
1830     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1831     return ZEBRA_OK;
1832 }
1833
1834                                  
1835 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1836                                         Z_AttributesPlusTerm *zapt,
1837                                         const char *termz,
1838                                         const Odr_oid *attributeSet,
1839                                         NMEM stream,
1840                                         int reg_type, int complete_flag,
1841                                         const char *rank_type, 
1842                                         const char *xpath_use,
1843                                         int num_bases, char **basenames,
1844                                         NMEM rset_nmem,
1845                                         RSET *rset,
1846                                         struct rset_key_control *kc)
1847 {
1848     char term_dst[IT_MAX_WORD+1];
1849     const char *termp = termz;
1850     RSET *result_sets = 0;
1851     int num_result_sets = 0;
1852     ZEBRA_RES res;
1853     struct grep_info grep_info;
1854     int alloc_sets = 0;
1855     zint hits_limit_value;
1856     const char *term_ref_id_str = 0;
1857
1858     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1859
1860     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1861     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1862         return ZEBRA_FAIL;
1863     while (1)
1864     { 
1865         struct ord_list *ol;
1866         WRBUF term_dict = wrbuf_alloc();
1867         if (alloc_sets == num_result_sets)
1868         {
1869             int add = 10;
1870             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1871                                               sizeof(*rnew));
1872             if (alloc_sets)
1873                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1874             alloc_sets = alloc_sets + add;
1875             result_sets = rnew;
1876         }
1877         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1878         grep_info.isam_p_indx = 0;
1879         res = numeric_term(zh, zapt, &termp, term_dict,
1880                            attributeSet, stream, &grep_info,
1881                            reg_type, complete_flag, num_bases, basenames,
1882                            term_dst, xpath_use, &ol);
1883         wrbuf_destroy(term_dict);
1884         if (res == ZEBRA_FAIL || termp == 0)
1885             break;
1886         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1887         result_sets[num_result_sets] =
1888             rset_trunc(zh, grep_info.isam_p_buf,
1889                        grep_info.isam_p_indx, term_dst,
1890                        strlen(term_dst), rank_type,
1891                        0 /* preserve position */,
1892                        zapt->term->which, rset_nmem, 
1893                        kc, kc->scope, ol, reg_type,
1894                        hits_limit_value,
1895                        term_ref_id_str);
1896         if (!result_sets[num_result_sets])
1897             break;
1898         num_result_sets++;
1899         if (!*termp)
1900             break;
1901     }
1902     grep_info_delete(&grep_info);
1903
1904     if (res != ZEBRA_OK)
1905         return res;
1906     if (num_result_sets == 0)
1907         *rset = rset_create_null(rset_nmem, kc, 0);
1908     else if (num_result_sets == 1)
1909         *rset = result_sets[0];
1910     else
1911         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1912                                 num_result_sets, result_sets);
1913     if (!*rset)
1914         return ZEBRA_FAIL;
1915     return ZEBRA_OK;
1916 }
1917
1918 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1919                                       Z_AttributesPlusTerm *zapt,
1920                                       const char *termz,
1921                                       const Odr_oid *attributeSet,
1922                                       NMEM stream,
1923                                       const char *rank_type, NMEM rset_nmem,
1924                                       RSET *rset,
1925                                       struct rset_key_control *kc)
1926 {
1927     Record rec;
1928     zint sysno = atozint(termz);
1929     
1930     if (sysno <= 0)
1931         sysno = 0;
1932     rec = rec_get(zh->reg->records, sysno);
1933     if (!rec)
1934         sysno = 0;
1935
1936     rec_free(&rec);
1937
1938     if (sysno <= 0)
1939     {
1940         *rset = rset_create_null(rset_nmem, kc, 0);
1941     }
1942     else
1943     {
1944         RSFD rsfd;
1945         struct it_key key;
1946         *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1947                                  res_get(zh->res, "setTmpDir"), 0);
1948         rsfd = rset_open(*rset, RSETF_WRITE);
1949         
1950         key.mem[0] = sysno;
1951         key.mem[1] = 1;
1952         key.len = 2;
1953         rset_write(rsfd, &key);
1954         rset_close(rsfd);
1955     }
1956     return ZEBRA_OK;
1957 }
1958
1959 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1960                                const Odr_oid *attributeSet, NMEM stream,
1961                                Z_SortKeySpecList *sort_sequence,
1962                                const char *rank_type,
1963                                NMEM rset_nmem,
1964                                RSET *rset,
1965                                struct rset_key_control *kc)
1966 {
1967     int i;
1968     int sort_relation_value;
1969     AttrType sort_relation_type;
1970     Z_SortKeySpec *sks;
1971     Z_SortKey *sk;
1972     char termz[20];
1973     
1974     attr_init_APT(&sort_relation_type, zapt, 7);
1975     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1976
1977     if (!sort_sequence->specs)
1978     {
1979         sort_sequence->num_specs = 10;
1980         sort_sequence->specs = (Z_SortKeySpec **)
1981             nmem_malloc(stream, sort_sequence->num_specs *
1982                          sizeof(*sort_sequence->specs));
1983         for (i = 0; i<sort_sequence->num_specs; i++)
1984             sort_sequence->specs[i] = 0;
1985     }
1986     if (zapt->term->which != Z_Term_general)
1987         i = 0;
1988     else
1989         i = atoi_n ((char *) zapt->term->u.general->buf,
1990                     zapt->term->u.general->len);
1991     if (i >= sort_sequence->num_specs)
1992         i = 0;
1993     sprintf(termz, "%d", i);
1994
1995     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1996     sks->sortElement = (Z_SortElement *)
1997         nmem_malloc(stream, sizeof(*sks->sortElement));
1998     sks->sortElement->which = Z_SortElement_generic;
1999     sk = sks->sortElement->u.generic = (Z_SortKey *)
2000         nmem_malloc(stream, sizeof(*sk));
2001     sk->which = Z_SortKey_sortAttributes;
2002     sk->u.sortAttributes = (Z_SortAttributes *)
2003         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
2004
2005     sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
2006     sk->u.sortAttributes->list = zapt->attributes;
2007
2008     sks->sortRelation = (int *)
2009         nmem_malloc(stream, sizeof(*sks->sortRelation));
2010     if (sort_relation_value == 1)
2011         *sks->sortRelation = Z_SortKeySpec_ascending;
2012     else if (sort_relation_value == 2)
2013         *sks->sortRelation = Z_SortKeySpec_descending;
2014     else 
2015         *sks->sortRelation = Z_SortKeySpec_ascending;
2016
2017     sks->caseSensitivity = (int *)
2018         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2019     *sks->caseSensitivity = 0;
2020
2021     sks->which = Z_SortKeySpec_null;
2022     sks->u.null = odr_nullval ();
2023     sort_sequence->specs[i] = sks;
2024     *rset = rset_create_null(rset_nmem, kc, 0);
2025     return ZEBRA_OK;
2026 }
2027
2028
2029 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2030                            const Odr_oid *attributeSet,
2031                            struct xpath_location_step *xpath, int max,
2032                            NMEM mem)
2033 {
2034     const Odr_oid *curAttributeSet = attributeSet;
2035     AttrType use;
2036     const char *use_string = 0;
2037     
2038     attr_init_APT(&use, zapt, 1);
2039     attr_find_ex(&use, &curAttributeSet, &use_string);
2040
2041     if (!use_string || *use_string != '/')
2042         return -1;
2043
2044     return zebra_parse_xpath_str(use_string, xpath, max, mem);
2045 }
2046  
2047                
2048
2049 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2050                         int reg_type, const char *term, 
2051                         const char *xpath_use,
2052                         NMEM rset_nmem,
2053                         struct rset_key_control *kc)
2054 {
2055     struct grep_info grep_info;
2056     int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
2057                                            zinfo_index_category_index,
2058                                            reg_type, xpath_use);
2059     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2060         return rset_create_null(rset_nmem, kc, 0);
2061     
2062     if (ord < 0)
2063         return rset_create_null(rset_nmem, kc, 0);
2064     else
2065     {
2066         int i, r, max_pos;
2067         char ord_buf[32];
2068         RSET rset;
2069         WRBUF term_dict = wrbuf_alloc();
2070         int ord_len = key_SU_encode (ord, ord_buf);
2071         int term_type = Z_Term_characterString;
2072         const char *flags = "void";
2073
2074         wrbuf_putc(term_dict, '(');
2075         for (i = 0; i<ord_len; i++)
2076         {
2077             wrbuf_putc(term_dict, 1);
2078             wrbuf_putc(term_dict, ord_buf[i]);
2079         }
2080         wrbuf_putc(term_dict, ')');
2081         wrbuf_puts(term_dict, term);
2082         
2083         grep_info.isam_p_indx = 0;
2084         r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2085                              &grep_info, &max_pos, 0, grep_handle);
2086         yaz_log(YLOG_DEBUG, "%s %d positions", term,
2087                 grep_info.isam_p_indx);
2088         rset = rset_trunc(zh, grep_info.isam_p_buf,
2089                           grep_info.isam_p_indx, term, strlen(term),
2090                           flags, 1, term_type, rset_nmem,
2091                           kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
2092                           0 /* term_ref_id_str */);
2093         grep_info_delete(&grep_info);
2094         wrbuf_destroy(term_dict);
2095         return rset;
2096     }
2097 }
2098
2099 static
2100 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2101                            int num_bases, char **basenames,
2102                            NMEM stream, const char *rank_type, RSET rset,
2103                            int xpath_len, struct xpath_location_step *xpath,
2104                            NMEM rset_nmem,
2105                            RSET *rset_out,
2106                            struct rset_key_control *kc)
2107 {
2108     int base_no;
2109     int i;
2110     int always_matches = rset ? 0 : 1;
2111
2112     if (xpath_len < 0)
2113     {
2114         *rset_out = rset;
2115         return ZEBRA_OK;
2116     }
2117
2118     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2119     for (i = 0; i<xpath_len; i++)
2120     {
2121         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2122
2123     }
2124
2125     /*
2126       //a    ->    a/.*
2127       //a/b  ->    b/a/.*
2128       /a     ->    a/
2129       /a/b   ->    b/a/
2130
2131       /      ->    none
2132
2133    a[@attr = value]/b[@other = othervalue]
2134
2135  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2136  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2137  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2138  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2139  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2140  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2141       
2142     */
2143
2144     dict_grep_cmap (zh->reg->dict, 0, 0);
2145
2146     for (base_no = 0; base_no < num_bases; base_no++)
2147     {
2148         int level = xpath_len;
2149         int first_path = 1;
2150         
2151         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2152         {
2153             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2154                            basenames[base_no]);
2155             *rset_out = rset;
2156             return ZEBRA_FAIL;
2157         }
2158         while (--level >= 0)
2159         {
2160             WRBUF xpath_rev = wrbuf_alloc();
2161             int i;
2162             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2163
2164             for (i = level; i >= 1; --i)
2165             {
2166                 const char *cp = xpath[i].part;
2167                 if (*cp)
2168                 {
2169                     for (; *cp; cp++)
2170                     {
2171                         if (*cp == '*')
2172                             wrbuf_puts(xpath_rev, "[^/]*");
2173                         else if (*cp == ' ')
2174                             wrbuf_puts(xpath_rev, "\001 ");
2175                         else
2176                             wrbuf_putc(xpath_rev, *cp);
2177
2178                         /* wrbuf_putc does not null-terminate , but
2179                            wrbuf_puts below ensures it does.. so xpath_rev
2180                            is OK iff length is > 0 */
2181                     }
2182                     wrbuf_puts(xpath_rev, "/");
2183                 }
2184                 else if (i == 1)  /* // case */
2185                     wrbuf_puts(xpath_rev, ".*");
2186             }
2187             if (xpath[level].predicate &&
2188                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2189                 xpath[level].predicate->u.relation.name[0])
2190             {
2191                 WRBUF wbuf = wrbuf_alloc();
2192                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2193                 if (xpath[level].predicate->u.relation.value)
2194                 {
2195                     const char *cp = xpath[level].predicate->u.relation.value;
2196                     wrbuf_putc(wbuf, '=');
2197                     
2198                     while (*cp)
2199                     {
2200                         if (strchr(REGEX_CHARS, *cp))
2201                             wrbuf_putc(wbuf, '\\');
2202                         wrbuf_putc(wbuf, *cp);
2203                         cp++;
2204                     }
2205                 }
2206                 rset_attr = xpath_trunc(
2207                     zh, stream, '0', wrbuf_cstr(wbuf), ZEBRA_XPATH_ATTR_NAME, 
2208                     rset_nmem, kc);
2209                 wrbuf_destroy(wbuf);
2210             } 
2211             else 
2212             {
2213                 if (!first_path)
2214                 {
2215                     wrbuf_destroy(xpath_rev);
2216                     continue;
2217                 }
2218             }
2219             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, 
2220                     wrbuf_cstr(xpath_rev));
2221             if (wrbuf_len(xpath_rev))
2222             {
2223                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2224                                              wrbuf_cstr(xpath_rev),
2225                                              ZEBRA_XPATH_ELM_BEGIN, 
2226                                              rset_nmem, kc);
2227                 if (always_matches)
2228                     rset = rset_start_tag;
2229                 else
2230                 {
2231                     rset_end_tag = xpath_trunc(zh, stream, '0', 
2232                                                wrbuf_cstr(xpath_rev),
2233                                                ZEBRA_XPATH_ELM_END, 
2234                                                rset_nmem, kc);
2235                     
2236                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2237                                                rset_start_tag, rset,
2238                                                rset_end_tag, rset_attr);
2239                 }
2240             }
2241             wrbuf_destroy(xpath_rev);
2242             first_path = 0;
2243         }
2244     }
2245     *rset_out = rset;
2246     return ZEBRA_OK;
2247 }
2248
2249 #define MAX_XPATH_STEPS 10
2250
2251 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2252                                 const Odr_oid *attributeSet, NMEM stream,
2253                                 Z_SortKeySpecList *sort_sequence,
2254                                 int num_bases, char **basenames, 
2255                                 NMEM rset_nmem,
2256                                 RSET *rset,
2257                                 struct rset_key_control *kc)
2258 {
2259     ZEBRA_RES res = ZEBRA_OK;
2260     unsigned reg_id;
2261     char *search_type = NULL;
2262     char rank_type[128];
2263     int complete_flag;
2264     int sort_flag;
2265     char termz[IT_MAX_WORD+1];
2266     int xpath_len;
2267     const char *xpath_use = 0;
2268     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2269
2270     if (!log_level_set)
2271     {
2272         log_level_rpn = yaz_log_module_level("rpn");
2273         log_level_set = 1;
2274     }
2275     zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2276                     rank_type, &complete_flag, &sort_flag);
2277     
2278     yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2279     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2280     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2281     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2282
2283     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2284         return ZEBRA_FAIL;
2285
2286     if (sort_flag)
2287         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2288                              rank_type, rset_nmem, rset, kc);
2289     /* consider if an X-Path query is used */
2290     xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
2291                                 xpath, MAX_XPATH_STEPS, stream);
2292     if (xpath_len >= 0)
2293     {
2294         if (xpath[xpath_len-1].part[0] == '@') 
2295             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2296         else
2297             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
2298
2299         if (1)
2300         {
2301             AttrType relation;
2302             int relation_value;
2303
2304             attr_init_APT(&relation, zapt, 2);
2305             relation_value = attr_find(&relation, NULL);
2306
2307             if (relation_value == 103) /* alwaysmatches */
2308             {
2309                 *rset = 0; /* signal no "term" set */
2310                 return rpn_search_xpath(zh, num_bases, basenames,
2311                                         stream, rank_type, *rset, 
2312                                         xpath_len, xpath, rset_nmem, rset, kc);
2313             }
2314         }
2315     }
2316
2317     /* search using one of the various search type strategies
2318        termz is our UTF-8 search term
2319        attributeSet is top-level default attribute set 
2320        stream is ODR for search
2321        reg_id is the register type
2322        complete_flag is 1 for complete subfield, 0 for incomplete
2323        xpath_use is use-attribute to be used for X-Path search, 0 for none
2324     */
2325     if (!strcmp(search_type, "phrase"))
2326     {
2327         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2328                                     reg_id, complete_flag, rank_type,
2329                                     xpath_use,
2330                                     num_bases, basenames, rset_nmem,
2331                                     rset, kc);
2332     }
2333     else if (!strcmp(search_type, "and-list"))
2334     {
2335         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2336                                       reg_id, complete_flag, rank_type,
2337                                       xpath_use,
2338                                       num_bases, basenames, rset_nmem,
2339                                       rset, kc);
2340     }
2341     else if (!strcmp(search_type, "or-list"))
2342     {
2343         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2344                                      reg_id, complete_flag, rank_type,
2345                                      xpath_use,
2346                                      num_bases, basenames, rset_nmem,
2347                                      rset, kc);
2348     }
2349     else if (!strcmp(search_type, "local"))
2350     {
2351         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2352                                    rank_type, rset_nmem, rset, kc);
2353     }
2354     else if (!strcmp(search_type, "numeric"))
2355     {
2356         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2357                                      reg_id, complete_flag, rank_type,
2358                                      xpath_use,
2359                                      num_bases, basenames, rset_nmem,
2360                                      rset, kc);
2361     }
2362     else
2363     {
2364         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2365         res = ZEBRA_FAIL;
2366     }
2367     if (res != ZEBRA_OK)
2368         return res;
2369     if (!*rset)
2370         return ZEBRA_FAIL;
2371     return rpn_search_xpath(zh, num_bases, basenames,
2372                             stream, rank_type, *rset, 
2373                             xpath_len, xpath, rset_nmem, rset, kc);
2374 }
2375
2376 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2377                                       const Odr_oid *attributeSet, 
2378                                       NMEM stream, NMEM rset_nmem,
2379                                       Z_SortKeySpecList *sort_sequence,
2380                                       int num_bases, char **basenames,
2381                                       RSET **result_sets, int *num_result_sets,
2382                                       Z_Operator *parent_op,
2383                                       struct rset_key_control *kc);
2384
2385 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2386                                    zint *approx_limit)
2387 {
2388     ZEBRA_RES res = ZEBRA_OK;
2389     if (zs->which == Z_RPNStructure_complex)
2390     {
2391         if (res == ZEBRA_OK)
2392             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2393                                            approx_limit);
2394         if (res == ZEBRA_OK)
2395             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2396                                            approx_limit);
2397     }
2398     else if (zs->which == Z_RPNStructure_simple)
2399     {
2400         if (zs->u.simple->which == Z_Operand_APT)
2401         {
2402             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2403             AttrType global_hits_limit_attr;
2404             int l;
2405             
2406             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2407             
2408             l = attr_find(&global_hits_limit_attr, NULL);
2409             if (l != -1)
2410                 *approx_limit = l;
2411         }
2412     }
2413     return res;
2414 }
2415
2416 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2417                          const Odr_oid *attributeSet, 
2418                          NMEM stream, NMEM rset_nmem,
2419                          Z_SortKeySpecList *sort_sequence,
2420                          int num_bases, char **basenames,
2421                          RSET *result_set)
2422 {
2423     RSET *result_sets = 0;
2424     int num_result_sets = 0;
2425     ZEBRA_RES res;
2426     struct rset_key_control *kc = zebra_key_control_create(zh);
2427
2428     res = rpn_search_structure(zh, zs, attributeSet,
2429                                stream, rset_nmem,
2430                                sort_sequence, 
2431                                num_bases, basenames,
2432                                &result_sets, &num_result_sets,
2433                                0 /* no parent op */,
2434                                kc);
2435     if (res != ZEBRA_OK)
2436     {
2437         int i;
2438         for (i = 0; i<num_result_sets; i++)
2439             rset_delete(result_sets[i]);
2440         *result_set = 0;
2441     }
2442     else
2443     {
2444         assert(num_result_sets == 1);
2445         assert(result_sets);
2446         assert(*result_sets);
2447         *result_set = *result_sets;
2448     }
2449     (*kc->dec)(kc);
2450     return res;
2451 }
2452
2453 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2454                                const Odr_oid *attributeSet, 
2455                                NMEM stream, NMEM rset_nmem,
2456                                Z_SortKeySpecList *sort_sequence,
2457                                int num_bases, char **basenames,
2458                                RSET **result_sets, int *num_result_sets,
2459                                Z_Operator *parent_op,
2460                                struct rset_key_control *kc)
2461 {
2462     *num_result_sets = 0;
2463     if (zs->which == Z_RPNStructure_complex)
2464     {
2465         ZEBRA_RES res;
2466         Z_Operator *zop = zs->u.complex->roperator;
2467         RSET *result_sets_l = 0;
2468         int num_result_sets_l = 0;
2469         RSET *result_sets_r = 0;
2470         int num_result_sets_r = 0;
2471
2472         res = rpn_search_structure(zh, zs->u.complex->s1,
2473                                    attributeSet, stream, rset_nmem,
2474                                    sort_sequence,
2475                                    num_bases, basenames,
2476                                    &result_sets_l, &num_result_sets_l,
2477                                    zop, kc);
2478         if (res != ZEBRA_OK)
2479         {
2480             int i;
2481             for (i = 0; i<num_result_sets_l; i++)
2482                 rset_delete(result_sets_l[i]);
2483             return res;
2484         }
2485         res = rpn_search_structure(zh, zs->u.complex->s2,
2486                                    attributeSet, stream, rset_nmem,
2487                                    sort_sequence,
2488                                    num_bases, basenames,
2489                                    &result_sets_r, &num_result_sets_r,
2490                                    zop, kc);
2491         if (res != ZEBRA_OK)
2492         {
2493             int i;
2494             for (i = 0; i<num_result_sets_l; i++)
2495                 rset_delete(result_sets_l[i]);
2496             for (i = 0; i<num_result_sets_r; i++)
2497                 rset_delete(result_sets_r[i]);
2498             return res;
2499         }
2500
2501         /* make a new list of result for all children */
2502         *num_result_sets = num_result_sets_l + num_result_sets_r;
2503         *result_sets = nmem_malloc(stream, *num_result_sets * 
2504                                    sizeof(**result_sets));
2505         memcpy(*result_sets, result_sets_l, 
2506                num_result_sets_l * sizeof(**result_sets));
2507         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2508                num_result_sets_r * sizeof(**result_sets));
2509
2510         if (!parent_op || parent_op->which != zop->which
2511             || (zop->which != Z_Operator_and &&
2512                 zop->which != Z_Operator_or))
2513         {
2514             /* parent node different from this one (or non-present) */
2515             /* we must combine result sets now */
2516             RSET rset;
2517             switch (zop->which)
2518             {
2519             case Z_Operator_and:
2520                 rset = rset_create_and(rset_nmem, kc,
2521                                        kc->scope,
2522                                        *num_result_sets, *result_sets);
2523                 break;
2524             case Z_Operator_or:
2525                 rset = rset_create_or(rset_nmem, kc,
2526                                       kc->scope, 0, /* termid */
2527                                       *num_result_sets, *result_sets);
2528                 break;
2529             case Z_Operator_and_not:
2530                 rset = rset_create_not(rset_nmem, kc,
2531                                        kc->scope,
2532                                        (*result_sets)[0],
2533                                        (*result_sets)[1]);
2534                 break;
2535             case Z_Operator_prox:
2536                 if (zop->u.prox->which != Z_ProximityOperator_known)
2537                 {
2538                     zebra_setError(zh, 
2539                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2540                                    0);
2541                     return ZEBRA_FAIL;
2542                 }
2543                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2544                 {
2545                     zebra_setError_zint(zh,
2546                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2547                                         *zop->u.prox->u.known);
2548                     return ZEBRA_FAIL;
2549                 }
2550                 else
2551                 {
2552                     rset = rset_create_prox(rset_nmem, kc,
2553                                             kc->scope,
2554                                             *num_result_sets, *result_sets, 
2555                                             *zop->u.prox->ordered,
2556                                             (!zop->u.prox->exclusion ? 
2557                                              0 : *zop->u.prox->exclusion),
2558                                             *zop->u.prox->relationType,
2559                                             *zop->u.prox->distance );
2560                 }
2561                 break;
2562             default:
2563                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2564                 return ZEBRA_FAIL;
2565             }
2566             *num_result_sets = 1;
2567             *result_sets = nmem_malloc(stream, *num_result_sets * 
2568                                        sizeof(**result_sets));
2569             (*result_sets)[0] = rset;
2570         }
2571     }
2572     else if (zs->which == Z_RPNStructure_simple)
2573     {
2574         RSET rset;
2575         ZEBRA_RES res;
2576
2577         if (zs->u.simple->which == Z_Operand_APT)
2578         {
2579             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2580             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2581                                  attributeSet, stream, sort_sequence,
2582                                  num_bases, basenames, rset_nmem, &rset,
2583                                  kc);
2584             if (res != ZEBRA_OK)
2585                 return res;
2586         }
2587         else if (zs->u.simple->which == Z_Operand_resultSetId)
2588         {
2589             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2590             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2591             if (!rset)
2592             {
2593                 zebra_setError(zh, 
2594                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2595                                zs->u.simple->u.resultSetId);
2596                 return ZEBRA_FAIL;
2597             }
2598             rset_dup(rset);
2599         }
2600         else
2601         {
2602             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2603             return ZEBRA_FAIL;
2604         }
2605         *num_result_sets = 1;
2606         *result_sets = nmem_malloc(stream, *num_result_sets * 
2607                                    sizeof(**result_sets));
2608         (*result_sets)[0] = rset;
2609     }
2610     else
2611     {
2612         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2613         return ZEBRA_FAIL;
2614     }
2615     return ZEBRA_OK;
2616 }
2617
2618
2619
2620 /*
2621  * Local variables:
2622  * c-basic-offset: 4
2623  * indent-tabs-mode: nil
2624  * End:
2625  * vim: shiftwidth=4 tabstop=8 expandtab
2626  */
2627