Honor approximative limits for scan (type 12).
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* $Id: rpnsearch.c,v 1.20 2007-11-01 14:10:03 adam Exp $
2    Copyright (C) 1995-2007
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
/* Lazily initialised logging state: log_level_rpn receives the YAZ log
   level of the "rpn" module the first time add_isam_p() runs, after
   which log_level_set is non-zero. */
static int log_level_set = 0;
static int log_level_rpn = 0;

/* NOTE(review): not referenced in the visible part of this file. */
#define TERMSET_DISABLE 1
44
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
46 {
47     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48     const char **out = zebra_maps_input(p->zm, from, len, 0);
49 #if 0
50     if (out && *out)
51     {
52         const char *outp = *out;
53         yaz_log(YLOG_LOG, "---");
54         while (*outp)
55         {
56             yaz_log(YLOG_LOG, "%02X", *outp);
57             outp++;
58         }
59     }
60 #endif
61     return out;
62 }
63
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65                           struct rpn_char_map_info *map_info)
66 {
67     map_info->zm = zm;
68     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
69 }
70
71 #define TERM_COUNT        
72        
73 struct grep_info {        
74 #ifdef TERM_COUNT        
75     int *term_no;        
76 #endif        
77     ISAM_P *isam_p_buf;
78     int isam_p_size;        
79     int isam_p_indx;
80     int trunc_max;
81     ZebraHandle zh;
82     const char *index_type;
83     ZebraSet termset;
84 };        
85
86 static int add_isam_p(const char *name, const char *info,
87                       struct grep_info *p)
88 {
89     if (!log_level_set)
90     {
91         log_level_rpn = yaz_log_module_level("rpn");
92         log_level_set = 1;
93     }
94     /* we may have to stop this madness.. NOTE: -1 so that if
95        truncmax == trunxlimit we do *not* generate result sets */
96     if (p->isam_p_indx >= p->trunc_max - 1)
97         return 1;
98
99     if (p->isam_p_indx == p->isam_p_size)
100     {
101         ISAM_P *new_isam_p_buf;
102 #ifdef TERM_COUNT        
103         int *new_term_no;        
104 #endif
105         p->isam_p_size = 2*p->isam_p_size + 100;
106         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
107                                             p->isam_p_size);
108         if (p->isam_p_buf)
109         {
110             memcpy(new_isam_p_buf, p->isam_p_buf,
111                     p->isam_p_indx * sizeof(*p->isam_p_buf));
112             xfree(p->isam_p_buf);
113         }
114         p->isam_p_buf = new_isam_p_buf;
115
116 #ifdef TERM_COUNT
117         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
118         if (p->term_no)
119         {
120             memcpy(new_term_no, p->isam_p_buf,
121                     p->isam_p_indx * sizeof(*p->term_no));
122             xfree(p->term_no);
123         }
124         p->term_no = new_term_no;
125 #endif
126     }
127     assert(*info == sizeof(*p->isam_p_buf));
128     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
129
130     if (p->termset)
131     {
132         const char *db;
133         char term_tmp[IT_MAX_WORD];
134         int ord = 0;
135         const char *index_name;
136         int len = key_SU_decode(&ord, (const unsigned char *) name);
137         
138         zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
139         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
140         zebraExplain_lookup_ord(p->zh->reg->zei,
141                                 ord, 0 /* index_type */, &db, &index_name);
142         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
143         
144         resultSetAddTerm(p->zh, p->termset, name[len], db,
145                          index_name, term_tmp);
146     }
147     (p->isam_p_indx)++;
148     return 0;
149 }
150
/* Dictionary grep callback: p is the struct grep_info collecting hits;
   the work is done by add_isam_p(), whose result is returned. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *collector = (struct grep_info *) p;

    return add_isam_p(name, info, collector);
}
155
156 static int term_pre(zebra_map_t zm, const char **src,
157                     const char *ct1, const char *ct2, int first)
158 {
159     const char *s1, *s0 = *src;
160     const char **map;
161
162     /* skip white space */
163     while (*s0)
164     {
165         if (ct1 && strchr(ct1, *s0))
166             break;
167         if (ct2 && strchr(ct2, *s0))
168             break;
169         s1 = s0;
170         map = zebra_maps_input(zm, &s1, strlen(s1), first);
171         if (**map != *CHR_SPACE)
172             break;
173         s0 = s1;
174     }
175     *src = s0;
176     return *s0;
177 }
178
179
/* esc_str - render bytes as a printable "HH:c  " dump.
 *  out_buf:  destination, always NUL-terminated
 *  out_size: size of out_buf; must exceed 20
 *  in_buf:   bytes to dump
 *  in_size:  number of bytes in in_buf
 * Each byte becomes two hex digits, a colon, the character itself (or
 * '?' when outside 32..126) and two spaces. When fewer than 20 bytes of
 * room remain, ".." is appended and the dump stops.
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;
    int idx;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    for (idx = 0; idx < in_size; idx++)
    {
        int byte = in_buf[idx] & 0xff;
        int shown = (byte >= 32 && byte <= 126) ? byte : '?';

        used += sprintf(out_buf + used, "%02X:%c  ", byte, shown);
        if (used > out_size - 20)
        {
            strcpy(out_buf + used, "..");
            break;
        }
    }
}
205
206 #define REGEX_CHARS " []()|.*+?!"
207
208 static void add_non_space(const char *start, const char *end,
209                           WRBUF term_dict,
210                           char *dst_term, int *dst_ptr,
211                           const char **map, int q_map_match)
212 {
213     size_t sz = end - start;
214     memcpy(dst_term + *dst_ptr, start, sz);
215     (*dst_ptr) += sz;
216     if (!q_map_match)
217     {
218         while (start < end)
219         {
220             if (strchr(REGEX_CHARS, *start))
221                 wrbuf_putc(term_dict, '\\');
222             wrbuf_putc(term_dict, *start);
223             start++;
224         }
225     }
226     else
227     {
228         char tmpbuf[80];
229         esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
230         
231         wrbuf_puts(term_dict, map[0]);
232     }
233 }
234
235 /* term_100: handle term, where trunc = none(no operators at all) */
236 static int term_100(zebra_map_t zm,
237                     const char **src, WRBUF term_dict, int space_split,
238                     char *dst_term)
239 {
240     const char *s0;
241     const char **map;
242     int i = 0;
243     int j = 0;
244
245     const char *space_start = 0;
246     const char *space_end = 0;
247
248     if (!term_pre(zm, src, NULL, NULL, !space_split))
249         return 0;
250     s0 = *src;
251     while (*s0)
252     {
253         const char *s1 = s0;
254         int q_map_match = 0;
255         map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
256         if (space_split)
257         {
258             if (**map == *CHR_SPACE)
259                 break;
260         }
261         else  /* complete subfield only. */
262         {
263             if (**map == *CHR_SPACE)
264             {   /* save space mapping for later  .. */
265                 space_start = s1;
266                 space_end = s0;
267                 continue;
268             }
269             else if (space_start)
270             {   /* reload last space */
271                 while (space_start < space_end)
272                 {
273                     if (strchr(REGEX_CHARS, *space_start))
274                         wrbuf_putc(term_dict, '\\');
275                     dst_term[j++] = *space_start;
276                     wrbuf_putc(term_dict, *space_start);
277                     space_start++;
278                                
279                 }
280                 /* and reset */
281                 space_start = space_end = 0;
282             }
283         }
284         i++;
285
286         add_non_space(s1, s0, term_dict, dst_term, &j,
287                       map, q_map_match);
288     }
289     dst_term[j] = '\0';
290     *src = s0;
291     return i;
292 }
293
294 /* term_101: handle term, where trunc = Process # */
295 static int term_101(zebra_map_t zm,
296                     const char **src, WRBUF term_dict, int space_split,
297                     char *dst_term)
298 {
299     const char *s0;
300     const char **map;
301     int i = 0;
302     int j = 0;
303
304     if (!term_pre(zm, src, "#", "#", !space_split))
305         return 0;
306     s0 = *src;
307     while (*s0)
308     {
309         if (*s0 == '#')
310         {
311             i++;
312             wrbuf_puts(term_dict, ".*");
313             dst_term[j++] = *s0++;
314         }
315         else
316         {
317             const char *s1 = s0;
318             int q_map_match = 0;
319             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
320             if (space_split && **map == *CHR_SPACE)
321                 break;
322
323             i++;
324             add_non_space(s1, s0, term_dict, dst_term, &j,
325                           map, q_map_match);
326         }
327     }
328     dst_term[j++] = '\0';
329     *src = s0;
330     return i;
331 }
332
333 /* term_103: handle term, where trunc = re-2 (regular expressions) */
334 static int term_103(zebra_map_t zm, const char **src,
335                     WRBUF term_dict, int *errors, int space_split,
336                     char *dst_term)
337 {
338     int i = 0;
339     int j = 0;
340     const char *s0;
341     const char **map;
342
343     if (!term_pre(zm, src, "^\\()[].*+?|", "(", !space_split))
344         return 0;
345     s0 = *src;
346     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
347         isdigit(((const unsigned char *)s0)[1]))
348     {
349         *errors = s0[1] - '0';
350         s0 += 3;
351         if (*errors > 3)
352             *errors = 3;
353     }
354     while (*s0)
355     {
356         if (strchr("^\\()[].*+?|-", *s0))
357         {
358             dst_term[j++] = *s0;
359             wrbuf_putc(term_dict, *s0);
360             s0++;
361             i++;
362         }
363         else
364         {
365             const char *s1 = s0;
366             int q_map_match = 0;
367             map = zebra_maps_search(zm, &s0, strlen(s0),  &q_map_match);
368             if (space_split && **map == *CHR_SPACE)
369                 break;
370
371             i++;
372             add_non_space(s1, s0, term_dict, dst_term, &j,
373                           map, q_map_match);
374         }
375     }
376     dst_term[j] = '\0';
377     *src = s0;
378     
379     return i;
380 }
381
382 /* term_103: handle term, where trunc = re-1 (regular expressions) */
383 static int term_102(zebra_map_t zm, const char **src,
384                     WRBUF term_dict, int space_split, char *dst_term)
385 {
386     return term_103(zm, src, term_dict, NULL, space_split, dst_term);
387 }
388
389
390 /* term_104: handle term, process # and ! */
391 static int term_104(zebra_map_t zm, const char **src, 
392                     WRBUF term_dict, int space_split, char *dst_term)
393 {
394     const char *s0;
395     const char **map;
396     int i = 0;
397     int j = 0;
398
399     if (!term_pre(zm, src, "?*#", "?*#", !space_split))
400         return 0;
401     s0 = *src;
402     while (*s0)
403     {
404         if (*s0 == '?')
405         {
406             i++;
407             dst_term[j++] = *s0++;
408             if (*s0 >= '0' && *s0 <= '9')
409             {
410                 int limit = 0;
411                 while (*s0 >= '0' && *s0 <= '9')
412                 {
413                     limit = limit * 10 + (*s0 - '0');
414                     dst_term[j++] = *s0++;
415                 }
416                 if (limit > 20)
417                     limit = 20;
418                 while (--limit >= 0)
419                 {
420                     wrbuf_puts(term_dict, ".?");
421                 }
422             }
423             else
424             {
425                 wrbuf_puts(term_dict, ".*");
426             }
427         }
428         else if (*s0 == '*')
429         {
430             i++;
431             wrbuf_puts(term_dict, ".*");
432             dst_term[j++] = *s0++;
433         }
434         else if (*s0 == '#')
435         {
436             i++;
437             wrbuf_puts(term_dict, ".");
438             dst_term[j++] = *s0++;
439         }
440         else
441         {
442             const char *s1 = s0;
443             int q_map_match = 0;
444             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
445             if (space_split && **map == *CHR_SPACE)
446                 break;
447
448             i++;
449             add_non_space(s1, s0, term_dict, dst_term, &j,
450                           map, q_map_match);
451         }
452     }
453     dst_term[j++] = '\0';
454     *src = s0;
455     return i;
456 }
457
458 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
459 static int term_105(zebra_map_t zm, const char **src, 
460                     WRBUF term_dict, int space_split,
461                     char *dst_term, int right_truncate)
462 {
463     const char *s0;
464     const char **map;
465     int i = 0;
466     int j = 0;
467
468     if (!term_pre(zm, src, "*!", "*!", !space_split))
469         return 0;
470     s0 = *src;
471     while (*s0)
472     {
473         if (*s0 == '*')
474         {
475             i++;
476             wrbuf_puts(term_dict, ".*");
477             dst_term[j++] = *s0++;
478         }
479         else if (*s0 == '!')
480         {
481             i++;
482             wrbuf_putc(term_dict, '.');
483             dst_term[j++] = *s0++;
484         }
485         else
486         {
487             const char *s1 = s0;
488             int q_map_match = 0;
489             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
490             if (space_split && **map == *CHR_SPACE)
491                 break;
492
493             i++;
494             add_non_space(s1, s0, term_dict, dst_term, &j,
495                           map, q_map_match);
496         }
497     }
498     if (right_truncate)
499         wrbuf_puts(term_dict, ".*");
500     dst_term[j++] = '\0';
501     *src = s0;
502     return i;
503 }
504
505
506 /* gen_regular_rel - generate regular expression from relation
507  *  val:     border value (inclusive)
508  *  islt:    1 if <=; 0 if >=.
509  */
510 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
511 {
512     char dst_buf[20*5*20]; /* assuming enough for expansion */
513     char *dst = dst_buf;
514     int dst_p;
515     int w, d, i;
516     int pos = 0;
517     char numstr[20];
518
519     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
520     if (val >= 0)
521     {
522         if (islt)
523             strcpy(dst, "(-[0-9]+|(");
524         else
525             strcpy(dst, "((");
526     } 
527     else
528     {
529         if (!islt)
530         {
531             strcpy(dst, "([0-9]+|-(");
532             islt = 1;
533         }
534         else
535         {
536             strcpy(dst, "(-(");
537             islt = 0;
538         }
539         val = -val;
540     }
541     dst_p = strlen(dst);
542     sprintf(numstr, "%d", val);
543     for (w = strlen(numstr); --w >= 0; pos++)
544     {
545         d = numstr[w];
546         if (pos > 0)
547         {
548             if (islt)
549             {
550                 if (d == '0')
551                     continue;
552                 d--;
553             } 
554             else
555             {
556                 if (d == '9')
557                     continue;
558                 d++;
559             }
560         }
561         
562         strcpy(dst + dst_p, numstr);
563         dst_p = strlen(dst) - pos - 1;
564
565         if (islt)
566         {
567             if (d != '0')
568             {
569                 dst[dst_p++] = '[';
570                 dst[dst_p++] = '0';
571                 dst[dst_p++] = '-';
572                 dst[dst_p++] = d;
573                 dst[dst_p++] = ']';
574             }
575             else
576                 dst[dst_p++] = d;
577         }
578         else
579         {
580             if (d != '9')
581             { 
582                 dst[dst_p++] = '[';
583                 dst[dst_p++] = d;
584                 dst[dst_p++] = '-';
585                 dst[dst_p++] = '9';
586                 dst[dst_p++] = ']';
587             }
588             else
589                 dst[dst_p++] = d;
590         }
591         for (i = 0; i<pos; i++)
592         {
593             dst[dst_p++] = '[';
594             dst[dst_p++] = '0';
595             dst[dst_p++] = '-';
596             dst[dst_p++] = '9';
597             dst[dst_p++] = ']';
598         }
599         dst[dst_p++] = '|';
600     }
601     dst[dst_p] = '\0';
602     if (islt)
603     {
604         /* match everything less than 10^(pos-1) */
605         strcat(dst, "0*");
606         for (i = 1; i<pos; i++)
607             strcat(dst, "[0-9]?");
608     }
609     else
610     {
611         /* match everything greater than 10^pos */
612         for (i = 0; i <= pos; i++)
613             strcat(dst, "[0-9]");
614         strcat(dst, "[0-9]*");
615     }
616     strcat(dst, "))");
617     wrbuf_puts(term_dict, dst);
618 }
619
620 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
621 {
622     const char *src = wrbuf_cstr(wsrc);
623     if (src[*indx] == '\\')
624     {
625         wrbuf_putc(term_p, src[*indx]);
626         (*indx)++;
627     }
628     wrbuf_putc(term_p, src[*indx]);
629     (*indx)++;
630 }
631
632 /*
633  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
634  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
635  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
636  *              ([^-a].*|a[^-b].*|ab[c-].*)
637  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
638  *              ([^a-].*|a[^b-].*|ab[^c-].*)
639  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
640  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
641  */
642 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
643                            const char **term_sub, WRBUF term_dict,
644                            const Odr_oid *attributeSet,
645                            zebra_map_t zm, int space_split, char *term_dst,
646                            int *error_code)
647 {
648     AttrType relation;
649     int relation_value;
650     int i;
651     WRBUF term_component = wrbuf_alloc();
652
653     attr_init_APT(&relation, zapt, 2);
654     relation_value = attr_find(&relation, NULL);
655
656     *error_code = 0;
657     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
658     switch (relation_value)
659     {
660     case 1:
661         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
662         {
663             wrbuf_destroy(term_component);
664             return 0;
665         }
666         yaz_log(log_level_rpn, "Relation <");
667         
668         wrbuf_putc(term_dict, '(');
669         for (i = 0; i < wrbuf_len(term_component); )
670         {
671             int j = 0;
672             
673             if (i)
674                 wrbuf_putc(term_dict, '|');
675             while (j < i)
676                 string_rel_add_char(term_dict, term_component, &j);
677
678             wrbuf_putc(term_dict, '[');
679
680             wrbuf_putc(term_dict, '^');
681             
682             wrbuf_putc(term_dict, 1);
683             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
684             
685             string_rel_add_char(term_dict, term_component, &i);
686             wrbuf_putc(term_dict, '-');
687             
688             wrbuf_putc(term_dict, ']');
689             wrbuf_putc(term_dict, '.');
690             wrbuf_putc(term_dict, '*');
691         }
692         wrbuf_putc(term_dict, ')');
693         break;
694     case 2:
695         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
696         {
697             wrbuf_destroy(term_component);
698             return 0;
699         }
700         yaz_log(log_level_rpn, "Relation <=");
701
702         wrbuf_putc(term_dict, '(');
703         for (i = 0; i < wrbuf_len(term_component); )
704         {
705             int j = 0;
706
707             while (j < i)
708                 string_rel_add_char(term_dict, term_component, &j);
709             wrbuf_putc(term_dict, '[');
710
711             wrbuf_putc(term_dict, '^');
712
713             wrbuf_putc(term_dict, 1);
714             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
715
716             string_rel_add_char(term_dict, term_component, &i);
717             wrbuf_putc(term_dict, '-');
718
719             wrbuf_putc(term_dict, ']');
720             wrbuf_putc(term_dict, '.');
721             wrbuf_putc(term_dict, '*');
722
723             wrbuf_putc(term_dict, '|');
724         }
725         for (i = 0; i < wrbuf_len(term_component); )
726             string_rel_add_char(term_dict, term_component, &i);
727         wrbuf_putc(term_dict, ')');
728         break;
729     case 5:
730         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
731         {
732             wrbuf_destroy(term_component);
733             return 0;
734         }
735         yaz_log(log_level_rpn, "Relation >");
736
737         wrbuf_putc(term_dict, '(');
738         for (i = 0; i < wrbuf_len(term_component); )
739         {
740             int j = 0;
741
742             while (j < i)
743                 string_rel_add_char(term_dict, term_component, &j);
744             wrbuf_putc(term_dict, '[');
745             
746             wrbuf_putc(term_dict, '^');
747             wrbuf_putc(term_dict, '-');
748             string_rel_add_char(term_dict, term_component, &i);
749
750             wrbuf_putc(term_dict, ']');
751             wrbuf_putc(term_dict, '.');
752             wrbuf_putc(term_dict, '*');
753
754             wrbuf_putc(term_dict, '|');
755         }
756         for (i = 0; i < wrbuf_len(term_component); )
757             string_rel_add_char(term_dict, term_component, &i);
758         wrbuf_putc(term_dict, '.');
759         wrbuf_putc(term_dict, '+');
760         wrbuf_putc(term_dict, ')');
761         break;
762     case 4:
763         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
764         {
765             wrbuf_destroy(term_component);
766             return 0;
767         }
768         yaz_log(log_level_rpn, "Relation >=");
769
770         wrbuf_putc(term_dict, '(');
771         for (i = 0; i < wrbuf_len(term_component); )
772         {
773             int j = 0;
774
775             if (i)
776                 wrbuf_putc(term_dict, '|');
777             while (j < i)
778                 string_rel_add_char(term_dict, term_component, &j);
779             wrbuf_putc(term_dict, '[');
780
781             if (i < wrbuf_len(term_component)-1)
782             {
783                 wrbuf_putc(term_dict, '^');
784                 wrbuf_putc(term_dict, '-');
785                 string_rel_add_char(term_dict, term_component, &i);
786             }
787             else
788             {
789                 string_rel_add_char(term_dict, term_component, &i);
790                 wrbuf_putc(term_dict, '-');
791             }
792             wrbuf_putc(term_dict, ']');
793             wrbuf_putc(term_dict, '.');
794             wrbuf_putc(term_dict, '*');
795         }
796         wrbuf_putc(term_dict, ')');
797         break;
798     case 3:
799     case 102:
800     case -1:
801         if (!**term_sub)
802             return 1;
803         yaz_log(log_level_rpn, "Relation =");
804         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
805         {
806             wrbuf_destroy(term_component);
807             return 0;
808         }
809         wrbuf_puts(term_dict, "(");
810         wrbuf_puts(term_dict, wrbuf_cstr(term_component));
811         wrbuf_puts(term_dict, ")");
812         break;
813     case 103:
814         yaz_log(log_level_rpn, "Relation always matches");
815         /* skip to end of term (we don't care what it is) */
816         while (**term_sub != '\0')
817             (*term_sub)++;
818         break;
819     default:
820         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
821         wrbuf_destroy(term_component);
822         return 0;
823     }
824     wrbuf_destroy(term_component);
825     return 1;
826 }
827
828 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
829                              const char **term_sub, 
830                              WRBUF term_dict,
831                              const Odr_oid *attributeSet, NMEM stream,
832                              struct grep_info *grep_info,
833                              const char *index_type, int complete_flag,
834                              char *term_dst,
835                              const char *xpath_use,
836                              struct ord_list **ol);
837
838 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
839                                 Z_AttributesPlusTerm *zapt,
840                                 zint *hits_limit_value,
841                                 const char **term_ref_id_str,
842                                 NMEM nmem)
843 {
844     AttrType term_ref_id_attr;
845     AttrType hits_limit_attr;
846     int term_ref_id_int;
847  
848     attr_init_APT(&hits_limit_attr, zapt, 11);
849     *hits_limit_value  = attr_find(&hits_limit_attr, NULL);
850
851     attr_init_APT(&term_ref_id_attr, zapt, 10);
852     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
853     if (term_ref_id_int >= 0)
854     {
855         char *res = nmem_malloc(nmem, 20);
856         sprintf(res, "%d", term_ref_id_int);
857         *term_ref_id_str = res;
858     }
859
860     /* no limit given ? */
861     if (*hits_limit_value == -1)
862     {
863         if (*term_ref_id_str)
864         {
865             /* use global if term_ref is present */
866             *hits_limit_value = zh->approx_limit;
867         }
868         else
869         {
870             /* no counting if term_ref is not present */
871             *hits_limit_value = 0;
872         }
873     }
874     else if (*hits_limit_value == 0)
875     {
876         /* 0 is the same as global limit */
877         *hits_limit_value = zh->approx_limit;
878     }
879     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
880             *term_ref_id_str ? *term_ref_id_str : "none",
881             *hits_limit_value);
882     return ZEBRA_OK;
883 }
884
885 static ZEBRA_RES term_trunc(ZebraHandle zh,
886                             Z_AttributesPlusTerm *zapt,
887                             const char **term_sub, 
888                             const Odr_oid *attributeSet, NMEM stream,
889                             struct grep_info *grep_info,
890                             const char *index_type, int complete_flag,
891                             char *term_dst,
892                             const char *rank_type, 
893                             const char *xpath_use,
894                             NMEM rset_nmem,
895                             RSET *rset,
896                             struct rset_key_control *kc)
897 {
898     ZEBRA_RES res;
899     struct ord_list *ol;
900     zint hits_limit_value;
901     const char *term_ref_id_str = 0;
902     WRBUF term_dict = wrbuf_alloc();
903
904     *rset = 0;
905     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
906                           stream);
907     grep_info->isam_p_indx = 0;
908     res = string_term(zh, zapt, term_sub, term_dict,
909                       attributeSet, stream, grep_info,
910                       index_type, complete_flag,
911                       term_dst, xpath_use, &ol);
912     wrbuf_destroy(term_dict);
913     if (res != ZEBRA_OK)
914         return res;
915     if (!*term_sub)  /* no more terms ? */
916         return res;
917     yaz_log(log_level_rpn, "term: %s", term_dst);
918     *rset = rset_trunc(zh, grep_info->isam_p_buf,
919                        grep_info->isam_p_indx, term_dst,
920                        strlen(term_dst), rank_type, 1 /* preserve pos */,
921                        zapt->term->which, rset_nmem,
922                        kc, kc->scope, ol, index_type, hits_limit_value,
923                        term_ref_id_str);
924     if (!*rset)
925         return ZEBRA_FAIL;
926     return ZEBRA_OK;
927 }
928
929 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
930                              const char **term_sub, 
931                              WRBUF term_dict,
932                              const Odr_oid *attributeSet, NMEM stream,
933                              struct grep_info *grep_info,
934                              const char *index_type, int complete_flag,
935                              char *term_dst,
936                              const char *xpath_use,
937                              struct ord_list **ol)
938 {
939     int r;
940     AttrType truncation;
941     int truncation_value;
942     const char *termp;
943     struct rpn_char_map_info rcmi;
944
945     int space_split = complete_flag ? 0 : 1;
946     int ord = -1;
947     int regex_range = 0;
948     int max_pos, prefix_len = 0;
949     int relation_error;
950     char ord_buf[32];
951     int ord_len, i;
952     zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, index_type);
953     
954     *ol = ord_list_create(stream);
955
956     rpn_char_map_prepare(zh->reg, zm, &rcmi);
957     attr_init_APT(&truncation, zapt, 5);
958     truncation_value = attr_find(&truncation, NULL);
959     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
960
961     termp = *term_sub; /* start of term for each database */
962     
963     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
964                           attributeSet, &ord) != ZEBRA_OK)
965     {
966         *term_sub = 0;
967         return ZEBRA_FAIL;
968     }
969     
970     wrbuf_rewind(term_dict); /* new dictionary regexp term */
971     
972     *ol = ord_list_append(stream, *ol, ord);
973     ord_len = key_SU_encode(ord, ord_buf);
974     
975     wrbuf_putc(term_dict, '(');
976     
977     for (i = 0; i<ord_len; i++)
978     {
979         wrbuf_putc(term_dict, 1);  /* our internal regexp escape char */
980         wrbuf_putc(term_dict, ord_buf[i]);
981     }
982     wrbuf_putc(term_dict, ')');
983     
984     prefix_len = wrbuf_len(term_dict);
985     
986     switch (truncation_value)
987     {
988     case -1:         /* not specified */
989     case 100:        /* do not truncate */
990         if (!string_relation(zh, zapt, &termp, term_dict,
991                              attributeSet,
992                              zm, space_split, term_dst,
993                              &relation_error))
994         {
995             if (relation_error)
996             {
997                 zebra_setError(zh, relation_error, 0);
998                 return ZEBRA_FAIL;
999             }
1000             *term_sub = 0;
1001             return ZEBRA_OK;
1002         }
1003         break;
1004     case 1:          /* right truncation */
1005         wrbuf_putc(term_dict, '(');
1006         if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1007         {
1008             *term_sub = 0;
1009             return ZEBRA_OK;
1010         }
1011         wrbuf_puts(term_dict, ".*)");
1012         break;
1013     case 2:          /* keft truncation */
1014         wrbuf_puts(term_dict, "(.*");
1015         if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1016         {
1017             *term_sub = 0;
1018             return ZEBRA_OK;
1019         }
1020         wrbuf_putc(term_dict, ')');
1021         break;
1022     case 3:          /* left&right truncation */
1023         wrbuf_puts(term_dict, "(.*");
1024         if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1025         {
1026             *term_sub = 0;
1027             return ZEBRA_OK;
1028         }
1029         wrbuf_puts(term_dict, ".*)");
1030         break;
1031     case 101:        /* process # in term */
1032         wrbuf_putc(term_dict, '(');
1033         if (!term_101(zm, &termp, term_dict, space_split, term_dst))
1034         {
1035             *term_sub = 0;
1036             return ZEBRA_OK;
1037         }
1038         wrbuf_puts(term_dict, ")");
1039         break;
1040     case 102:        /* Regexp-1 */
1041         wrbuf_putc(term_dict, '(');
1042         if (!term_102(zm, &termp, term_dict, space_split, term_dst))
1043         {
1044             *term_sub = 0;
1045             return ZEBRA_OK;
1046         }
1047         wrbuf_putc(term_dict, ')');
1048         break;
1049     case 103:       /* Regexp-2 */
1050         regex_range = 1;
1051         wrbuf_putc(term_dict, '(');
1052         if (!term_103(zm, &termp, term_dict, &regex_range,
1053                       space_split, term_dst))
1054         {
1055             *term_sub = 0;
1056             return ZEBRA_OK;
1057         }
1058         wrbuf_putc(term_dict, ')');
1059         break;
1060     case 104:        /* process # and ! in term */
1061         wrbuf_putc(term_dict, '(');
1062         if (!term_104(zm, &termp, term_dict, space_split, term_dst))
1063         {
1064             *term_sub = 0;
1065             return ZEBRA_OK;
1066         }
1067         wrbuf_putc(term_dict, ')');
1068         break;
1069     case 105:        /* process * and ! in term */
1070         wrbuf_putc(term_dict, '(');
1071         if (!term_105(zm, &termp, term_dict, space_split, term_dst, 1))
1072         {
1073             *term_sub = 0;
1074             return ZEBRA_OK;
1075         }
1076         wrbuf_putc(term_dict, ')');
1077         break;
1078     case 106:        /* process * and ! in term */
1079         wrbuf_putc(term_dict, '(');
1080         if (!term_105(zm, &termp, term_dict, space_split, term_dst, 0))
1081         {
1082             *term_sub = 0;
1083             return ZEBRA_OK;
1084         }
1085         wrbuf_putc(term_dict, ')');
1086         break;
1087     default:
1088         zebra_setError_zint(zh,
1089                             YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1090                             truncation_value);
1091         return ZEBRA_FAIL;
1092     }
1093     if (1)
1094     {
1095         char buf[1000];
1096         const char *input = wrbuf_cstr(term_dict) + prefix_len;
1097         esc_str(buf, sizeof(buf), input, strlen(input));
1098     }
1099     yaz_log(log_level_rpn, "dict_lookup_grep: %s",
1100             wrbuf_cstr(term_dict) + prefix_len);
1101     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1102                          grep_info, &max_pos, 
1103                          ord_len /* number of "exact" chars */,
1104                          grep_handle);
1105     if (r == 1)
1106         zebra_set_partial_result(zh);
1107     else if (r)
1108         yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1109     *term_sub = termp;
1110     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1111     return ZEBRA_OK;
1112 }
1113
1114
1115
1116 static void grep_info_delete(struct grep_info *grep_info)
1117 {
1118 #ifdef TERM_COUNT
1119     xfree(grep_info->term_no);
1120 #endif
1121     xfree(grep_info->isam_p_buf);
1122 }
1123
1124 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1125                                    Z_AttributesPlusTerm *zapt,
1126                                    struct grep_info *grep_info,
1127                                    const char *index_type)
1128 {
1129 #ifdef TERM_COUNT
1130     grep_info->term_no = 0;
1131 #endif
1132     grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1133     grep_info->isam_p_size = 0;
1134     grep_info->isam_p_buf = NULL;
1135     grep_info->zh = zh;
1136     grep_info->index_type = index_type;
1137     grep_info->termset = 0;
1138     if (zapt)
1139     {
1140         AttrType truncmax;
1141         int truncmax_value;
1142
1143         attr_init_APT(&truncmax, zapt, 13);
1144         truncmax_value = attr_find(&truncmax, NULL);
1145         if (truncmax_value != -1)
1146             grep_info->trunc_max = truncmax_value;
1147     }
1148     if (zapt)
1149     {
1150         AttrType termset;
1151         int termset_value_numeric;
1152         const char *termset_value_string;
1153
1154         attr_init_APT(&termset, zapt, 8);
1155         termset_value_numeric =
1156             attr_find_ex(&termset, NULL, &termset_value_string);
1157         if (termset_value_numeric != -1)
1158         {
1159 #if TERMSET_DISABLE
1160             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1161             return ZEBRA_FAIL;
1162 #else
1163             char resname[32];
1164             const char *termset_name = 0;
1165             if (termset_value_numeric != -2)
1166             {
1167                 
1168                 sprintf(resname, "%d", termset_value_numeric);
1169                 termset_name = resname;
1170             }
1171             else
1172             termset_name = termset_value_string;
1173             yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1174             grep_info->termset = resultSetAdd(zh, termset_name, 1);
1175             if (!grep_info->termset)
1176             {
1177                 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1178                 return ZEBRA_FAIL;
1179             }
1180 #endif
1181         }
1182     }
1183     return ZEBRA_OK;
1184 }
1185                                
1186 /**
1187   \brief Create result set(s) for list of terms
1188   \param zh Zebra Handle
1189   \param zapt Attributes Plust Term (RPN leaf)
1190   \param termz term as used in query but converted to UTF-8
1191   \param attributeSet default attribute set
1192   \param stream memory for result
1193   \param index_type register type ("w", "p",..)
1194   \param complete_flag whether it's phrases or not
1195   \param rank_type term flags for ranking
1196   \param xpath_use use attribute for X-Path (-1 for no X-path)
1197   \param rset_nmem memory for result sets
1198   \param result_sets output result set for each term in list (output)
1199   \param num_result_sets number of output result sets
1200   \param kc rset key control to be used for created result sets
1201 */
1202 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1203                                  Z_AttributesPlusTerm *zapt,
1204                                  const char *termz,
1205                                  const Odr_oid *attributeSet,
1206                                  NMEM stream,
1207                                  const char *index_type, int complete_flag,
1208                                  const char *rank_type,
1209                                  const char *xpath_use,
1210                                  NMEM rset_nmem,
1211                                  RSET **result_sets, int *num_result_sets,
1212                                  struct rset_key_control *kc)
1213 {
1214     char term_dst[IT_MAX_WORD+1];
1215     struct grep_info grep_info;
1216     const char *termp = termz;
1217     int alloc_sets = 0;
1218
1219     *num_result_sets = 0;
1220     *term_dst = 0;
1221     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1222         return ZEBRA_FAIL;
1223     while(1)
1224     { 
1225         ZEBRA_RES res;
1226
1227         if (alloc_sets == *num_result_sets)
1228         {
1229             int add = 10;
1230             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1231                                               sizeof(*rnew));
1232             if (alloc_sets)
1233                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1234             alloc_sets = alloc_sets + add;
1235             *result_sets = rnew;
1236         }
1237         res = term_trunc(zh, zapt, &termp, attributeSet,
1238                          stream, &grep_info,
1239                          index_type, complete_flag,
1240                          term_dst, rank_type,
1241                          xpath_use, rset_nmem,
1242                          &(*result_sets)[*num_result_sets],
1243                          kc);
1244         if (res != ZEBRA_OK)
1245         {
1246             int i;
1247             for (i = 0; i < *num_result_sets; i++)
1248                 rset_delete((*result_sets)[i]);
1249             grep_info_delete(&grep_info);
1250             return res;
1251         }
1252         if ((*result_sets)[*num_result_sets] == 0)
1253             break;
1254         (*num_result_sets)++;
1255
1256         if (!*termp)
1257             break;
1258     }
1259     grep_info_delete(&grep_info);
1260     return ZEBRA_OK;
1261 }
1262
1263 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1264                                          Z_AttributesPlusTerm *zapt,
1265                                          const Odr_oid *attributeSet,
1266                                          const char *index_type,
1267                                          NMEM rset_nmem,
1268                                          RSET *rset,
1269                                          struct rset_key_control *kc)
1270 {
1271     int position_value;
1272     AttrType position;
1273     int ord = -1;
1274     char ord_buf[32];
1275     char term_dict[100];
1276     int ord_len;
1277     char *val;
1278     ISAM_P isam_p;
1279     zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, index_type);
1280     
1281     attr_init_APT(&position, zapt, 3);
1282     position_value = attr_find(&position, NULL);
1283     switch(position_value)
1284     {
1285     case 3:
1286     case -1:
1287         return ZEBRA_OK;
1288     case 1:
1289     case 2:
1290         break;
1291     default:
1292         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1293                             position_value);
1294         return ZEBRA_FAIL;
1295     }
1296
1297
1298     if (!zebra_maps_is_first_in_field(zm))
1299     {
1300         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1301                             position_value);
1302         return ZEBRA_FAIL;
1303     }
1304
1305     if (!zh->reg->isamb && !zh->reg->isamc)
1306     {
1307         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1308                             position_value);
1309         return ZEBRA_FAIL;
1310     }
1311
1312     if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1313                           attributeSet, &ord) != ZEBRA_OK)
1314     {
1315         return ZEBRA_FAIL;
1316     }
1317     ord_len = key_SU_encode(ord, ord_buf);
1318     memcpy(term_dict, ord_buf, ord_len);
1319     strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1320     val = dict_lookup(zh->reg->dict, term_dict);
1321     if (val)
1322     {
1323         assert(*val == sizeof(ISAM_P));
1324         memcpy(&isam_p, val+1, sizeof(isam_p));
1325         
1326         if (zh->reg->isamb)
1327             *rset = rsisamb_create(rset_nmem, kc, kc->scope,
1328                                    zh->reg->isamb, isam_p, 0);
1329         else if (zh->reg->isamc)
1330             *rset = rsisamc_create(rset_nmem, kc, kc->scope,
1331                                    zh->reg->isamc, isam_p, 0);
1332     }
1333     return ZEBRA_OK;
1334 }
1335                                          
1336 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1337                                        Z_AttributesPlusTerm *zapt,
1338                                        const char *termz_org,
1339                                        const Odr_oid *attributeSet,
1340                                        NMEM stream,
1341                                        const char *index_type, int complete_flag,
1342                                        const char *rank_type,
1343                                        const char *xpath_use,
1344                                        NMEM rset_nmem,
1345                                        RSET *rset,
1346                                        struct rset_key_control *kc)
1347 {
1348     RSET *result_sets = 0;
1349     int num_result_sets = 0;
1350     ZEBRA_RES res =
1351         term_list_trunc(zh, zapt, termz_org, attributeSet,
1352                         stream, index_type, complete_flag,
1353                         rank_type, xpath_use,
1354                         rset_nmem,
1355                         &result_sets, &num_result_sets, kc);
1356
1357     if (res != ZEBRA_OK)
1358         return res;
1359
1360     if (num_result_sets > 0)
1361     {
1362         RSET first_set = 0;
1363         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1364                                       index_type,
1365                                       rset_nmem, &first_set,
1366                                       kc);
1367         if (res != ZEBRA_OK)
1368             return res;
1369         if (first_set)
1370         {
1371             RSET *nsets = nmem_malloc(stream,
1372                                       sizeof(RSET) * (num_result_sets+1));
1373             nsets[0] = first_set;
1374             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1375             result_sets = nsets;
1376             num_result_sets++;
1377         }
1378     }
1379     if (num_result_sets == 0)
1380         *rset = rset_create_null(rset_nmem, kc, 0); 
1381     else if (num_result_sets == 1)
1382         *rset = result_sets[0];
1383     else
1384         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1385                                  num_result_sets, result_sets,
1386                                  1 /* ordered */, 0 /* exclusion */,
1387                                  3 /* relation */, 1 /* distance */);
1388     if (!*rset)
1389         return ZEBRA_FAIL;
1390     return ZEBRA_OK;
1391 }
1392
1393 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1394                                         Z_AttributesPlusTerm *zapt,
1395                                         const char *termz_org,
1396                                         const Odr_oid *attributeSet,
1397                                         NMEM stream,
1398                                         const char *index_type, 
1399                                         int complete_flag,
1400                                         const char *rank_type,
1401                                         const char *xpath_use,
1402                                         NMEM rset_nmem,
1403                                         RSET *rset,
1404                                         struct rset_key_control *kc)
1405 {
1406     RSET *result_sets = 0;
1407     int num_result_sets = 0;
1408     int i;
1409     ZEBRA_RES res =
1410         term_list_trunc(zh, zapt, termz_org, attributeSet,
1411                         stream, index_type, complete_flag,
1412                         rank_type, xpath_use,
1413                         rset_nmem,
1414                         &result_sets, &num_result_sets, kc);
1415     if (res != ZEBRA_OK)
1416         return res;
1417
1418     for (i = 0; i<num_result_sets; i++)
1419     {
1420         RSET first_set = 0;
1421         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1422                                       index_type,
1423                                       rset_nmem, &first_set,
1424                                       kc);
1425         if (res != ZEBRA_OK)
1426         {
1427             for (i = 0; i<num_result_sets; i++)
1428                 rset_delete(result_sets[i]);
1429             return res;
1430         }
1431
1432         if (first_set)
1433         {
1434             RSET tmp_set[2];
1435
1436             tmp_set[0] = first_set;
1437             tmp_set[1] = result_sets[i];
1438             
1439             result_sets[i] = rset_create_prox(
1440                 rset_nmem, kc, kc->scope,
1441                 2, tmp_set,
1442                 1 /* ordered */, 0 /* exclusion */,
1443                 3 /* relation */, 1 /* distance */);
1444         }
1445     }
1446     if (num_result_sets == 0)
1447         *rset = rset_create_null(rset_nmem, kc, 0); 
1448     else if (num_result_sets == 1)
1449         *rset = result_sets[0];
1450     else
1451         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1452                                num_result_sets, result_sets);
1453     if (!*rset)
1454         return ZEBRA_FAIL;
1455     return ZEBRA_OK;
1456 }
1457
1458 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1459                                          Z_AttributesPlusTerm *zapt,
1460                                          const char *termz_org,
1461                                          const Odr_oid *attributeSet,
1462                                          NMEM stream,
1463                                          const char *index_type, 
1464                                          int complete_flag,
1465                                          const char *rank_type, 
1466                                          const char *xpath_use,
1467                                          NMEM rset_nmem,
1468                                          RSET *rset,
1469                                          struct rset_key_control *kc)
1470 {
1471     RSET *result_sets = 0;
1472     int num_result_sets = 0;
1473     int i;
1474     ZEBRA_RES res =
1475         term_list_trunc(zh, zapt, termz_org, attributeSet,
1476                         stream, index_type, complete_flag,
1477                         rank_type, xpath_use,
1478                         rset_nmem,
1479                         &result_sets, &num_result_sets,
1480                         kc);
1481     if (res != ZEBRA_OK)
1482         return res;
1483     for (i = 0; i<num_result_sets; i++)
1484     {
1485         RSET first_set = 0;
1486         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1487                                       index_type,
1488                                       rset_nmem, &first_set,
1489                                       kc);
1490         if (res != ZEBRA_OK)
1491         {
1492             for (i = 0; i<num_result_sets; i++)
1493                 rset_delete(result_sets[i]);
1494             return res;
1495         }
1496
1497         if (first_set)
1498         {
1499             RSET tmp_set[2];
1500
1501             tmp_set[0] = first_set;
1502             tmp_set[1] = result_sets[i];
1503             
1504             result_sets[i] = rset_create_prox(
1505                 rset_nmem, kc, kc->scope,
1506                 2, tmp_set,
1507                 1 /* ordered */, 0 /* exclusion */,
1508                 3 /* relation */, 1 /* distance */);
1509         }
1510     }
1511
1512
1513     if (num_result_sets == 0)
1514         *rset = rset_create_null(rset_nmem, kc, 0); 
1515     else if (num_result_sets == 1)
1516         *rset = result_sets[0];
1517     else
1518         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1519                                num_result_sets, result_sets);
1520     if (!*rset)
1521         return ZEBRA_FAIL;
1522     return ZEBRA_OK;
1523 }
1524
1525 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1526                             const char **term_sub,
1527                             WRBUF term_dict,
1528                             const Odr_oid *attributeSet,
1529                             struct grep_info *grep_info,
1530                             int *max_pos,
1531                             zebra_map_t zm,
1532                             char *term_dst,
1533                             int *error_code)
1534 {
1535     AttrType relation;
1536     int relation_value;
1537     int term_value;
1538     int r;
1539     WRBUF term_num = wrbuf_alloc();
1540
1541     *error_code = 0;
1542     attr_init_APT(&relation, zapt, 2);
1543     relation_value = attr_find(&relation, NULL);
1544
1545     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1546
1547     switch (relation_value)
1548     {
1549     case 1:
1550         yaz_log(log_level_rpn, "Relation <");
1551         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1552         { 
1553             wrbuf_destroy(term_num);
1554             return 0;
1555         }
1556         term_value = atoi(wrbuf_cstr(term_num));
1557         gen_regular_rel(term_dict, term_value-1, 1);
1558         break;
1559     case 2:
1560         yaz_log(log_level_rpn, "Relation <=");
1561         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1562         {
1563             wrbuf_destroy(term_num);
1564             return 0;
1565         }
1566         term_value = atoi(wrbuf_cstr(term_num));
1567         gen_regular_rel(term_dict, term_value, 1);
1568         break;
1569     case 4:
1570         yaz_log(log_level_rpn, "Relation >=");
1571         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1572         {
1573             wrbuf_destroy(term_num);
1574             return 0;
1575         }
1576         term_value = atoi(wrbuf_cstr(term_num));
1577         gen_regular_rel(term_dict, term_value, 0);
1578         break;
1579     case 5:
1580         yaz_log(log_level_rpn, "Relation >");
1581         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1582         {
1583             wrbuf_destroy(term_num);
1584             return 0;
1585         }
1586         term_value = atoi(wrbuf_cstr(term_num));
1587         gen_regular_rel(term_dict, term_value+1, 0);
1588         break;
1589     case -1:
1590     case 3:
1591         yaz_log(log_level_rpn, "Relation =");
1592         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1593         {
1594             wrbuf_destroy(term_num);
1595             return 0; 
1596         }
1597         term_value = atoi(wrbuf_cstr(term_num));
1598         wrbuf_printf(term_dict, "(0*%d)", term_value);
1599         break;
1600     case 103:
1601         /* term_tmp untouched.. */
1602         while (**term_sub != '\0')
1603             (*term_sub)++;
1604         break;
1605     default:
1606         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1607         wrbuf_destroy(term_num); 
1608         return 0;
1609     }
1610     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 
1611                          0, grep_info, max_pos, 0, grep_handle);
1612
1613     if (r == 1)
1614         zebra_set_partial_result(zh);
1615     else if (r)
1616         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1617     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1618     wrbuf_destroy(term_num);
1619     return 1;
1620 }
1621
1622 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1623                               const char **term_sub, 
1624                               WRBUF term_dict,
1625                               const Odr_oid *attributeSet, NMEM stream,
1626                               struct grep_info *grep_info,
1627                               const char *index_type, int complete_flag,
1628                               char *term_dst, 
1629                               const char *xpath_use,
1630                               struct ord_list **ol)
1631 {
1632     const char *termp;
1633     struct rpn_char_map_info rcmi;
1634     int max_pos;
1635     int relation_error = 0;
1636     int ord, ord_len, i;
1637     char ord_buf[32];
1638     zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, index_type);
1639     
1640     *ol = ord_list_create(stream);
1641
1642     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1643
1644     termp = *term_sub;
1645     
1646     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1647                           attributeSet, &ord) != ZEBRA_OK)
1648     {
1649         return ZEBRA_FAIL;
1650     }
1651     
1652     wrbuf_rewind(term_dict);
1653     
1654     *ol = ord_list_append(stream, *ol, ord);
1655     
1656     ord_len = key_SU_encode(ord, ord_buf);
1657     
1658     wrbuf_putc(term_dict, '(');
1659     for (i = 0; i < ord_len; i++)
1660     {
1661         wrbuf_putc(term_dict, 1);
1662         wrbuf_putc(term_dict, ord_buf[i]);
1663     }
1664     wrbuf_putc(term_dict, ')');
1665     
1666     if (!numeric_relation(zh, zapt, &termp, term_dict,
1667                           attributeSet, grep_info, &max_pos, zm,
1668                           term_dst, &relation_error))
1669     {
1670         if (relation_error)
1671         {
1672             zebra_setError(zh, relation_error, 0);
1673             return ZEBRA_FAIL;
1674         }
1675         *term_sub = 0;
1676         return ZEBRA_OK;
1677     }
1678     *term_sub = termp;
1679     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1680     return ZEBRA_OK;
1681 }
1682
1683                                  
1684 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1685                                         Z_AttributesPlusTerm *zapt,
1686                                         const char *termz,
1687                                         const Odr_oid *attributeSet,
1688                                         NMEM stream,
1689                                         const char *index_type, 
1690                                         int complete_flag,
1691                                         const char *rank_type, 
1692                                         const char *xpath_use,
1693                                         NMEM rset_nmem,
1694                                         RSET *rset,
1695                                         struct rset_key_control *kc)
1696 {
1697     char term_dst[IT_MAX_WORD+1];
1698     const char *termp = termz;
1699     RSET *result_sets = 0;
1700     int num_result_sets = 0;
1701     ZEBRA_RES res;
1702     struct grep_info grep_info;
1703     int alloc_sets = 0;
1704     zint hits_limit_value;
1705     const char *term_ref_id_str = 0;
1706
1707     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1708                           stream);
1709
1710     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1711     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1712         return ZEBRA_FAIL;
1713     while (1)
1714     { 
1715         struct ord_list *ol;
1716         WRBUF term_dict = wrbuf_alloc();
1717         if (alloc_sets == num_result_sets)
1718         {
1719             int add = 10;
1720             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1721                                               sizeof(*rnew));
1722             if (alloc_sets)
1723                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1724             alloc_sets = alloc_sets + add;
1725             result_sets = rnew;
1726         }
1727         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1728         grep_info.isam_p_indx = 0;
1729         res = numeric_term(zh, zapt, &termp, term_dict,
1730                            attributeSet, stream, &grep_info,
1731                            index_type, complete_flag,
1732                            term_dst, xpath_use, &ol);
1733         wrbuf_destroy(term_dict);
1734         if (res == ZEBRA_FAIL || termp == 0)
1735             break;
1736         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1737         result_sets[num_result_sets] =
1738             rset_trunc(zh, grep_info.isam_p_buf,
1739                        grep_info.isam_p_indx, term_dst,
1740                        strlen(term_dst), rank_type,
1741                        0 /* preserve position */,
1742                        zapt->term->which, rset_nmem, 
1743                        kc, kc->scope, ol, index_type,
1744                        hits_limit_value,
1745                        term_ref_id_str);
1746         if (!result_sets[num_result_sets])
1747             break;
1748         num_result_sets++;
1749         if (!*termp)
1750             break;
1751     }
1752     grep_info_delete(&grep_info);
1753
1754     if (res != ZEBRA_OK)
1755         return res;
1756     if (num_result_sets == 0)
1757         *rset = rset_create_null(rset_nmem, kc, 0);
1758     else if (num_result_sets == 1)
1759         *rset = result_sets[0];
1760     else
1761         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1762                                 num_result_sets, result_sets);
1763     if (!*rset)
1764         return ZEBRA_FAIL;
1765     return ZEBRA_OK;
1766 }
1767
1768 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1769                                       Z_AttributesPlusTerm *zapt,
1770                                       const char *termz,
1771                                       const Odr_oid *attributeSet,
1772                                       NMEM stream,
1773                                       const char *rank_type, NMEM rset_nmem,
1774                                       RSET *rset,
1775                                       struct rset_key_control *kc)
1776 {
1777     Record rec;
1778     zint sysno = atozint(termz);
1779     
1780     if (sysno <= 0)
1781         sysno = 0;
1782     rec = rec_get(zh->reg->records, sysno);
1783     if (!rec)
1784         sysno = 0;
1785
1786     rec_free(&rec);
1787
1788     if (sysno <= 0)
1789     {
1790         *rset = rset_create_null(rset_nmem, kc, 0);
1791     }
1792     else
1793     {
1794         RSFD rsfd;
1795         struct it_key key;
1796         *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1797                                  res_get(zh->res, "setTmpDir"), 0);
1798         rsfd = rset_open(*rset, RSETF_WRITE);
1799         
1800         key.mem[0] = sysno;
1801         key.mem[1] = 1;
1802         key.len = 2;
1803         rset_write(rsfd, &key);
1804         rset_close(rsfd);
1805     }
1806     return ZEBRA_OK;
1807 }
1808
1809 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1810                                const Odr_oid *attributeSet, NMEM stream,
1811                                Z_SortKeySpecList *sort_sequence,
1812                                const char *rank_type,
1813                                NMEM rset_nmem,
1814                                RSET *rset,
1815                                struct rset_key_control *kc)
1816 {
1817     int i;
1818     int sort_relation_value;
1819     AttrType sort_relation_type;
1820     Z_SortKeySpec *sks;
1821     Z_SortKey *sk;
1822     char termz[20];
1823     
1824     attr_init_APT(&sort_relation_type, zapt, 7);
1825     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1826
1827     if (!sort_sequence->specs)
1828     {
1829         sort_sequence->num_specs = 10;
1830         sort_sequence->specs = (Z_SortKeySpec **)
1831             nmem_malloc(stream, sort_sequence->num_specs *
1832                          sizeof(*sort_sequence->specs));
1833         for (i = 0; i<sort_sequence->num_specs; i++)
1834             sort_sequence->specs[i] = 0;
1835     }
1836     if (zapt->term->which != Z_Term_general)
1837         i = 0;
1838     else
1839         i = atoi_n((char *) zapt->term->u.general->buf,
1840                     zapt->term->u.general->len);
1841     if (i >= sort_sequence->num_specs)
1842         i = 0;
1843     sprintf(termz, "%d", i);
1844
1845     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1846     sks->sortElement = (Z_SortElement *)
1847         nmem_malloc(stream, sizeof(*sks->sortElement));
1848     sks->sortElement->which = Z_SortElement_generic;
1849     sk = sks->sortElement->u.generic = (Z_SortKey *)
1850         nmem_malloc(stream, sizeof(*sk));
1851     sk->which = Z_SortKey_sortAttributes;
1852     sk->u.sortAttributes = (Z_SortAttributes *)
1853         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1854
1855     sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1856     sk->u.sortAttributes->list = zapt->attributes;
1857
1858     sks->sortRelation = (int *)
1859         nmem_malloc(stream, sizeof(*sks->sortRelation));
1860     if (sort_relation_value == 1)
1861         *sks->sortRelation = Z_SortKeySpec_ascending;
1862     else if (sort_relation_value == 2)
1863         *sks->sortRelation = Z_SortKeySpec_descending;
1864     else 
1865         *sks->sortRelation = Z_SortKeySpec_ascending;
1866
1867     sks->caseSensitivity = (int *)
1868         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1869     *sks->caseSensitivity = 0;
1870
1871     sks->which = Z_SortKeySpec_null;
1872     sks->u.null = odr_nullval ();
1873     sort_sequence->specs[i] = sks;
1874     *rset = rset_create_null(rset_nmem, kc, 0);
1875     return ZEBRA_OK;
1876 }
1877
1878
1879 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1880                            const Odr_oid *attributeSet,
1881                            struct xpath_location_step *xpath, int max,
1882                            NMEM mem)
1883 {
1884     const Odr_oid *curAttributeSet = attributeSet;
1885     AttrType use;
1886     const char *use_string = 0;
1887     
1888     attr_init_APT(&use, zapt, 1);
1889     attr_find_ex(&use, &curAttributeSet, &use_string);
1890
1891     if (!use_string || *use_string != '/')
1892         return -1;
1893
1894     return zebra_parse_xpath_str(use_string, xpath, max, mem);
1895 }
1896  
1897                
1898
1899 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1900                         const char *index_type, const char *term, 
1901                         const char *xpath_use,
1902                         NMEM rset_nmem,
1903                         struct rset_key_control *kc)
1904 {
1905     struct grep_info grep_info;
1906     int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
1907                                            zinfo_index_category_index,
1908                                            index_type, xpath_use);
1909     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
1910         return rset_create_null(rset_nmem, kc, 0);
1911     
1912     if (ord < 0)
1913         return rset_create_null(rset_nmem, kc, 0);
1914     else
1915     {
1916         int i, r, max_pos;
1917         char ord_buf[32];
1918         RSET rset;
1919         WRBUF term_dict = wrbuf_alloc();
1920         int ord_len = key_SU_encode(ord, ord_buf);
1921         int term_type = Z_Term_characterString;
1922         const char *flags = "void";
1923
1924         wrbuf_putc(term_dict, '(');
1925         for (i = 0; i<ord_len; i++)
1926         {
1927             wrbuf_putc(term_dict, 1);
1928             wrbuf_putc(term_dict, ord_buf[i]);
1929         }
1930         wrbuf_putc(term_dict, ')');
1931         wrbuf_puts(term_dict, term);
1932         
1933         grep_info.isam_p_indx = 0;
1934         r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
1935                              &grep_info, &max_pos, 0, grep_handle);
1936         yaz_log(YLOG_DEBUG, "%s %d positions", term,
1937                 grep_info.isam_p_indx);
1938         rset = rset_trunc(zh, grep_info.isam_p_buf,
1939                           grep_info.isam_p_indx, term, strlen(term),
1940                           flags, 1, term_type, rset_nmem,
1941                           kc, kc->scope, 0, index_type, 0 /* hits_limit */,
1942                           0 /* term_ref_id_str */);
1943         grep_info_delete(&grep_info);
1944         wrbuf_destroy(term_dict);
1945         return rset;
1946     }
1947 }
1948
1949 static
1950 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
1951                            NMEM stream, const char *rank_type, RSET rset,
1952                            int xpath_len, struct xpath_location_step *xpath,
1953                            NMEM rset_nmem,
1954                            RSET *rset_out,
1955                            struct rset_key_control *kc)
1956 {
1957     int i;
1958     int always_matches = rset ? 0 : 1;
1959
1960     if (xpath_len < 0)
1961     {
1962         *rset_out = rset;
1963         return ZEBRA_OK;
1964     }
1965
1966     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
1967     for (i = 0; i<xpath_len; i++)
1968     {
1969         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
1970
1971     }
1972
1973     /*
1974       //a    ->    a/.*
1975       //a/b  ->    b/a/.*
1976       /a     ->    a/
1977       /a/b   ->    b/a/
1978
1979       /      ->    none
1980
1981    a[@attr = value]/b[@other = othervalue]
1982
1983  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
1984  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
1985  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
1986  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
1987  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
1988  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
1989       
1990     */
1991
1992     dict_grep_cmap(zh->reg->dict, 0, 0);
1993     
1994     {
1995         int level = xpath_len;
1996         int first_path = 1;
1997         
1998         while (--level >= 0)
1999         {
2000             WRBUF xpath_rev = wrbuf_alloc();
2001             int i;
2002             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2003
2004             for (i = level; i >= 1; --i)
2005             {
2006                 const char *cp = xpath[i].part;
2007                 if (*cp)
2008                 {
2009                     for (; *cp; cp++)
2010                     {
2011                         if (*cp == '*')
2012                             wrbuf_puts(xpath_rev, "[^/]*");
2013                         else if (*cp == ' ')
2014                             wrbuf_puts(xpath_rev, "\001 ");
2015                         else
2016                             wrbuf_putc(xpath_rev, *cp);
2017
2018                         /* wrbuf_putc does not null-terminate , but
2019                            wrbuf_puts below ensures it does.. so xpath_rev
2020                            is OK iff length is > 0 */
2021                     }
2022                     wrbuf_puts(xpath_rev, "/");
2023                 }
2024                 else if (i == 1)  /* // case */
2025                     wrbuf_puts(xpath_rev, ".*");
2026             }
2027             if (xpath[level].predicate &&
2028                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2029                 xpath[level].predicate->u.relation.name[0])
2030             {
2031                 WRBUF wbuf = wrbuf_alloc();
2032                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2033                 if (xpath[level].predicate->u.relation.value)
2034                 {
2035                     const char *cp = xpath[level].predicate->u.relation.value;
2036                     wrbuf_putc(wbuf, '=');
2037                     
2038                     while (*cp)
2039                     {
2040                         if (strchr(REGEX_CHARS, *cp))
2041                             wrbuf_putc(wbuf, '\\');
2042                         wrbuf_putc(wbuf, *cp);
2043                         cp++;
2044                     }
2045                 }
2046                 rset_attr = xpath_trunc(
2047                     zh, stream, "0", wrbuf_cstr(wbuf), 
2048                     ZEBRA_XPATH_ATTR_NAME, 
2049                     rset_nmem, kc);
2050                 wrbuf_destroy(wbuf);
2051             } 
2052             else 
2053             {
2054                 if (!first_path)
2055                 {
2056                     wrbuf_destroy(xpath_rev);
2057                     continue;
2058                 }
2059             }
2060             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, 
2061                     wrbuf_cstr(xpath_rev));
2062             if (wrbuf_len(xpath_rev))
2063             {
2064                 rset_start_tag = xpath_trunc(zh, stream, "0", 
2065                                              wrbuf_cstr(xpath_rev),
2066                                              ZEBRA_XPATH_ELM_BEGIN, 
2067                                              rset_nmem, kc);
2068                 if (always_matches)
2069                     rset = rset_start_tag;
2070                 else
2071                 {
2072                     rset_end_tag = xpath_trunc(zh, stream, "0", 
2073                                                wrbuf_cstr(xpath_rev),
2074                                                ZEBRA_XPATH_ELM_END, 
2075                                                rset_nmem, kc);
2076                     
2077                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2078                                                rset_start_tag, rset,
2079                                                rset_end_tag, rset_attr);
2080                 }
2081             }
2082             wrbuf_destroy(xpath_rev);
2083             first_path = 0;
2084         }
2085     }
2086     *rset_out = rset;
2087     return ZEBRA_OK;
2088 }
2089
2090 #define MAX_XPATH_STEPS 10
2091
2092 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2093                                      Z_AttributesPlusTerm *zapt,
2094                                      const Odr_oid *attributeSet, NMEM stream,
2095                                      Z_SortKeySpecList *sort_sequence,
2096                                      NMEM rset_nmem,
2097                                      RSET *rset,
2098                                      struct rset_key_control *kc);
2099
2100 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2101                                 const Odr_oid *attributeSet, NMEM stream,
2102                                 Z_SortKeySpecList *sort_sequence,
2103                                 int num_bases, char **basenames, 
2104                                 NMEM rset_nmem,
2105                                 RSET *rset,
2106                                 struct rset_key_control *kc)
2107 {
2108     RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2109     ZEBRA_RES res = ZEBRA_OK;
2110     int i;
2111     for (i = 0; i < num_bases; i++)
2112     {
2113
2114         if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2115         {
2116             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2117                            basenames[i]);
2118             res = ZEBRA_FAIL;
2119             break;
2120         }
2121         res = rpn_search_database(zh, zapt, attributeSet, stream,
2122                                   sort_sequence,
2123                                   rset_nmem, rsets+i, kc);
2124         if (res != ZEBRA_OK)
2125             break;
2126     }
2127     if (res != ZEBRA_OK)
2128     {   /* must clean up the already created sets */
2129         while (--i >= 0)
2130             rset_delete(rsets[i]);
2131         *rset = 0;
2132     }
2133     else 
2134     {
2135         if (num_bases == 1)
2136             *rset = rsets[0];
2137         else if (num_bases == 0)
2138             *rset = rset_create_null(rset_nmem, kc, 0); 
2139         else
2140             *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
2141                                    num_bases, rsets);
2142     }
2143     return res;
2144 }
2145
2146 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2147                                      Z_AttributesPlusTerm *zapt,
2148                                      const Odr_oid *attributeSet, NMEM stream,
2149                                      Z_SortKeySpecList *sort_sequence,
2150                                      NMEM rset_nmem,
2151                                      RSET *rset,
2152                                      struct rset_key_control *kc)
2153 {
2154     ZEBRA_RES res = ZEBRA_OK;
2155     const char *index_type;
2156     char *search_type = NULL;
2157     char rank_type[128];
2158     int complete_flag;
2159     int sort_flag;
2160     char termz[IT_MAX_WORD+1];
2161     int xpath_len;
2162     const char *xpath_use = 0;
2163     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2164
2165     if (!log_level_set)
2166     {
2167         log_level_rpn = yaz_log_module_level("rpn");
2168         log_level_set = 1;
2169     }
2170     zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2171                     rank_type, &complete_flag, &sort_flag);
2172     
2173     yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2174     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2175     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2176     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2177
2178     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2179         return ZEBRA_FAIL;
2180
2181     if (sort_flag)
2182         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2183                              rank_type, rset_nmem, rset, kc);
2184     /* consider if an X-Path query is used */
2185     xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
2186                                 xpath, MAX_XPATH_STEPS, stream);
2187     if (xpath_len >= 0)
2188     {
2189         if (xpath[xpath_len-1].part[0] == '@') 
2190             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2191         else
2192             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
2193
2194         if (1)
2195         {
2196             AttrType relation;
2197             int relation_value;
2198
2199             attr_init_APT(&relation, zapt, 2);
2200             relation_value = attr_find(&relation, NULL);
2201
2202             if (relation_value == 103) /* alwaysmatches */
2203             {
2204                 *rset = 0; /* signal no "term" set */
2205                 return rpn_search_xpath(zh, stream, rank_type, *rset, 
2206                                         xpath_len, xpath, rset_nmem, rset, kc);
2207             }
2208         }
2209     }
2210
2211     /* search using one of the various search type strategies
2212        termz is our UTF-8 search term
2213        attributeSet is top-level default attribute set 
2214        stream is ODR for search
2215        reg_id is the register type
2216        complete_flag is 1 for complete subfield, 0 for incomplete
2217        xpath_use is use-attribute to be used for X-Path search, 0 for none
2218     */
2219     if (!strcmp(search_type, "phrase"))
2220     {
2221         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2222                                     index_type, complete_flag, rank_type,
2223                                     xpath_use,
2224                                     rset_nmem,
2225                                     rset, kc);
2226     }
2227     else if (!strcmp(search_type, "and-list"))
2228     {
2229         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2230                                       index_type, complete_flag, rank_type,
2231                                       xpath_use,
2232                                       rset_nmem,
2233                                       rset, kc);
2234     }
2235     else if (!strcmp(search_type, "or-list"))
2236     {
2237         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2238                                      index_type, complete_flag, rank_type,
2239                                      xpath_use,
2240                                      rset_nmem,
2241                                      rset, kc);
2242     }
2243     else if (!strcmp(search_type, "local"))
2244     {
2245         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2246                                    rank_type, rset_nmem, rset, kc);
2247     }
2248     else if (!strcmp(search_type, "numeric"))
2249     {
2250         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2251                                      index_type, complete_flag, rank_type,
2252                                      xpath_use,
2253                                      rset_nmem,
2254                                      rset, kc);
2255     }
2256     else
2257     {
2258         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2259         res = ZEBRA_FAIL;
2260     }
2261     if (res != ZEBRA_OK)
2262         return res;
2263     if (!*rset)
2264         return ZEBRA_FAIL;
2265     return rpn_search_xpath(zh, stream, rank_type, *rset, 
2266                             xpath_len, xpath, rset_nmem, rset, kc);
2267 }
2268
2269 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2270                                       const Odr_oid *attributeSet, 
2271                                       NMEM stream, NMEM rset_nmem,
2272                                       Z_SortKeySpecList *sort_sequence,
2273                                       int num_bases, char **basenames,
2274                                       RSET **result_sets, int *num_result_sets,
2275                                       Z_Operator *parent_op,
2276                                       struct rset_key_control *kc);
2277
2278 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2279                                    zint *approx_limit)
2280 {
2281     ZEBRA_RES res = ZEBRA_OK;
2282     if (zs->which == Z_RPNStructure_complex)
2283     {
2284         if (res == ZEBRA_OK)
2285             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2286                                            approx_limit);
2287         if (res == ZEBRA_OK)
2288             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2289                                            approx_limit);
2290     }
2291     else if (zs->which == Z_RPNStructure_simple)
2292     {
2293         if (zs->u.simple->which == Z_Operand_APT)
2294         {
2295             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2296             AttrType global_hits_limit_attr;
2297             int l;
2298             
2299             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2300             
2301             l = attr_find(&global_hits_limit_attr, NULL);
2302             if (l != -1)
2303                 *approx_limit = l;
2304         }
2305     }
2306     return res;
2307 }
2308
2309 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2310                          const Odr_oid *attributeSet, 
2311                          NMEM stream, NMEM rset_nmem,
2312                          Z_SortKeySpecList *sort_sequence,
2313                          int num_bases, char **basenames,
2314                          RSET *result_set)
2315 {
2316     RSET *result_sets = 0;
2317     int num_result_sets = 0;
2318     ZEBRA_RES res;
2319     struct rset_key_control *kc = zebra_key_control_create(zh);
2320
2321     res = rpn_search_structure(zh, zs, attributeSet,
2322                                stream, rset_nmem,
2323                                sort_sequence, 
2324                                num_bases, basenames,
2325                                &result_sets, &num_result_sets,
2326                                0 /* no parent op */,
2327                                kc);
2328     if (res != ZEBRA_OK)
2329     {
2330         int i;
2331         for (i = 0; i<num_result_sets; i++)
2332             rset_delete(result_sets[i]);
2333         *result_set = 0;
2334     }
2335     else
2336     {
2337         assert(num_result_sets == 1);
2338         assert(result_sets);
2339         assert(*result_sets);
2340         *result_set = *result_sets;
2341     }
2342     (*kc->dec)(kc);
2343     return res;
2344 }
2345
2346 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2347                                const Odr_oid *attributeSet, 
2348                                NMEM stream, NMEM rset_nmem,
2349                                Z_SortKeySpecList *sort_sequence,
2350                                int num_bases, char **basenames,
2351                                RSET **result_sets, int *num_result_sets,
2352                                Z_Operator *parent_op,
2353                                struct rset_key_control *kc)
2354 {
2355     *num_result_sets = 0;
2356     if (zs->which == Z_RPNStructure_complex)
2357     {
2358         ZEBRA_RES res;
2359         Z_Operator *zop = zs->u.complex->roperator;
2360         RSET *result_sets_l = 0;
2361         int num_result_sets_l = 0;
2362         RSET *result_sets_r = 0;
2363         int num_result_sets_r = 0;
2364
2365         res = rpn_search_structure(zh, zs->u.complex->s1,
2366                                    attributeSet, stream, rset_nmem,
2367                                    sort_sequence,
2368                                    num_bases, basenames,
2369                                    &result_sets_l, &num_result_sets_l,
2370                                    zop, kc);
2371         if (res != ZEBRA_OK)
2372         {
2373             int i;
2374             for (i = 0; i<num_result_sets_l; i++)
2375                 rset_delete(result_sets_l[i]);
2376             return res;
2377         }
2378         res = rpn_search_structure(zh, zs->u.complex->s2,
2379                                    attributeSet, stream, rset_nmem,
2380                                    sort_sequence,
2381                                    num_bases, basenames,
2382                                    &result_sets_r, &num_result_sets_r,
2383                                    zop, kc);
2384         if (res != ZEBRA_OK)
2385         {
2386             int i;
2387             for (i = 0; i<num_result_sets_l; i++)
2388                 rset_delete(result_sets_l[i]);
2389             for (i = 0; i<num_result_sets_r; i++)
2390                 rset_delete(result_sets_r[i]);
2391             return res;
2392         }
2393
2394         /* make a new list of result for all children */
2395         *num_result_sets = num_result_sets_l + num_result_sets_r;
2396         *result_sets = nmem_malloc(stream, *num_result_sets * 
2397                                    sizeof(**result_sets));
2398         memcpy(*result_sets, result_sets_l, 
2399                num_result_sets_l * sizeof(**result_sets));
2400         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2401                num_result_sets_r * sizeof(**result_sets));
2402
2403         if (!parent_op || parent_op->which != zop->which
2404             || (zop->which != Z_Operator_and &&
2405                 zop->which != Z_Operator_or))
2406         {
2407             /* parent node different from this one (or non-present) */
2408             /* we must combine result sets now */
2409             RSET rset;
2410             switch (zop->which)
2411             {
2412             case Z_Operator_and:
2413                 rset = rset_create_and(rset_nmem, kc,
2414                                        kc->scope,
2415                                        *num_result_sets, *result_sets);
2416                 break;
2417             case Z_Operator_or:
2418                 rset = rset_create_or(rset_nmem, kc,
2419                                       kc->scope, 0, /* termid */
2420                                       *num_result_sets, *result_sets);
2421                 break;
2422             case Z_Operator_and_not:
2423                 rset = rset_create_not(rset_nmem, kc,
2424                                        kc->scope,
2425                                        (*result_sets)[0],
2426                                        (*result_sets)[1]);
2427                 break;
2428             case Z_Operator_prox:
2429                 if (zop->u.prox->which != Z_ProximityOperator_known)
2430                 {
2431                     zebra_setError(zh, 
2432                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2433                                    0);
2434                     return ZEBRA_FAIL;
2435                 }
2436                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2437                 {
2438                     zebra_setError_zint(zh,
2439                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2440                                         *zop->u.prox->u.known);
2441                     return ZEBRA_FAIL;
2442                 }
2443                 else
2444                 {
2445                     rset = rset_create_prox(rset_nmem, kc,
2446                                             kc->scope,
2447                                             *num_result_sets, *result_sets, 
2448                                             *zop->u.prox->ordered,
2449                                             (!zop->u.prox->exclusion ? 
2450                                              0 : *zop->u.prox->exclusion),
2451                                             *zop->u.prox->relationType,
2452                                             *zop->u.prox->distance );
2453                 }
2454                 break;
2455             default:
2456                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2457                 return ZEBRA_FAIL;
2458             }
2459             *num_result_sets = 1;
2460             *result_sets = nmem_malloc(stream, *num_result_sets * 
2461                                        sizeof(**result_sets));
2462             (*result_sets)[0] = rset;
2463         }
2464     }
2465     else if (zs->which == Z_RPNStructure_simple)
2466     {
2467         RSET rset;
2468         ZEBRA_RES res;
2469
2470         if (zs->u.simple->which == Z_Operand_APT)
2471         {
2472             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2473             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2474                                  attributeSet, stream, sort_sequence,
2475                                  num_bases, basenames, rset_nmem, &rset,
2476                                  kc);
2477             if (res != ZEBRA_OK)
2478                 return res;
2479         }
2480         else if (zs->u.simple->which == Z_Operand_resultSetId)
2481         {
2482             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2483             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2484             if (!rset)
2485             {
2486                 zebra_setError(zh, 
2487                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2488                                zs->u.simple->u.resultSetId);
2489                 return ZEBRA_FAIL;
2490             }
2491             rset_dup(rset);
2492         }
2493         else
2494         {
2495             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2496             return ZEBRA_FAIL;
2497         }
2498         *num_result_sets = 1;
2499         *result_sets = nmem_malloc(stream, *num_result_sets * 
2500                                    sizeof(**result_sets));
2501         (*result_sets)[0] = rset;
2502     }
2503     else
2504     {
2505         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2506         return ZEBRA_FAIL;
2507     }
2508     return ZEBRA_OK;
2509 }
2510
2511
2512
2513 /*
2514  * Local variables:
2515  * c-basic-offset: 4
2516  * indent-tabs-mode: nil
2517  * End:
2518  * vim: shiftwidth=4 tabstop=8 expandtab
2519  */
2520