Changed the index type to a string everywhere.
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* $Id: rpnsearch.c,v 1.19 2007-10-31 16:56:14 adam Exp $
2    Copyright (C) 1995-2007
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
/* Lazily resolved logging level for the "rpn" module; add_isam_p fills
   these in on first use.  Static storage is zero-initialised. */
static int log_level_set;   /* non-zero once log_level_rpn has been looked up */
static int log_level_rpn;   /* level passed to yaz_log for rpn messages */

#define TERMSET_DISABLE 1
44
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
46 {
47     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48     const char **out = zebra_maps_input(p->zm, from, len, 0);
49 #if 0
50     if (out && *out)
51     {
52         const char *outp = *out;
53         yaz_log(YLOG_LOG, "---");
54         while (*outp)
55         {
56             yaz_log(YLOG_LOG, "%02X", *outp);
57             outp++;
58         }
59     }
60 #endif
61     return out;
62 }
63
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65                           struct rpn_char_map_info *map_info)
66 {
67     map_info->zm = zm;
68     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
69 }
70
71 #define TERM_COUNT        
72        
73 struct grep_info {        
74 #ifdef TERM_COUNT        
75     int *term_no;        
76 #endif        
77     ISAM_P *isam_p_buf;
78     int isam_p_size;        
79     int isam_p_indx;
80     int trunc_max;
81     ZebraHandle zh;
82     const char *index_type;
83     ZebraSet termset;
84 };        
85
86 static int add_isam_p(const char *name, const char *info,
87                       struct grep_info *p)
88 {
89     if (!log_level_set)
90     {
91         log_level_rpn = yaz_log_module_level("rpn");
92         log_level_set = 1;
93     }
94     /* we may have to stop this madness.. NOTE: -1 so that if
95        truncmax == trunxlimit we do *not* generate result sets */
96     if (p->isam_p_indx >= p->trunc_max - 1)
97         return 1;
98
99     if (p->isam_p_indx == p->isam_p_size)
100     {
101         ISAM_P *new_isam_p_buf;
102 #ifdef TERM_COUNT        
103         int *new_term_no;        
104 #endif
105         p->isam_p_size = 2*p->isam_p_size + 100;
106         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
107                                             p->isam_p_size);
108         if (p->isam_p_buf)
109         {
110             memcpy(new_isam_p_buf, p->isam_p_buf,
111                     p->isam_p_indx * sizeof(*p->isam_p_buf));
112             xfree(p->isam_p_buf);
113         }
114         p->isam_p_buf = new_isam_p_buf;
115
116 #ifdef TERM_COUNT
117         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
118         if (p->term_no)
119         {
120             memcpy(new_term_no, p->isam_p_buf,
121                     p->isam_p_indx * sizeof(*p->term_no));
122             xfree(p->term_no);
123         }
124         p->term_no = new_term_no;
125 #endif
126     }
127     assert(*info == sizeof(*p->isam_p_buf));
128     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
129
130     if (p->termset)
131     {
132         const char *db;
133         char term_tmp[IT_MAX_WORD];
134         int ord = 0;
135         const char *index_name;
136         int len = key_SU_decode(&ord, (const unsigned char *) name);
137         
138         zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
139         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
140         zebraExplain_lookup_ord(p->zh->reg->zei,
141                                 ord, 0 /* index_type */, &db, &index_name);
142         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
143         
144         resultSetAddTerm(p->zh, p->termset, name[len], db,
145                          index_name, term_tmp);
146     }
147     (p->isam_p_indx)++;
148     return 0;
149 }
150
/* Dictionary grep callback: forwards the match to add_isam_p, with the
 * opaque client pointer interpreted as the struct grep_info to fill.
 */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *grep_info = p;

    return add_isam_p(name, info, grep_info);
}
155
156 static int term_pre(zebra_map_t zm, const char **src,
157                     const char *ct1, const char *ct2, int first)
158 {
159     const char *s1, *s0 = *src;
160     const char **map;
161
162     /* skip white space */
163     while (*s0)
164     {
165         if (ct1 && strchr(ct1, *s0))
166             break;
167         if (ct2 && strchr(ct2, *s0))
168             break;
169         s1 = s0;
170         map = zebra_maps_input(zm, &s1, strlen(s1), first);
171         if (**map != *CHR_SPACE)
172             break;
173         s0 = s1;
174     }
175     *src = s0;
176     return *s0;
177 }
178
179
/* esc_str - render bytes as a "HH:c  " dump for diagnostics
 *  out_buf/out_size:  destination; out_size must exceed 20
 *  in_buf/in_size:    bytes to dump
 * Each byte becomes two hex digits, a colon, the character itself (or '?'
 * when outside 32..126) and two spaces.  Once the output grows beyond
 * out_size-20 characters, ".." is appended and the dump stops.
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;    /* characters written so far */
    int idx;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    for (idx = 0; idx < in_size; idx++)
    {
        int byte = (unsigned char) in_buf[idx];
        int shown = (byte >= 32 && byte <= 126) ? byte : '?';

        used += sprintf(out_buf + used, "%02X:%c  ", byte, shown);
        if (used > out_size - 20)
        {
            strcpy(out_buf + used, "..");
            break;
        }
    }
}
205
206 #define REGEX_CHARS " []()|.*+?!"
207
208 static void add_non_space(const char *start, const char *end,
209                           WRBUF term_dict,
210                           char *dst_term, int *dst_ptr,
211                           const char **map, int q_map_match)
212 {
213     size_t sz = end - start;
214     memcpy(dst_term + *dst_ptr, start, sz);
215     (*dst_ptr) += sz;
216     if (!q_map_match)
217     {
218         while (start < end)
219         {
220             if (strchr(REGEX_CHARS, *start))
221                 wrbuf_putc(term_dict, '\\');
222             wrbuf_putc(term_dict, *start);
223             start++;
224         }
225     }
226     else
227     {
228         char tmpbuf[80];
229         esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
230         
231         wrbuf_puts(term_dict, map[0]);
232     }
233 }
234
235 /* term_100: handle term, where trunc = none(no operators at all) */
236 static int term_100(zebra_map_t zm,
237                     const char **src, WRBUF term_dict, int space_split,
238                     char *dst_term)
239 {
240     const char *s0;
241     const char **map;
242     int i = 0;
243     int j = 0;
244
245     const char *space_start = 0;
246     const char *space_end = 0;
247
248     if (!term_pre(zm, src, NULL, NULL, !space_split))
249         return 0;
250     s0 = *src;
251     while (*s0)
252     {
253         const char *s1 = s0;
254         int q_map_match = 0;
255         map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
256         if (space_split)
257         {
258             if (**map == *CHR_SPACE)
259                 break;
260         }
261         else  /* complete subfield only. */
262         {
263             if (**map == *CHR_SPACE)
264             {   /* save space mapping for later  .. */
265                 space_start = s1;
266                 space_end = s0;
267                 continue;
268             }
269             else if (space_start)
270             {   /* reload last space */
271                 while (space_start < space_end)
272                 {
273                     if (strchr(REGEX_CHARS, *space_start))
274                         wrbuf_putc(term_dict, '\\');
275                     dst_term[j++] = *space_start;
276                     wrbuf_putc(term_dict, *space_start);
277                     space_start++;
278                                
279                 }
280                 /* and reset */
281                 space_start = space_end = 0;
282             }
283         }
284         i++;
285
286         add_non_space(s1, s0, term_dict, dst_term, &j,
287                       map, q_map_match);
288     }
289     dst_term[j] = '\0';
290     *src = s0;
291     return i;
292 }
293
294 /* term_101: handle term, where trunc = Process # */
295 static int term_101(zebra_map_t zm,
296                     const char **src, WRBUF term_dict, int space_split,
297                     char *dst_term)
298 {
299     const char *s0;
300     const char **map;
301     int i = 0;
302     int j = 0;
303
304     if (!term_pre(zm, src, "#", "#", !space_split))
305         return 0;
306     s0 = *src;
307     while (*s0)
308     {
309         if (*s0 == '#')
310         {
311             i++;
312             wrbuf_puts(term_dict, ".*");
313             dst_term[j++] = *s0++;
314         }
315         else
316         {
317             const char *s1 = s0;
318             int q_map_match = 0;
319             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
320             if (space_split && **map == *CHR_SPACE)
321                 break;
322
323             i++;
324             add_non_space(s1, s0, term_dict, dst_term, &j,
325                           map, q_map_match);
326         }
327     }
328     dst_term[j++] = '\0';
329     *src = s0;
330     return i;
331 }
332
333 /* term_103: handle term, where trunc = re-2 (regular expressions) */
334 static int term_103(zebra_map_t zm, const char **src,
335                     WRBUF term_dict, int *errors, int space_split,
336                     char *dst_term)
337 {
338     int i = 0;
339     int j = 0;
340     const char *s0;
341     const char **map;
342
343     if (!term_pre(zm, src, "^\\()[].*+?|", "(", !space_split))
344         return 0;
345     s0 = *src;
346     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
347         isdigit(((const unsigned char *)s0)[1]))
348     {
349         *errors = s0[1] - '0';
350         s0 += 3;
351         if (*errors > 3)
352             *errors = 3;
353     }
354     while (*s0)
355     {
356         if (strchr("^\\()[].*+?|-", *s0))
357         {
358             dst_term[j++] = *s0;
359             wrbuf_putc(term_dict, *s0);
360             s0++;
361             i++;
362         }
363         else
364         {
365             const char *s1 = s0;
366             int q_map_match = 0;
367             map = zebra_maps_search(zm, &s0, strlen(s0),  &q_map_match);
368             if (space_split && **map == *CHR_SPACE)
369                 break;
370
371             i++;
372             add_non_space(s1, s0, term_dict, dst_term, &j,
373                           map, q_map_match);
374         }
375     }
376     dst_term[j] = '\0';
377     *src = s0;
378     
379     return i;
380 }
381
382 /* term_103: handle term, where trunc = re-1 (regular expressions) */
383 static int term_102(zebra_map_t zm, const char **src,
384                     WRBUF term_dict, int space_split, char *dst_term)
385 {
386     return term_103(zm, src, term_dict, NULL, space_split, dst_term);
387 }
388
389
390 /* term_104: handle term, process # and ! */
391 static int term_104(zebra_map_t zm, const char **src, 
392                     WRBUF term_dict, int space_split, char *dst_term)
393 {
394     const char *s0;
395     const char **map;
396     int i = 0;
397     int j = 0;
398
399     if (!term_pre(zm, src, "?*#", "?*#", !space_split))
400         return 0;
401     s0 = *src;
402     while (*s0)
403     {
404         if (*s0 == '?')
405         {
406             i++;
407             dst_term[j++] = *s0++;
408             if (*s0 >= '0' && *s0 <= '9')
409             {
410                 int limit = 0;
411                 while (*s0 >= '0' && *s0 <= '9')
412                 {
413                     limit = limit * 10 + (*s0 - '0');
414                     dst_term[j++] = *s0++;
415                 }
416                 if (limit > 20)
417                     limit = 20;
418                 while (--limit >= 0)
419                 {
420                     wrbuf_puts(term_dict, ".?");
421                 }
422             }
423             else
424             {
425                 wrbuf_puts(term_dict, ".*");
426             }
427         }
428         else if (*s0 == '*')
429         {
430             i++;
431             wrbuf_puts(term_dict, ".*");
432             dst_term[j++] = *s0++;
433         }
434         else if (*s0 == '#')
435         {
436             i++;
437             wrbuf_puts(term_dict, ".");
438             dst_term[j++] = *s0++;
439         }
440         else
441         {
442             const char *s1 = s0;
443             int q_map_match = 0;
444             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
445             if (space_split && **map == *CHR_SPACE)
446                 break;
447
448             i++;
449             add_non_space(s1, s0, term_dict, dst_term, &j,
450                           map, q_map_match);
451         }
452     }
453     dst_term[j++] = '\0';
454     *src = s0;
455     return i;
456 }
457
458 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
459 static int term_105(zebra_map_t zm, const char **src, 
460                     WRBUF term_dict, int space_split,
461                     char *dst_term, int right_truncate)
462 {
463     const char *s0;
464     const char **map;
465     int i = 0;
466     int j = 0;
467
468     if (!term_pre(zm, src, "*!", "*!", !space_split))
469         return 0;
470     s0 = *src;
471     while (*s0)
472     {
473         if (*s0 == '*')
474         {
475             i++;
476             wrbuf_puts(term_dict, ".*");
477             dst_term[j++] = *s0++;
478         }
479         else if (*s0 == '!')
480         {
481             i++;
482             wrbuf_putc(term_dict, '.');
483             dst_term[j++] = *s0++;
484         }
485         else
486         {
487             const char *s1 = s0;
488             int q_map_match = 0;
489             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
490             if (space_split && **map == *CHR_SPACE)
491                 break;
492
493             i++;
494             add_non_space(s1, s0, term_dict, dst_term, &j,
495                           map, q_map_match);
496         }
497     }
498     if (right_truncate)
499         wrbuf_puts(term_dict, ".*");
500     dst_term[j++] = '\0';
501     *src = s0;
502     return i;
503 }
504
505
506 /* gen_regular_rel - generate regular expression from relation
507  *  val:     border value (inclusive)
508  *  islt:    1 if <=; 0 if >=.
509  */
510 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
511 {
512     char dst_buf[20*5*20]; /* assuming enough for expansion */
513     char *dst = dst_buf;
514     int dst_p;
515     int w, d, i;
516     int pos = 0;
517     char numstr[20];
518
519     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
520     if (val >= 0)
521     {
522         if (islt)
523             strcpy(dst, "(-[0-9]+|(");
524         else
525             strcpy(dst, "((");
526     } 
527     else
528     {
529         if (!islt)
530         {
531             strcpy(dst, "([0-9]+|-(");
532             islt = 1;
533         }
534         else
535         {
536             strcpy(dst, "(-(");
537             islt = 0;
538         }
539         val = -val;
540     }
541     dst_p = strlen(dst);
542     sprintf(numstr, "%d", val);
543     for (w = strlen(numstr); --w >= 0; pos++)
544     {
545         d = numstr[w];
546         if (pos > 0)
547         {
548             if (islt)
549             {
550                 if (d == '0')
551                     continue;
552                 d--;
553             } 
554             else
555             {
556                 if (d == '9')
557                     continue;
558                 d++;
559             }
560         }
561         
562         strcpy(dst + dst_p, numstr);
563         dst_p = strlen(dst) - pos - 1;
564
565         if (islt)
566         {
567             if (d != '0')
568             {
569                 dst[dst_p++] = '[';
570                 dst[dst_p++] = '0';
571                 dst[dst_p++] = '-';
572                 dst[dst_p++] = d;
573                 dst[dst_p++] = ']';
574             }
575             else
576                 dst[dst_p++] = d;
577         }
578         else
579         {
580             if (d != '9')
581             { 
582                 dst[dst_p++] = '[';
583                 dst[dst_p++] = d;
584                 dst[dst_p++] = '-';
585                 dst[dst_p++] = '9';
586                 dst[dst_p++] = ']';
587             }
588             else
589                 dst[dst_p++] = d;
590         }
591         for (i = 0; i<pos; i++)
592         {
593             dst[dst_p++] = '[';
594             dst[dst_p++] = '0';
595             dst[dst_p++] = '-';
596             dst[dst_p++] = '9';
597             dst[dst_p++] = ']';
598         }
599         dst[dst_p++] = '|';
600     }
601     dst[dst_p] = '\0';
602     if (islt)
603     {
604         /* match everything less than 10^(pos-1) */
605         strcat(dst, "0*");
606         for (i = 1; i<pos; i++)
607             strcat(dst, "[0-9]?");
608     }
609     else
610     {
611         /* match everything greater than 10^pos */
612         for (i = 0; i <= pos; i++)
613             strcat(dst, "[0-9]");
614         strcat(dst, "[0-9]*");
615     }
616     strcat(dst, "))");
617     wrbuf_puts(term_dict, dst);
618 }
619
620 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
621 {
622     const char *src = wrbuf_cstr(wsrc);
623     if (src[*indx] == '\\')
624     {
625         wrbuf_putc(term_p, src[*indx]);
626         (*indx)++;
627     }
628     wrbuf_putc(term_p, src[*indx]);
629     (*indx)++;
630 }
631
632 /*
633  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
634  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
635  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
636  *              ([^-a].*|a[^-b].*|ab[c-].*)
637  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
638  *              ([^a-].*|a[^b-].*|ab[^c-].*)
639  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
640  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
641  */
642 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
643                            const char **term_sub, WRBUF term_dict,
644                            const Odr_oid *attributeSet,
645                            zebra_map_t zm, int space_split, char *term_dst,
646                            int *error_code)
647 {
648     AttrType relation;
649     int relation_value;
650     int i;
651     WRBUF term_component = wrbuf_alloc();
652
653     attr_init_APT(&relation, zapt, 2);
654     relation_value = attr_find(&relation, NULL);
655
656     *error_code = 0;
657     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
658     switch (relation_value)
659     {
660     case 1:
661         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
662         {
663             wrbuf_destroy(term_component);
664             return 0;
665         }
666         yaz_log(log_level_rpn, "Relation <");
667         
668         wrbuf_putc(term_dict, '(');
669         for (i = 0; i < wrbuf_len(term_component); )
670         {
671             int j = 0;
672             
673             if (i)
674                 wrbuf_putc(term_dict, '|');
675             while (j < i)
676                 string_rel_add_char(term_dict, term_component, &j);
677
678             wrbuf_putc(term_dict, '[');
679
680             wrbuf_putc(term_dict, '^');
681             
682             wrbuf_putc(term_dict, 1);
683             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
684             
685             string_rel_add_char(term_dict, term_component, &i);
686             wrbuf_putc(term_dict, '-');
687             
688             wrbuf_putc(term_dict, ']');
689             wrbuf_putc(term_dict, '.');
690             wrbuf_putc(term_dict, '*');
691         }
692         wrbuf_putc(term_dict, ')');
693         break;
694     case 2:
695         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
696         {
697             wrbuf_destroy(term_component);
698             return 0;
699         }
700         yaz_log(log_level_rpn, "Relation <=");
701
702         wrbuf_putc(term_dict, '(');
703         for (i = 0; i < wrbuf_len(term_component); )
704         {
705             int j = 0;
706
707             while (j < i)
708                 string_rel_add_char(term_dict, term_component, &j);
709             wrbuf_putc(term_dict, '[');
710
711             wrbuf_putc(term_dict, '^');
712
713             wrbuf_putc(term_dict, 1);
714             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
715
716             string_rel_add_char(term_dict, term_component, &i);
717             wrbuf_putc(term_dict, '-');
718
719             wrbuf_putc(term_dict, ']');
720             wrbuf_putc(term_dict, '.');
721             wrbuf_putc(term_dict, '*');
722
723             wrbuf_putc(term_dict, '|');
724         }
725         for (i = 0; i < wrbuf_len(term_component); )
726             string_rel_add_char(term_dict, term_component, &i);
727         wrbuf_putc(term_dict, ')');
728         break;
729     case 5:
730         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
731         {
732             wrbuf_destroy(term_component);
733             return 0;
734         }
735         yaz_log(log_level_rpn, "Relation >");
736
737         wrbuf_putc(term_dict, '(');
738         for (i = 0; i < wrbuf_len(term_component); )
739         {
740             int j = 0;
741
742             while (j < i)
743                 string_rel_add_char(term_dict, term_component, &j);
744             wrbuf_putc(term_dict, '[');
745             
746             wrbuf_putc(term_dict, '^');
747             wrbuf_putc(term_dict, '-');
748             string_rel_add_char(term_dict, term_component, &i);
749
750             wrbuf_putc(term_dict, ']');
751             wrbuf_putc(term_dict, '.');
752             wrbuf_putc(term_dict, '*');
753
754             wrbuf_putc(term_dict, '|');
755         }
756         for (i = 0; i < wrbuf_len(term_component); )
757             string_rel_add_char(term_dict, term_component, &i);
758         wrbuf_putc(term_dict, '.');
759         wrbuf_putc(term_dict, '+');
760         wrbuf_putc(term_dict, ')');
761         break;
762     case 4:
763         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
764         {
765             wrbuf_destroy(term_component);
766             return 0;
767         }
768         yaz_log(log_level_rpn, "Relation >=");
769
770         wrbuf_putc(term_dict, '(');
771         for (i = 0; i < wrbuf_len(term_component); )
772         {
773             int j = 0;
774
775             if (i)
776                 wrbuf_putc(term_dict, '|');
777             while (j < i)
778                 string_rel_add_char(term_dict, term_component, &j);
779             wrbuf_putc(term_dict, '[');
780
781             if (i < wrbuf_len(term_component)-1)
782             {
783                 wrbuf_putc(term_dict, '^');
784                 wrbuf_putc(term_dict, '-');
785                 string_rel_add_char(term_dict, term_component, &i);
786             }
787             else
788             {
789                 string_rel_add_char(term_dict, term_component, &i);
790                 wrbuf_putc(term_dict, '-');
791             }
792             wrbuf_putc(term_dict, ']');
793             wrbuf_putc(term_dict, '.');
794             wrbuf_putc(term_dict, '*');
795         }
796         wrbuf_putc(term_dict, ')');
797         break;
798     case 3:
799     case 102:
800     case -1:
801         if (!**term_sub)
802             return 1;
803         yaz_log(log_level_rpn, "Relation =");
804         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
805         {
806             wrbuf_destroy(term_component);
807             return 0;
808         }
809         wrbuf_puts(term_dict, "(");
810         wrbuf_puts(term_dict, wrbuf_cstr(term_component));
811         wrbuf_puts(term_dict, ")");
812         break;
813     case 103:
814         yaz_log(log_level_rpn, "Relation always matches");
815         /* skip to end of term (we don't care what it is) */
816         while (**term_sub != '\0')
817             (*term_sub)++;
818         break;
819     default:
820         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
821         wrbuf_destroy(term_component);
822         return 0;
823     }
824     wrbuf_destroy(term_component);
825     return 1;
826 }
827
828 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
829                              const char **term_sub, 
830                              WRBUF term_dict,
831                              const Odr_oid *attributeSet, NMEM stream,
832                              struct grep_info *grep_info,
833                              const char *index_type, int complete_flag,
834                              char *term_dst,
835                              const char *xpath_use,
836                              struct ord_list **ol);
837
838 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
839                                  Z_AttributesPlusTerm *zapt,
840                                  zint *hits_limit_value,
841                                  const char **term_ref_id_str,
842                                  NMEM nmem)
843 {
844     AttrType term_ref_id_attr;
845     AttrType hits_limit_attr;
846     int term_ref_id_int;
847  
848     attr_init_APT(&hits_limit_attr, zapt, 11);
849     *hits_limit_value  = attr_find(&hits_limit_attr, NULL);
850
851     attr_init_APT(&term_ref_id_attr, zapt, 10);
852     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
853     if (term_ref_id_int >= 0)
854     {
855         char *res = nmem_malloc(nmem, 20);
856         sprintf(res, "%d", term_ref_id_int);
857         *term_ref_id_str = res;
858     }
859
860     /* no limit given ? */
861     if (*hits_limit_value == -1)
862     {
863         if (*term_ref_id_str)
864         {
865             /* use global if term_ref is present */
866             *hits_limit_value = zh->approx_limit;
867         }
868         else
869         {
870             /* no counting if term_ref is not present */
871             *hits_limit_value = 0;
872         }
873     }
874     else if (*hits_limit_value == 0)
875     {
876         /* 0 is the same as global limit */
877         *hits_limit_value = zh->approx_limit;
878     }
879     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
880             *term_ref_id_str ? *term_ref_id_str : "none",
881             *hits_limit_value);
882     return ZEBRA_OK;
883 }
884
885 static ZEBRA_RES term_trunc(ZebraHandle zh,
886                             Z_AttributesPlusTerm *zapt,
887                             const char **term_sub, 
888                             const Odr_oid *attributeSet, NMEM stream,
889                             struct grep_info *grep_info,
890                             const char *index_type, int complete_flag,
891                             char *term_dst,
892                             const char *rank_type, 
893                             const char *xpath_use,
894                             NMEM rset_nmem,
895                             RSET *rset,
896                             struct rset_key_control *kc)
897 {
898     ZEBRA_RES res;
899     struct ord_list *ol;
900     zint hits_limit_value;
901     const char *term_ref_id_str = 0;
902     WRBUF term_dict = wrbuf_alloc();
903
904     *rset = 0;
905     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
906     grep_info->isam_p_indx = 0;
907     res = string_term(zh, zapt, term_sub, term_dict,
908                       attributeSet, stream, grep_info,
909                       index_type, complete_flag,
910                       term_dst, xpath_use, &ol);
911     wrbuf_destroy(term_dict);
912     if (res != ZEBRA_OK)
913         return res;
914     if (!*term_sub)  /* no more terms ? */
915         return res;
916     yaz_log(log_level_rpn, "term: %s", term_dst);
917     *rset = rset_trunc(zh, grep_info->isam_p_buf,
918                        grep_info->isam_p_indx, term_dst,
919                        strlen(term_dst), rank_type, 1 /* preserve pos */,
920                        zapt->term->which, rset_nmem,
921                        kc, kc->scope, ol, index_type, hits_limit_value,
922                        term_ref_id_str);
923     if (!*rset)
924         return ZEBRA_FAIL;
925     return ZEBRA_OK;
926 }
927
928 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
929                              const char **term_sub, 
930                              WRBUF term_dict,
931                              const Odr_oid *attributeSet, NMEM stream,
932                              struct grep_info *grep_info,
933                              const char *index_type, int complete_flag,
934                              char *term_dst,
935                              const char *xpath_use,
936                              struct ord_list **ol)
937 {
938     int r;
939     AttrType truncation;
940     int truncation_value;
941     const char *termp;
942     struct rpn_char_map_info rcmi;
943
944     int space_split = complete_flag ? 0 : 1;
945     int ord = -1;
946     int regex_range = 0;
947     int max_pos, prefix_len = 0;
948     int relation_error;
949     char ord_buf[32];
950     int ord_len, i;
951     zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, index_type);
952     
953     *ol = ord_list_create(stream);
954
955     rpn_char_map_prepare(zh->reg, zm, &rcmi);
956     attr_init_APT(&truncation, zapt, 5);
957     truncation_value = attr_find(&truncation, NULL);
958     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
959
960     termp = *term_sub; /* start of term for each database */
961     
962     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
963                           attributeSet, &ord) != ZEBRA_OK)
964     {
965         *term_sub = 0;
966         return ZEBRA_FAIL;
967     }
968     
969     wrbuf_rewind(term_dict); /* new dictionary regexp term */
970     
971     *ol = ord_list_append(stream, *ol, ord);
972     ord_len = key_SU_encode(ord, ord_buf);
973     
974     wrbuf_putc(term_dict, '(');
975     
976     for (i = 0; i<ord_len; i++)
977     {
978         wrbuf_putc(term_dict, 1);  /* our internal regexp escape char */
979         wrbuf_putc(term_dict, ord_buf[i]);
980     }
981     wrbuf_putc(term_dict, ')');
982     
983     prefix_len = wrbuf_len(term_dict);
984     
985     switch (truncation_value)
986     {
987     case -1:         /* not specified */
988     case 100:        /* do not truncate */
989         if (!string_relation(zh, zapt, &termp, term_dict,
990                              attributeSet,
991                              zm, space_split, term_dst,
992                              &relation_error))
993         {
994             if (relation_error)
995             {
996                 zebra_setError(zh, relation_error, 0);
997                 return ZEBRA_FAIL;
998             }
999             *term_sub = 0;
1000             return ZEBRA_OK;
1001         }
1002         break;
1003     case 1:          /* right truncation */
1004         wrbuf_putc(term_dict, '(');
1005         if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1006         {
1007             *term_sub = 0;
1008             return ZEBRA_OK;
1009         }
1010         wrbuf_puts(term_dict, ".*)");
1011         break;
1012     case 2:          /* keft truncation */
1013         wrbuf_puts(term_dict, "(.*");
1014         if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1015         {
1016             *term_sub = 0;
1017             return ZEBRA_OK;
1018         }
1019         wrbuf_putc(term_dict, ')');
1020         break;
1021     case 3:          /* left&right truncation */
1022         wrbuf_puts(term_dict, "(.*");
1023         if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1024         {
1025             *term_sub = 0;
1026             return ZEBRA_OK;
1027         }
1028         wrbuf_puts(term_dict, ".*)");
1029         break;
1030     case 101:        /* process # in term */
1031         wrbuf_putc(term_dict, '(');
1032         if (!term_101(zm, &termp, term_dict, space_split, term_dst))
1033         {
1034             *term_sub = 0;
1035             return ZEBRA_OK;
1036         }
1037         wrbuf_puts(term_dict, ")");
1038         break;
1039     case 102:        /* Regexp-1 */
1040         wrbuf_putc(term_dict, '(');
1041         if (!term_102(zm, &termp, term_dict, space_split, term_dst))
1042         {
1043             *term_sub = 0;
1044             return ZEBRA_OK;
1045         }
1046         wrbuf_putc(term_dict, ')');
1047         break;
1048     case 103:       /* Regexp-2 */
1049         regex_range = 1;
1050         wrbuf_putc(term_dict, '(');
1051         if (!term_103(zm, &termp, term_dict, &regex_range,
1052                       space_split, term_dst))
1053         {
1054             *term_sub = 0;
1055             return ZEBRA_OK;
1056         }
1057         wrbuf_putc(term_dict, ')');
1058         break;
1059     case 104:        /* process # and ! in term */
1060         wrbuf_putc(term_dict, '(');
1061         if (!term_104(zm, &termp, term_dict, space_split, term_dst))
1062         {
1063             *term_sub = 0;
1064             return ZEBRA_OK;
1065         }
1066         wrbuf_putc(term_dict, ')');
1067         break;
1068     case 105:        /* process * and ! in term */
1069         wrbuf_putc(term_dict, '(');
1070         if (!term_105(zm, &termp, term_dict, space_split, term_dst, 1))
1071         {
1072             *term_sub = 0;
1073             return ZEBRA_OK;
1074         }
1075         wrbuf_putc(term_dict, ')');
1076         break;
1077     case 106:        /* process * and ! in term */
1078         wrbuf_putc(term_dict, '(');
1079         if (!term_105(zm, &termp, term_dict, space_split, term_dst, 0))
1080         {
1081             *term_sub = 0;
1082             return ZEBRA_OK;
1083         }
1084         wrbuf_putc(term_dict, ')');
1085         break;
1086     default:
1087         zebra_setError_zint(zh,
1088                             YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1089                             truncation_value);
1090         return ZEBRA_FAIL;
1091     }
1092     if (1)
1093     {
1094         char buf[1000];
1095         const char *input = wrbuf_cstr(term_dict) + prefix_len;
1096         esc_str(buf, sizeof(buf), input, strlen(input));
1097     }
1098     yaz_log(log_level_rpn, "dict_lookup_grep: %s",
1099             wrbuf_cstr(term_dict) + prefix_len);
1100     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1101                          grep_info, &max_pos, 
1102                          ord_len /* number of "exact" chars */,
1103                          grep_handle);
1104     if (r == 1)
1105         zebra_set_partial_result(zh);
1106     else if (r)
1107         yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1108     *term_sub = termp;
1109     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1110     return ZEBRA_OK;
1111 }
1112
1113
1114
1115 static void grep_info_delete(struct grep_info *grep_info)
1116 {
1117 #ifdef TERM_COUNT
1118     xfree(grep_info->term_no);
1119 #endif
1120     xfree(grep_info->isam_p_buf);
1121 }
1122
1123 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1124                                    Z_AttributesPlusTerm *zapt,
1125                                    struct grep_info *grep_info,
1126                                    const char *index_type)
1127 {
1128 #ifdef TERM_COUNT
1129     grep_info->term_no = 0;
1130 #endif
1131     grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1132     grep_info->isam_p_size = 0;
1133     grep_info->isam_p_buf = NULL;
1134     grep_info->zh = zh;
1135     grep_info->index_type = index_type;
1136     grep_info->termset = 0;
1137     if (zapt)
1138     {
1139         AttrType truncmax;
1140         int truncmax_value;
1141
1142         attr_init_APT(&truncmax, zapt, 13);
1143         truncmax_value = attr_find(&truncmax, NULL);
1144         if (truncmax_value != -1)
1145             grep_info->trunc_max = truncmax_value;
1146     }
1147     if (zapt)
1148     {
1149         AttrType termset;
1150         int termset_value_numeric;
1151         const char *termset_value_string;
1152
1153         attr_init_APT(&termset, zapt, 8);
1154         termset_value_numeric =
1155             attr_find_ex(&termset, NULL, &termset_value_string);
1156         if (termset_value_numeric != -1)
1157         {
1158 #if TERMSET_DISABLE
1159             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1160             return ZEBRA_FAIL;
1161 #else
1162             char resname[32];
1163             const char *termset_name = 0;
1164             if (termset_value_numeric != -2)
1165             {
1166                 
1167                 sprintf(resname, "%d", termset_value_numeric);
1168                 termset_name = resname;
1169             }
1170             else
1171             termset_name = termset_value_string;
1172             yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1173             grep_info->termset = resultSetAdd(zh, termset_name, 1);
1174             if (!grep_info->termset)
1175             {
1176                 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1177                 return ZEBRA_FAIL;
1178             }
1179 #endif
1180         }
1181     }
1182     return ZEBRA_OK;
1183 }
1184                                
1185 /**
1186   \brief Create result set(s) for list of terms
1187   \param zh Zebra Handle
1188   \param zapt Attributes Plust Term (RPN leaf)
1189   \param termz term as used in query but converted to UTF-8
1190   \param attributeSet default attribute set
1191   \param stream memory for result
1192   \param index_type register type ("w", "p",..)
1193   \param complete_flag whether it's phrases or not
1194   \param rank_type term flags for ranking
1195   \param xpath_use use attribute for X-Path (-1 for no X-path)
1196   \param rset_nmem memory for result sets
1197   \param result_sets output result set for each term in list (output)
1198   \param num_result_sets number of output result sets
1199   \param kc rset key control to be used for created result sets
1200 */
1201 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1202                                  Z_AttributesPlusTerm *zapt,
1203                                  const char *termz,
1204                                  const Odr_oid *attributeSet,
1205                                  NMEM stream,
1206                                  const char *index_type, int complete_flag,
1207                                  const char *rank_type,
1208                                  const char *xpath_use,
1209                                  NMEM rset_nmem,
1210                                  RSET **result_sets, int *num_result_sets,
1211                                  struct rset_key_control *kc)
1212 {
1213     char term_dst[IT_MAX_WORD+1];
1214     struct grep_info grep_info;
1215     const char *termp = termz;
1216     int alloc_sets = 0;
1217
1218     *num_result_sets = 0;
1219     *term_dst = 0;
1220     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1221         return ZEBRA_FAIL;
1222     while(1)
1223     { 
1224         ZEBRA_RES res;
1225
1226         if (alloc_sets == *num_result_sets)
1227         {
1228             int add = 10;
1229             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1230                                               sizeof(*rnew));
1231             if (alloc_sets)
1232                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1233             alloc_sets = alloc_sets + add;
1234             *result_sets = rnew;
1235         }
1236         res = term_trunc(zh, zapt, &termp, attributeSet,
1237                          stream, &grep_info,
1238                          index_type, complete_flag,
1239                          term_dst, rank_type,
1240                          xpath_use, rset_nmem,
1241                          &(*result_sets)[*num_result_sets],
1242                          kc);
1243         if (res != ZEBRA_OK)
1244         {
1245             int i;
1246             for (i = 0; i < *num_result_sets; i++)
1247                 rset_delete((*result_sets)[i]);
1248             grep_info_delete(&grep_info);
1249             return res;
1250         }
1251         if ((*result_sets)[*num_result_sets] == 0)
1252             break;
1253         (*num_result_sets)++;
1254
1255         if (!*termp)
1256             break;
1257     }
1258     grep_info_delete(&grep_info);
1259     return ZEBRA_OK;
1260 }
1261
1262 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1263                                          Z_AttributesPlusTerm *zapt,
1264                                          const Odr_oid *attributeSet,
1265                                          const char *index_type,
1266                                          NMEM rset_nmem,
1267                                          RSET *rset,
1268                                          struct rset_key_control *kc)
1269 {
1270     int position_value;
1271     AttrType position;
1272     int ord = -1;
1273     char ord_buf[32];
1274     char term_dict[100];
1275     int ord_len;
1276     char *val;
1277     ISAM_P isam_p;
1278     zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, index_type);
1279     
1280     attr_init_APT(&position, zapt, 3);
1281     position_value = attr_find(&position, NULL);
1282     switch(position_value)
1283     {
1284     case 3:
1285     case -1:
1286         return ZEBRA_OK;
1287     case 1:
1288     case 2:
1289         break;
1290     default:
1291         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1292                             position_value);
1293         return ZEBRA_FAIL;
1294     }
1295
1296
1297     if (!zebra_maps_is_first_in_field(zm))
1298     {
1299         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1300                             position_value);
1301         return ZEBRA_FAIL;
1302     }
1303
1304     if (!zh->reg->isamb && !zh->reg->isamc)
1305     {
1306         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1307                             position_value);
1308         return ZEBRA_FAIL;
1309     }
1310
1311     if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1312                           attributeSet, &ord) != ZEBRA_OK)
1313     {
1314         return ZEBRA_FAIL;
1315     }
1316     ord_len = key_SU_encode(ord, ord_buf);
1317     memcpy(term_dict, ord_buf, ord_len);
1318     strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1319     val = dict_lookup(zh->reg->dict, term_dict);
1320     if (val)
1321     {
1322         assert(*val == sizeof(ISAM_P));
1323         memcpy(&isam_p, val+1, sizeof(isam_p));
1324         
1325         if (zh->reg->isamb)
1326             *rset = rsisamb_create(rset_nmem, kc, kc->scope,
1327                                    zh->reg->isamb, isam_p, 0);
1328         else if (zh->reg->isamc)
1329             *rset = rsisamc_create(rset_nmem, kc, kc->scope,
1330                                    zh->reg->isamc, isam_p, 0);
1331     }
1332     return ZEBRA_OK;
1333 }
1334                                          
1335 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1336                                        Z_AttributesPlusTerm *zapt,
1337                                        const char *termz_org,
1338                                        const Odr_oid *attributeSet,
1339                                        NMEM stream,
1340                                        const char *index_type, int complete_flag,
1341                                        const char *rank_type,
1342                                        const char *xpath_use,
1343                                        NMEM rset_nmem,
1344                                        RSET *rset,
1345                                        struct rset_key_control *kc)
1346 {
1347     RSET *result_sets = 0;
1348     int num_result_sets = 0;
1349     ZEBRA_RES res =
1350         term_list_trunc(zh, zapt, termz_org, attributeSet,
1351                         stream, index_type, complete_flag,
1352                         rank_type, xpath_use,
1353                         rset_nmem,
1354                         &result_sets, &num_result_sets, kc);
1355
1356     if (res != ZEBRA_OK)
1357         return res;
1358
1359     if (num_result_sets > 0)
1360     {
1361         RSET first_set = 0;
1362         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1363                                       index_type,
1364                                       rset_nmem, &first_set,
1365                                       kc);
1366         if (res != ZEBRA_OK)
1367             return res;
1368         if (first_set)
1369         {
1370             RSET *nsets = nmem_malloc(stream,
1371                                       sizeof(RSET) * (num_result_sets+1));
1372             nsets[0] = first_set;
1373             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1374             result_sets = nsets;
1375             num_result_sets++;
1376         }
1377     }
1378     if (num_result_sets == 0)
1379         *rset = rset_create_null(rset_nmem, kc, 0); 
1380     else if (num_result_sets == 1)
1381         *rset = result_sets[0];
1382     else
1383         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1384                                  num_result_sets, result_sets,
1385                                  1 /* ordered */, 0 /* exclusion */,
1386                                  3 /* relation */, 1 /* distance */);
1387     if (!*rset)
1388         return ZEBRA_FAIL;
1389     return ZEBRA_OK;
1390 }
1391
1392 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1393                                         Z_AttributesPlusTerm *zapt,
1394                                         const char *termz_org,
1395                                         const Odr_oid *attributeSet,
1396                                         NMEM stream,
1397                                         const char *index_type, 
1398                                         int complete_flag,
1399                                         const char *rank_type,
1400                                         const char *xpath_use,
1401                                         NMEM rset_nmem,
1402                                         RSET *rset,
1403                                         struct rset_key_control *kc)
1404 {
1405     RSET *result_sets = 0;
1406     int num_result_sets = 0;
1407     int i;
1408     ZEBRA_RES res =
1409         term_list_trunc(zh, zapt, termz_org, attributeSet,
1410                         stream, index_type, complete_flag,
1411                         rank_type, xpath_use,
1412                         rset_nmem,
1413                         &result_sets, &num_result_sets, kc);
1414     if (res != ZEBRA_OK)
1415         return res;
1416
1417     for (i = 0; i<num_result_sets; i++)
1418     {
1419         RSET first_set = 0;
1420         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1421                                       index_type,
1422                                       rset_nmem, &first_set,
1423                                       kc);
1424         if (res != ZEBRA_OK)
1425         {
1426             for (i = 0; i<num_result_sets; i++)
1427                 rset_delete(result_sets[i]);
1428             return res;
1429         }
1430
1431         if (first_set)
1432         {
1433             RSET tmp_set[2];
1434
1435             tmp_set[0] = first_set;
1436             tmp_set[1] = result_sets[i];
1437             
1438             result_sets[i] = rset_create_prox(
1439                 rset_nmem, kc, kc->scope,
1440                 2, tmp_set,
1441                 1 /* ordered */, 0 /* exclusion */,
1442                 3 /* relation */, 1 /* distance */);
1443         }
1444     }
1445     if (num_result_sets == 0)
1446         *rset = rset_create_null(rset_nmem, kc, 0); 
1447     else if (num_result_sets == 1)
1448         *rset = result_sets[0];
1449     else
1450         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1451                                num_result_sets, result_sets);
1452     if (!*rset)
1453         return ZEBRA_FAIL;
1454     return ZEBRA_OK;
1455 }
1456
1457 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1458                                          Z_AttributesPlusTerm *zapt,
1459                                          const char *termz_org,
1460                                          const Odr_oid *attributeSet,
1461                                          NMEM stream,
1462                                          const char *index_type, 
1463                                          int complete_flag,
1464                                          const char *rank_type, 
1465                                          const char *xpath_use,
1466                                          NMEM rset_nmem,
1467                                          RSET *rset,
1468                                          struct rset_key_control *kc)
1469 {
1470     RSET *result_sets = 0;
1471     int num_result_sets = 0;
1472     int i;
1473     ZEBRA_RES res =
1474         term_list_trunc(zh, zapt, termz_org, attributeSet,
1475                         stream, index_type, complete_flag,
1476                         rank_type, xpath_use,
1477                         rset_nmem,
1478                         &result_sets, &num_result_sets,
1479                         kc);
1480     if (res != ZEBRA_OK)
1481         return res;
1482     for (i = 0; i<num_result_sets; i++)
1483     {
1484         RSET first_set = 0;
1485         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1486                                       index_type,
1487                                       rset_nmem, &first_set,
1488                                       kc);
1489         if (res != ZEBRA_OK)
1490         {
1491             for (i = 0; i<num_result_sets; i++)
1492                 rset_delete(result_sets[i]);
1493             return res;
1494         }
1495
1496         if (first_set)
1497         {
1498             RSET tmp_set[2];
1499
1500             tmp_set[0] = first_set;
1501             tmp_set[1] = result_sets[i];
1502             
1503             result_sets[i] = rset_create_prox(
1504                 rset_nmem, kc, kc->scope,
1505                 2, tmp_set,
1506                 1 /* ordered */, 0 /* exclusion */,
1507                 3 /* relation */, 1 /* distance */);
1508         }
1509     }
1510
1511
1512     if (num_result_sets == 0)
1513         *rset = rset_create_null(rset_nmem, kc, 0); 
1514     else if (num_result_sets == 1)
1515         *rset = result_sets[0];
1516     else
1517         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1518                                num_result_sets, result_sets);
1519     if (!*rset)
1520         return ZEBRA_FAIL;
1521     return ZEBRA_OK;
1522 }
1523
1524 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1525                             const char **term_sub,
1526                             WRBUF term_dict,
1527                             const Odr_oid *attributeSet,
1528                             struct grep_info *grep_info,
1529                             int *max_pos,
1530                             zebra_map_t zm,
1531                             char *term_dst,
1532                             int *error_code)
1533 {
1534     AttrType relation;
1535     int relation_value;
1536     int term_value;
1537     int r;
1538     WRBUF term_num = wrbuf_alloc();
1539
1540     *error_code = 0;
1541     attr_init_APT(&relation, zapt, 2);
1542     relation_value = attr_find(&relation, NULL);
1543
1544     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1545
1546     switch (relation_value)
1547     {
1548     case 1:
1549         yaz_log(log_level_rpn, "Relation <");
1550         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1551         { 
1552             wrbuf_destroy(term_num);
1553             return 0;
1554         }
1555         term_value = atoi(wrbuf_cstr(term_num));
1556         gen_regular_rel(term_dict, term_value-1, 1);
1557         break;
1558     case 2:
1559         yaz_log(log_level_rpn, "Relation <=");
1560         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1561         {
1562             wrbuf_destroy(term_num);
1563             return 0;
1564         }
1565         term_value = atoi(wrbuf_cstr(term_num));
1566         gen_regular_rel(term_dict, term_value, 1);
1567         break;
1568     case 4:
1569         yaz_log(log_level_rpn, "Relation >=");
1570         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1571         {
1572             wrbuf_destroy(term_num);
1573             return 0;
1574         }
1575         term_value = atoi(wrbuf_cstr(term_num));
1576         gen_regular_rel(term_dict, term_value, 0);
1577         break;
1578     case 5:
1579         yaz_log(log_level_rpn, "Relation >");
1580         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1581         {
1582             wrbuf_destroy(term_num);
1583             return 0;
1584         }
1585         term_value = atoi(wrbuf_cstr(term_num));
1586         gen_regular_rel(term_dict, term_value+1, 0);
1587         break;
1588     case -1:
1589     case 3:
1590         yaz_log(log_level_rpn, "Relation =");
1591         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1592         {
1593             wrbuf_destroy(term_num);
1594             return 0; 
1595         }
1596         term_value = atoi(wrbuf_cstr(term_num));
1597         wrbuf_printf(term_dict, "(0*%d)", term_value);
1598         break;
1599     case 103:
1600         /* term_tmp untouched.. */
1601         while (**term_sub != '\0')
1602             (*term_sub)++;
1603         break;
1604     default:
1605         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1606         wrbuf_destroy(term_num); 
1607         return 0;
1608     }
1609     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 
1610                          0, grep_info, max_pos, 0, grep_handle);
1611
1612     if (r == 1)
1613         zebra_set_partial_result(zh);
1614     else if (r)
1615         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1616     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1617     wrbuf_destroy(term_num);
1618     return 1;
1619 }
1620
1621 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1622                               const char **term_sub, 
1623                               WRBUF term_dict,
1624                               const Odr_oid *attributeSet, NMEM stream,
1625                               struct grep_info *grep_info,
1626                               const char *index_type, int complete_flag,
1627                               char *term_dst, 
1628                               const char *xpath_use,
1629                               struct ord_list **ol)
1630 {
1631     const char *termp;
1632     struct rpn_char_map_info rcmi;
1633     int max_pos;
1634     int relation_error = 0;
1635     int ord, ord_len, i;
1636     char ord_buf[32];
1637     zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, index_type);
1638     
1639     *ol = ord_list_create(stream);
1640
1641     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1642
1643     termp = *term_sub;
1644     
1645     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1646                           attributeSet, &ord) != ZEBRA_OK)
1647     {
1648         return ZEBRA_FAIL;
1649     }
1650     
1651     wrbuf_rewind(term_dict);
1652     
1653     *ol = ord_list_append(stream, *ol, ord);
1654     
1655     ord_len = key_SU_encode(ord, ord_buf);
1656     
1657     wrbuf_putc(term_dict, '(');
1658     for (i = 0; i < ord_len; i++)
1659     {
1660         wrbuf_putc(term_dict, 1);
1661         wrbuf_putc(term_dict, ord_buf[i]);
1662     }
1663     wrbuf_putc(term_dict, ')');
1664     
1665     if (!numeric_relation(zh, zapt, &termp, term_dict,
1666                           attributeSet, grep_info, &max_pos, zm,
1667                           term_dst, &relation_error))
1668     {
1669         if (relation_error)
1670         {
1671             zebra_setError(zh, relation_error, 0);
1672             return ZEBRA_FAIL;
1673         }
1674         *term_sub = 0;
1675         return ZEBRA_OK;
1676     }
1677     *term_sub = termp;
1678     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1679     return ZEBRA_OK;
1680 }
1681
1682                                  
1683 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1684                                         Z_AttributesPlusTerm *zapt,
1685                                         const char *termz,
1686                                         const Odr_oid *attributeSet,
1687                                         NMEM stream,
1688                                         const char *index_type, 
1689                                         int complete_flag,
1690                                         const char *rank_type, 
1691                                         const char *xpath_use,
1692                                         NMEM rset_nmem,
1693                                         RSET *rset,
1694                                         struct rset_key_control *kc)
1695 {
1696     char term_dst[IT_MAX_WORD+1];
1697     const char *termp = termz;
1698     RSET *result_sets = 0;
1699     int num_result_sets = 0;
1700     ZEBRA_RES res;
1701     struct grep_info grep_info;
1702     int alloc_sets = 0;
1703     zint hits_limit_value;
1704     const char *term_ref_id_str = 0;
1705
1706     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1707
1708     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1709     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1710         return ZEBRA_FAIL;
1711     while (1)
1712     { 
1713         struct ord_list *ol;
1714         WRBUF term_dict = wrbuf_alloc();
1715         if (alloc_sets == num_result_sets)
1716         {
1717             int add = 10;
1718             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1719                                               sizeof(*rnew));
1720             if (alloc_sets)
1721                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1722             alloc_sets = alloc_sets + add;
1723             result_sets = rnew;
1724         }
1725         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1726         grep_info.isam_p_indx = 0;
1727         res = numeric_term(zh, zapt, &termp, term_dict,
1728                            attributeSet, stream, &grep_info,
1729                            index_type, complete_flag,
1730                            term_dst, xpath_use, &ol);
1731         wrbuf_destroy(term_dict);
1732         if (res == ZEBRA_FAIL || termp == 0)
1733             break;
1734         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1735         result_sets[num_result_sets] =
1736             rset_trunc(zh, grep_info.isam_p_buf,
1737                        grep_info.isam_p_indx, term_dst,
1738                        strlen(term_dst), rank_type,
1739                        0 /* preserve position */,
1740                        zapt->term->which, rset_nmem, 
1741                        kc, kc->scope, ol, index_type,
1742                        hits_limit_value,
1743                        term_ref_id_str);
1744         if (!result_sets[num_result_sets])
1745             break;
1746         num_result_sets++;
1747         if (!*termp)
1748             break;
1749     }
1750     grep_info_delete(&grep_info);
1751
1752     if (res != ZEBRA_OK)
1753         return res;
1754     if (num_result_sets == 0)
1755         *rset = rset_create_null(rset_nmem, kc, 0);
1756     else if (num_result_sets == 1)
1757         *rset = result_sets[0];
1758     else
1759         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1760                                 num_result_sets, result_sets);
1761     if (!*rset)
1762         return ZEBRA_FAIL;
1763     return ZEBRA_OK;
1764 }
1765
1766 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1767                                       Z_AttributesPlusTerm *zapt,
1768                                       const char *termz,
1769                                       const Odr_oid *attributeSet,
1770                                       NMEM stream,
1771                                       const char *rank_type, NMEM rset_nmem,
1772                                       RSET *rset,
1773                                       struct rset_key_control *kc)
1774 {
1775     Record rec;
1776     zint sysno = atozint(termz);
1777     
1778     if (sysno <= 0)
1779         sysno = 0;
1780     rec = rec_get(zh->reg->records, sysno);
1781     if (!rec)
1782         sysno = 0;
1783
1784     rec_free(&rec);
1785
1786     if (sysno <= 0)
1787     {
1788         *rset = rset_create_null(rset_nmem, kc, 0);
1789     }
1790     else
1791     {
1792         RSFD rsfd;
1793         struct it_key key;
1794         *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1795                                  res_get(zh->res, "setTmpDir"), 0);
1796         rsfd = rset_open(*rset, RSETF_WRITE);
1797         
1798         key.mem[0] = sysno;
1799         key.mem[1] = 1;
1800         key.len = 2;
1801         rset_write(rsfd, &key);
1802         rset_close(rsfd);
1803     }
1804     return ZEBRA_OK;
1805 }
1806
1807 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1808                                const Odr_oid *attributeSet, NMEM stream,
1809                                Z_SortKeySpecList *sort_sequence,
1810                                const char *rank_type,
1811                                NMEM rset_nmem,
1812                                RSET *rset,
1813                                struct rset_key_control *kc)
1814 {
1815     int i;
1816     int sort_relation_value;
1817     AttrType sort_relation_type;
1818     Z_SortKeySpec *sks;
1819     Z_SortKey *sk;
1820     char termz[20];
1821     
1822     attr_init_APT(&sort_relation_type, zapt, 7);
1823     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1824
1825     if (!sort_sequence->specs)
1826     {
1827         sort_sequence->num_specs = 10;
1828         sort_sequence->specs = (Z_SortKeySpec **)
1829             nmem_malloc(stream, sort_sequence->num_specs *
1830                          sizeof(*sort_sequence->specs));
1831         for (i = 0; i<sort_sequence->num_specs; i++)
1832             sort_sequence->specs[i] = 0;
1833     }
1834     if (zapt->term->which != Z_Term_general)
1835         i = 0;
1836     else
1837         i = atoi_n((char *) zapt->term->u.general->buf,
1838                     zapt->term->u.general->len);
1839     if (i >= sort_sequence->num_specs)
1840         i = 0;
1841     sprintf(termz, "%d", i);
1842
1843     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1844     sks->sortElement = (Z_SortElement *)
1845         nmem_malloc(stream, sizeof(*sks->sortElement));
1846     sks->sortElement->which = Z_SortElement_generic;
1847     sk = sks->sortElement->u.generic = (Z_SortKey *)
1848         nmem_malloc(stream, sizeof(*sk));
1849     sk->which = Z_SortKey_sortAttributes;
1850     sk->u.sortAttributes = (Z_SortAttributes *)
1851         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1852
1853     sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1854     sk->u.sortAttributes->list = zapt->attributes;
1855
1856     sks->sortRelation = (int *)
1857         nmem_malloc(stream, sizeof(*sks->sortRelation));
1858     if (sort_relation_value == 1)
1859         *sks->sortRelation = Z_SortKeySpec_ascending;
1860     else if (sort_relation_value == 2)
1861         *sks->sortRelation = Z_SortKeySpec_descending;
1862     else 
1863         *sks->sortRelation = Z_SortKeySpec_ascending;
1864
1865     sks->caseSensitivity = (int *)
1866         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1867     *sks->caseSensitivity = 0;
1868
1869     sks->which = Z_SortKeySpec_null;
1870     sks->u.null = odr_nullval ();
1871     sort_sequence->specs[i] = sks;
1872     *rset = rset_create_null(rset_nmem, kc, 0);
1873     return ZEBRA_OK;
1874 }
1875
1876
1877 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1878                            const Odr_oid *attributeSet,
1879                            struct xpath_location_step *xpath, int max,
1880                            NMEM mem)
1881 {
1882     const Odr_oid *curAttributeSet = attributeSet;
1883     AttrType use;
1884     const char *use_string = 0;
1885     
1886     attr_init_APT(&use, zapt, 1);
1887     attr_find_ex(&use, &curAttributeSet, &use_string);
1888
1889     if (!use_string || *use_string != '/')
1890         return -1;
1891
1892     return zebra_parse_xpath_str(use_string, xpath, max, mem);
1893 }
1894  
1895                
1896
1897 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1898                         const char *index_type, const char *term, 
1899                         const char *xpath_use,
1900                         NMEM rset_nmem,
1901                         struct rset_key_control *kc)
1902 {
1903     struct grep_info grep_info;
1904     int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
1905                                            zinfo_index_category_index,
1906                                            index_type, xpath_use);
1907     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
1908         return rset_create_null(rset_nmem, kc, 0);
1909     
1910     if (ord < 0)
1911         return rset_create_null(rset_nmem, kc, 0);
1912     else
1913     {
1914         int i, r, max_pos;
1915         char ord_buf[32];
1916         RSET rset;
1917         WRBUF term_dict = wrbuf_alloc();
1918         int ord_len = key_SU_encode(ord, ord_buf);
1919         int term_type = Z_Term_characterString;
1920         const char *flags = "void";
1921
1922         wrbuf_putc(term_dict, '(');
1923         for (i = 0; i<ord_len; i++)
1924         {
1925             wrbuf_putc(term_dict, 1);
1926             wrbuf_putc(term_dict, ord_buf[i]);
1927         }
1928         wrbuf_putc(term_dict, ')');
1929         wrbuf_puts(term_dict, term);
1930         
1931         grep_info.isam_p_indx = 0;
1932         r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
1933                              &grep_info, &max_pos, 0, grep_handle);
1934         yaz_log(YLOG_DEBUG, "%s %d positions", term,
1935                 grep_info.isam_p_indx);
1936         rset = rset_trunc(zh, grep_info.isam_p_buf,
1937                           grep_info.isam_p_indx, term, strlen(term),
1938                           flags, 1, term_type, rset_nmem,
1939                           kc, kc->scope, 0, index_type, 0 /* hits_limit */,
1940                           0 /* term_ref_id_str */);
1941         grep_info_delete(&grep_info);
1942         wrbuf_destroy(term_dict);
1943         return rset;
1944     }
1945 }
1946
1947 static
1948 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
1949                            NMEM stream, const char *rank_type, RSET rset,
1950                            int xpath_len, struct xpath_location_step *xpath,
1951                            NMEM rset_nmem,
1952                            RSET *rset_out,
1953                            struct rset_key_control *kc)
1954 {
1955     int i;
1956     int always_matches = rset ? 0 : 1;
1957
1958     if (xpath_len < 0)
1959     {
1960         *rset_out = rset;
1961         return ZEBRA_OK;
1962     }
1963
1964     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
1965     for (i = 0; i<xpath_len; i++)
1966     {
1967         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
1968
1969     }
1970
1971     /*
1972       //a    ->    a/.*
1973       //a/b  ->    b/a/.*
1974       /a     ->    a/
1975       /a/b   ->    b/a/
1976
1977       /      ->    none
1978
1979    a[@attr = value]/b[@other = othervalue]
1980
1981  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
1982  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
1983  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
1984  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
1985  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
1986  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
1987       
1988     */
1989
1990     dict_grep_cmap(zh->reg->dict, 0, 0);
1991     
1992     {
1993         int level = xpath_len;
1994         int first_path = 1;
1995         
1996         while (--level >= 0)
1997         {
1998             WRBUF xpath_rev = wrbuf_alloc();
1999             int i;
2000             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2001
2002             for (i = level; i >= 1; --i)
2003             {
2004                 const char *cp = xpath[i].part;
2005                 if (*cp)
2006                 {
2007                     for (; *cp; cp++)
2008                     {
2009                         if (*cp == '*')
2010                             wrbuf_puts(xpath_rev, "[^/]*");
2011                         else if (*cp == ' ')
2012                             wrbuf_puts(xpath_rev, "\001 ");
2013                         else
2014                             wrbuf_putc(xpath_rev, *cp);
2015
2016                         /* wrbuf_putc does not null-terminate , but
2017                            wrbuf_puts below ensures it does.. so xpath_rev
2018                            is OK iff length is > 0 */
2019                     }
2020                     wrbuf_puts(xpath_rev, "/");
2021                 }
2022                 else if (i == 1)  /* // case */
2023                     wrbuf_puts(xpath_rev, ".*");
2024             }
2025             if (xpath[level].predicate &&
2026                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2027                 xpath[level].predicate->u.relation.name[0])
2028             {
2029                 WRBUF wbuf = wrbuf_alloc();
2030                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2031                 if (xpath[level].predicate->u.relation.value)
2032                 {
2033                     const char *cp = xpath[level].predicate->u.relation.value;
2034                     wrbuf_putc(wbuf, '=');
2035                     
2036                     while (*cp)
2037                     {
2038                         if (strchr(REGEX_CHARS, *cp))
2039                             wrbuf_putc(wbuf, '\\');
2040                         wrbuf_putc(wbuf, *cp);
2041                         cp++;
2042                     }
2043                 }
2044                 rset_attr = xpath_trunc(
2045                     zh, stream, "0", wrbuf_cstr(wbuf), 
2046                     ZEBRA_XPATH_ATTR_NAME, 
2047                     rset_nmem, kc);
2048                 wrbuf_destroy(wbuf);
2049             } 
2050             else 
2051             {
2052                 if (!first_path)
2053                 {
2054                     wrbuf_destroy(xpath_rev);
2055                     continue;
2056                 }
2057             }
2058             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, 
2059                     wrbuf_cstr(xpath_rev));
2060             if (wrbuf_len(xpath_rev))
2061             {
2062                 rset_start_tag = xpath_trunc(zh, stream, "0", 
2063                                              wrbuf_cstr(xpath_rev),
2064                                              ZEBRA_XPATH_ELM_BEGIN, 
2065                                              rset_nmem, kc);
2066                 if (always_matches)
2067                     rset = rset_start_tag;
2068                 else
2069                 {
2070                     rset_end_tag = xpath_trunc(zh, stream, "0", 
2071                                                wrbuf_cstr(xpath_rev),
2072                                                ZEBRA_XPATH_ELM_END, 
2073                                                rset_nmem, kc);
2074                     
2075                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2076                                                rset_start_tag, rset,
2077                                                rset_end_tag, rset_attr);
2078                 }
2079             }
2080             wrbuf_destroy(xpath_rev);
2081             first_path = 0;
2082         }
2083     }
2084     *rset_out = rset;
2085     return ZEBRA_OK;
2086 }
2087
2088 #define MAX_XPATH_STEPS 10
2089
2090 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2091                                      Z_AttributesPlusTerm *zapt,
2092                                      const Odr_oid *attributeSet, NMEM stream,
2093                                      Z_SortKeySpecList *sort_sequence,
2094                                      NMEM rset_nmem,
2095                                      RSET *rset,
2096                                      struct rset_key_control *kc);
2097
2098 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2099                                 const Odr_oid *attributeSet, NMEM stream,
2100                                 Z_SortKeySpecList *sort_sequence,
2101                                 int num_bases, char **basenames, 
2102                                 NMEM rset_nmem,
2103                                 RSET *rset,
2104                                 struct rset_key_control *kc)
2105 {
2106     RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2107     ZEBRA_RES res = ZEBRA_OK;
2108     int i;
2109     for (i = 0; i < num_bases; i++)
2110     {
2111
2112         if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2113         {
2114             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2115                            basenames[i]);
2116             res = ZEBRA_FAIL;
2117             break;
2118         }
2119         res = rpn_search_database(zh, zapt, attributeSet, stream,
2120                                   sort_sequence,
2121                                   rset_nmem, rsets+i, kc);
2122         if (res != ZEBRA_OK)
2123             break;
2124     }
2125     if (res != ZEBRA_OK)
2126     {   /* must clean up the already created sets */
2127         while (--i >= 0)
2128             rset_delete(rsets[i]);
2129         *rset = 0;
2130     }
2131     else 
2132     {
2133         if (num_bases == 1)
2134             *rset = rsets[0];
2135         else if (num_bases == 0)
2136             *rset = rset_create_null(rset_nmem, kc, 0); 
2137         else
2138             *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
2139                                    num_bases, rsets);
2140     }
2141     return res;
2142 }
2143
2144 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2145                                      Z_AttributesPlusTerm *zapt,
2146                                      const Odr_oid *attributeSet, NMEM stream,
2147                                      Z_SortKeySpecList *sort_sequence,
2148                                      NMEM rset_nmem,
2149                                      RSET *rset,
2150                                      struct rset_key_control *kc)
2151 {
2152     ZEBRA_RES res = ZEBRA_OK;
2153     const char *index_type;
2154     char *search_type = NULL;
2155     char rank_type[128];
2156     int complete_flag;
2157     int sort_flag;
2158     char termz[IT_MAX_WORD+1];
2159     int xpath_len;
2160     const char *xpath_use = 0;
2161     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2162
2163     if (!log_level_set)
2164     {
2165         log_level_rpn = yaz_log_module_level("rpn");
2166         log_level_set = 1;
2167     }
2168     zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2169                     rank_type, &complete_flag, &sort_flag);
2170     
2171     yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2172     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2173     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2174     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2175
2176     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2177         return ZEBRA_FAIL;
2178
2179     if (sort_flag)
2180         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2181                              rank_type, rset_nmem, rset, kc);
2182     /* consider if an X-Path query is used */
2183     xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
2184                                 xpath, MAX_XPATH_STEPS, stream);
2185     if (xpath_len >= 0)
2186     {
2187         if (xpath[xpath_len-1].part[0] == '@') 
2188             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2189         else
2190             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
2191
2192         if (1)
2193         {
2194             AttrType relation;
2195             int relation_value;
2196
2197             attr_init_APT(&relation, zapt, 2);
2198             relation_value = attr_find(&relation, NULL);
2199
2200             if (relation_value == 103) /* alwaysmatches */
2201             {
2202                 *rset = 0; /* signal no "term" set */
2203                 return rpn_search_xpath(zh, stream, rank_type, *rset, 
2204                                         xpath_len, xpath, rset_nmem, rset, kc);
2205             }
2206         }
2207     }
2208
2209     /* search using one of the various search type strategies
2210        termz is our UTF-8 search term
2211        attributeSet is top-level default attribute set 
2212        stream is ODR for search
2213        reg_id is the register type
2214        complete_flag is 1 for complete subfield, 0 for incomplete
2215        xpath_use is use-attribute to be used for X-Path search, 0 for none
2216     */
2217     if (!strcmp(search_type, "phrase"))
2218     {
2219         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2220                                     index_type, complete_flag, rank_type,
2221                                     xpath_use,
2222                                     rset_nmem,
2223                                     rset, kc);
2224     }
2225     else if (!strcmp(search_type, "and-list"))
2226     {
2227         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2228                                       index_type, complete_flag, rank_type,
2229                                       xpath_use,
2230                                       rset_nmem,
2231                                       rset, kc);
2232     }
2233     else if (!strcmp(search_type, "or-list"))
2234     {
2235         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2236                                      index_type, complete_flag, rank_type,
2237                                      xpath_use,
2238                                      rset_nmem,
2239                                      rset, kc);
2240     }
2241     else if (!strcmp(search_type, "local"))
2242     {
2243         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2244                                    rank_type, rset_nmem, rset, kc);
2245     }
2246     else if (!strcmp(search_type, "numeric"))
2247     {
2248         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2249                                      index_type, complete_flag, rank_type,
2250                                      xpath_use,
2251                                      rset_nmem,
2252                                      rset, kc);
2253     }
2254     else
2255     {
2256         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2257         res = ZEBRA_FAIL;
2258     }
2259     if (res != ZEBRA_OK)
2260         return res;
2261     if (!*rset)
2262         return ZEBRA_FAIL;
2263     return rpn_search_xpath(zh, stream, rank_type, *rset, 
2264                             xpath_len, xpath, rset_nmem, rset, kc);
2265 }
2266
2267 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2268                                       const Odr_oid *attributeSet, 
2269                                       NMEM stream, NMEM rset_nmem,
2270                                       Z_SortKeySpecList *sort_sequence,
2271                                       int num_bases, char **basenames,
2272                                       RSET **result_sets, int *num_result_sets,
2273                                       Z_Operator *parent_op,
2274                                       struct rset_key_control *kc);
2275
2276 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2277                                    zint *approx_limit)
2278 {
2279     ZEBRA_RES res = ZEBRA_OK;
2280     if (zs->which == Z_RPNStructure_complex)
2281     {
2282         if (res == ZEBRA_OK)
2283             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2284                                            approx_limit);
2285         if (res == ZEBRA_OK)
2286             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2287                                            approx_limit);
2288     }
2289     else if (zs->which == Z_RPNStructure_simple)
2290     {
2291         if (zs->u.simple->which == Z_Operand_APT)
2292         {
2293             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2294             AttrType global_hits_limit_attr;
2295             int l;
2296             
2297             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2298             
2299             l = attr_find(&global_hits_limit_attr, NULL);
2300             if (l != -1)
2301                 *approx_limit = l;
2302         }
2303     }
2304     return res;
2305 }
2306
2307 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2308                          const Odr_oid *attributeSet, 
2309                          NMEM stream, NMEM rset_nmem,
2310                          Z_SortKeySpecList *sort_sequence,
2311                          int num_bases, char **basenames,
2312                          RSET *result_set)
2313 {
2314     RSET *result_sets = 0;
2315     int num_result_sets = 0;
2316     ZEBRA_RES res;
2317     struct rset_key_control *kc = zebra_key_control_create(zh);
2318
2319     res = rpn_search_structure(zh, zs, attributeSet,
2320                                stream, rset_nmem,
2321                                sort_sequence, 
2322                                num_bases, basenames,
2323                                &result_sets, &num_result_sets,
2324                                0 /* no parent op */,
2325                                kc);
2326     if (res != ZEBRA_OK)
2327     {
2328         int i;
2329         for (i = 0; i<num_result_sets; i++)
2330             rset_delete(result_sets[i]);
2331         *result_set = 0;
2332     }
2333     else
2334     {
2335         assert(num_result_sets == 1);
2336         assert(result_sets);
2337         assert(*result_sets);
2338         *result_set = *result_sets;
2339     }
2340     (*kc->dec)(kc);
2341     return res;
2342 }
2343
2344 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2345                                const Odr_oid *attributeSet, 
2346                                NMEM stream, NMEM rset_nmem,
2347                                Z_SortKeySpecList *sort_sequence,
2348                                int num_bases, char **basenames,
2349                                RSET **result_sets, int *num_result_sets,
2350                                Z_Operator *parent_op,
2351                                struct rset_key_control *kc)
2352 {
2353     *num_result_sets = 0;
2354     if (zs->which == Z_RPNStructure_complex)
2355     {
2356         ZEBRA_RES res;
2357         Z_Operator *zop = zs->u.complex->roperator;
2358         RSET *result_sets_l = 0;
2359         int num_result_sets_l = 0;
2360         RSET *result_sets_r = 0;
2361         int num_result_sets_r = 0;
2362
2363         res = rpn_search_structure(zh, zs->u.complex->s1,
2364                                    attributeSet, stream, rset_nmem,
2365                                    sort_sequence,
2366                                    num_bases, basenames,
2367                                    &result_sets_l, &num_result_sets_l,
2368                                    zop, kc);
2369         if (res != ZEBRA_OK)
2370         {
2371             int i;
2372             for (i = 0; i<num_result_sets_l; i++)
2373                 rset_delete(result_sets_l[i]);
2374             return res;
2375         }
2376         res = rpn_search_structure(zh, zs->u.complex->s2,
2377                                    attributeSet, stream, rset_nmem,
2378                                    sort_sequence,
2379                                    num_bases, basenames,
2380                                    &result_sets_r, &num_result_sets_r,
2381                                    zop, kc);
2382         if (res != ZEBRA_OK)
2383         {
2384             int i;
2385             for (i = 0; i<num_result_sets_l; i++)
2386                 rset_delete(result_sets_l[i]);
2387             for (i = 0; i<num_result_sets_r; i++)
2388                 rset_delete(result_sets_r[i]);
2389             return res;
2390         }
2391
2392         /* make a new list of result for all children */
2393         *num_result_sets = num_result_sets_l + num_result_sets_r;
2394         *result_sets = nmem_malloc(stream, *num_result_sets * 
2395                                    sizeof(**result_sets));
2396         memcpy(*result_sets, result_sets_l, 
2397                num_result_sets_l * sizeof(**result_sets));
2398         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2399                num_result_sets_r * sizeof(**result_sets));
2400
2401         if (!parent_op || parent_op->which != zop->which
2402             || (zop->which != Z_Operator_and &&
2403                 zop->which != Z_Operator_or))
2404         {
2405             /* parent node different from this one (or non-present) */
2406             /* we must combine result sets now */
2407             RSET rset;
2408             switch (zop->which)
2409             {
2410             case Z_Operator_and:
2411                 rset = rset_create_and(rset_nmem, kc,
2412                                        kc->scope,
2413                                        *num_result_sets, *result_sets);
2414                 break;
2415             case Z_Operator_or:
2416                 rset = rset_create_or(rset_nmem, kc,
2417                                       kc->scope, 0, /* termid */
2418                                       *num_result_sets, *result_sets);
2419                 break;
2420             case Z_Operator_and_not:
2421                 rset = rset_create_not(rset_nmem, kc,
2422                                        kc->scope,
2423                                        (*result_sets)[0],
2424                                        (*result_sets)[1]);
2425                 break;
2426             case Z_Operator_prox:
2427                 if (zop->u.prox->which != Z_ProximityOperator_known)
2428                 {
2429                     zebra_setError(zh, 
2430                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2431                                    0);
2432                     return ZEBRA_FAIL;
2433                 }
2434                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2435                 {
2436                     zebra_setError_zint(zh,
2437                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2438                                         *zop->u.prox->u.known);
2439                     return ZEBRA_FAIL;
2440                 }
2441                 else
2442                 {
2443                     rset = rset_create_prox(rset_nmem, kc,
2444                                             kc->scope,
2445                                             *num_result_sets, *result_sets, 
2446                                             *zop->u.prox->ordered,
2447                                             (!zop->u.prox->exclusion ? 
2448                                              0 : *zop->u.prox->exclusion),
2449                                             *zop->u.prox->relationType,
2450                                             *zop->u.prox->distance );
2451                 }
2452                 break;
2453             default:
2454                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2455                 return ZEBRA_FAIL;
2456             }
2457             *num_result_sets = 1;
2458             *result_sets = nmem_malloc(stream, *num_result_sets * 
2459                                        sizeof(**result_sets));
2460             (*result_sets)[0] = rset;
2461         }
2462     }
2463     else if (zs->which == Z_RPNStructure_simple)
2464     {
2465         RSET rset;
2466         ZEBRA_RES res;
2467
2468         if (zs->u.simple->which == Z_Operand_APT)
2469         {
2470             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2471             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2472                                  attributeSet, stream, sort_sequence,
2473                                  num_bases, basenames, rset_nmem, &rset,
2474                                  kc);
2475             if (res != ZEBRA_OK)
2476                 return res;
2477         }
2478         else if (zs->u.simple->which == Z_Operand_resultSetId)
2479         {
2480             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2481             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2482             if (!rset)
2483             {
2484                 zebra_setError(zh, 
2485                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2486                                zs->u.simple->u.resultSetId);
2487                 return ZEBRA_FAIL;
2488             }
2489             rset_dup(rset);
2490         }
2491         else
2492         {
2493             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2494             return ZEBRA_FAIL;
2495         }
2496         *num_result_sets = 1;
2497         *result_sets = nmem_malloc(stream, *num_result_sets * 
2498                                    sizeof(**result_sets));
2499         (*result_sets)[0] = rset;
2500     }
2501     else
2502     {
2503         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2504         return ZEBRA_FAIL;
2505     }
2506     return ZEBRA_OK;
2507 }
2508
2509
2510
2511 /*
2512  * Local variables:
2513  * c-basic-offset: 4
2514  * indent-tabs-mode: nil
2515  * End:
2516  * vim: shiftwidth=4 tabstop=8 expandtab
2517  */
2518