Changed type of index_type char/int to string.
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* $Id: rpnsearch.c,v 1.16 2007-10-29 16:57:53 adam Exp $
2    Copyright (C) 1995-2007
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
/* Lazily resolved log level for the "rpn" log module: log_level_set
   records whether log_level_rpn has been looked up yet. */
static int log_level_set = 0;
static int log_level_rpn = 0;

/* Compile-time switch; its consumers are outside this part of the file. */
#define TERMSET_DISABLE 1
44
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
46 {
47     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
49 #if 0
50     if (out && *out)
51     {
52         const char *outp = *out;
53         yaz_log(YLOG_LOG, "---");
54         while (*outp)
55         {
56             yaz_log(YLOG_LOG, "%02X", *outp);
57             outp++;
58         }
59     }
60 #endif
61     return out;
62 }
63
64 void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
65                           struct rpn_char_map_info *map_info)
66 {
67     map_info->zm = reg->zebra_maps;
68     map_info->reg_type = reg_type;
69     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
70 }
71
72 #define TERM_COUNT        
73        
74 struct grep_info {        
75 #ifdef TERM_COUNT        
76     int *term_no;        
77 #endif        
78     ISAM_P *isam_p_buf;
79     int isam_p_size;        
80     int isam_p_indx;
81     int trunc_max;
82     ZebraHandle zh;
83     int reg_type;
84     ZebraSet termset;
85 };        
86
87 static int add_isam_p(const char *name, const char *info,
88                       struct grep_info *p)
89 {
90     if (!log_level_set)
91     {
92         log_level_rpn = yaz_log_module_level("rpn");
93         log_level_set = 1;
94     }
95     /* we may have to stop this madness.. NOTE: -1 so that if
96        truncmax == trunxlimit we do *not* generate result sets */
97     if (p->isam_p_indx >= p->trunc_max - 1)
98         return 1;
99
100     if (p->isam_p_indx == p->isam_p_size)
101     {
102         ISAM_P *new_isam_p_buf;
103 #ifdef TERM_COUNT        
104         int *new_term_no;        
105 #endif
106         p->isam_p_size = 2*p->isam_p_size + 100;
107         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
108                                             p->isam_p_size);
109         if (p->isam_p_buf)
110         {
111             memcpy(new_isam_p_buf, p->isam_p_buf,
112                     p->isam_p_indx * sizeof(*p->isam_p_buf));
113             xfree(p->isam_p_buf);
114         }
115         p->isam_p_buf = new_isam_p_buf;
116
117 #ifdef TERM_COUNT
118         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
119         if (p->term_no)
120         {
121             memcpy(new_term_no, p->isam_p_buf,
122                     p->isam_p_indx * sizeof(*p->term_no));
123             xfree(p->term_no);
124         }
125         p->term_no = new_term_no;
126 #endif
127     }
128     assert(*info == sizeof(*p->isam_p_buf));
129     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
130
131     if (p->termset)
132     {
133         const char *db;
134         char term_tmp[IT_MAX_WORD];
135         int ord = 0;
136         const char *index_name;
137         int len = key_SU_decode(&ord, (const unsigned char *) name);
138         
139         zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len);
140         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
141         zebraExplain_lookup_ord(p->zh->reg->zei,
142                                 ord, 0 /* index_type */, &db, &index_name);
143         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
144         
145         resultSetAddTerm(p->zh, p->termset, name[len], db,
146                          index_name, term_tmp);
147     }
148     (p->isam_p_indx)++;
149     return 0;
150 }
151
/* Untyped callback adapter: p is a struct grep_info and the call is
   handed on to add_isam_p(), whose result is returned. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *gi = (struct grep_info *) p;

    return add_isam_p(name, info, gi);
}
156
157 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
158                     const char *ct1, const char *ct2, int first)
159 {
160     const char *s1, *s0 = *src;
161     const char **map;
162
163     /* skip white space */
164     while (*s0)
165     {
166         if (ct1 && strchr(ct1, *s0))
167             break;
168         if (ct2 && strchr(ct2, *s0))
169             break;
170         s1 = s0;
171         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
172         if (**map != *CHR_SPACE)
173             break;
174         s0 = s1;
175     }
176     *src = s0;
177     return *s0;
178 }
179
180
/* esc_str - render a byte buffer in readable form for diagnostics.
 *  out_buf/out_size: destination; out_size must exceed 20
 *  in_buf/in_size:   bytes to render
 * Each byte becomes "HH:c  " (hex value, then the character itself or
 * '?' when outside 32..126).  When fewer than 20 bytes of room are
 * left, ".." is appended and the rest of the input is dropped.
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;   /* current length of the string in out_buf */
    int idx = 0;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    while (idx < in_size)
    {
        int byte = in_buf[idx] & 0xff;
        int shown = (byte >= 32 && byte <= 126) ? byte : '?';

        idx++;
        used += sprintf(out_buf + used, "%02X:%c  ", byte, shown);
        if (used > out_size-20)
        {
            strcpy(out_buf + used, "..");
            break;
        }
    }
}
206
207 #define REGEX_CHARS " []()|.*+?!"
208
209 static void add_non_space(const char *start, const char *end,
210                           WRBUF term_dict,
211                           char *dst_term, int *dst_ptr,
212                           const char **map, int q_map_match)
213 {
214     size_t sz = end - start;
215     memcpy(dst_term + *dst_ptr, start, sz);
216     (*dst_ptr) += sz;
217     if (!q_map_match)
218     {
219         while (start < end)
220         {
221             if (strchr(REGEX_CHARS, *start))
222                 wrbuf_putc(term_dict, '\\');
223             wrbuf_putc(term_dict, *start);
224             start++;
225         }
226     }
227     else
228     {
229         char tmpbuf[80];
230         esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
231         
232         wrbuf_puts(term_dict, map[0]);
233     }
234 }
235
236 /* term_100: handle term, where trunc = none(no operators at all) */
237 static int term_100(ZebraMaps zebra_maps, const char *index_type,
238                     const char **src, WRBUF term_dict, int space_split,
239                     char *dst_term)
240 {
241     const char *s0;
242     const char **map;
243     int i = 0;
244     int j = 0;
245
246     const char *space_start = 0;
247     const char *space_end = 0;
248
249     if (!term_pre(zebra_maps, *index_type, src, NULL, NULL, !space_split))
250         return 0;
251     s0 = *src;
252     while (*s0)
253     {
254         const char *s1 = s0;
255         int q_map_match = 0;
256         map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0), 
257                                 &q_map_match);
258         if (space_split)
259         {
260             if (**map == *CHR_SPACE)
261                 break;
262         }
263         else  /* complete subfield only. */
264         {
265             if (**map == *CHR_SPACE)
266             {   /* save space mapping for later  .. */
267                 space_start = s1;
268                 space_end = s0;
269                 continue;
270             }
271             else if (space_start)
272             {   /* reload last space */
273                 while (space_start < space_end)
274                 {
275                     if (strchr(REGEX_CHARS, *space_start))
276                         wrbuf_putc(term_dict, '\\');
277                     dst_term[j++] = *space_start;
278                     wrbuf_putc(term_dict, *space_start);
279                     space_start++;
280                                
281                 }
282                 /* and reset */
283                 space_start = space_end = 0;
284             }
285         }
286         i++;
287
288         add_non_space(s1, s0, term_dict, dst_term, &j,
289                       map, q_map_match);
290     }
291     dst_term[j] = '\0';
292     *src = s0;
293     return i;
294 }
295
296 /* term_101: handle term, where trunc = Process # */
297 static int term_101(ZebraMaps zebra_maps, const char *index_type,
298                     const char **src, WRBUF term_dict, int space_split,
299                     char *dst_term)
300 {
301     const char *s0;
302     const char **map;
303     int i = 0;
304     int j = 0;
305
306     if (!term_pre(zebra_maps, *index_type, src, "#", "#", !space_split))
307         return 0;
308     s0 = *src;
309     while (*s0)
310     {
311         if (*s0 == '#')
312         {
313             i++;
314             wrbuf_puts(term_dict, ".*");
315             dst_term[j++] = *s0++;
316         }
317         else
318         {
319             const char *s1 = s0;
320             int q_map_match = 0;
321             map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0), 
322                                     &q_map_match);
323             if (space_split && **map == *CHR_SPACE)
324                 break;
325
326             i++;
327             add_non_space(s1, s0, term_dict, dst_term, &j,
328                           map, q_map_match);
329         }
330     }
331     dst_term[j++] = '\0';
332     *src = s0;
333     return i;
334 }
335
336 /* term_103: handle term, where trunc = re-2 (regular expressions) */
337 static int term_103(ZebraMaps zebra_maps, const char *index_type, 
338                     const char **src,
339                     WRBUF term_dict, int *errors, int space_split,
340                     char *dst_term)
341 {
342     int i = 0;
343     int j = 0;
344     const char *s0;
345     const char **map;
346
347     if (!term_pre(zebra_maps, *index_type, src, "^\\()[].*+?|", "(", !space_split))
348         return 0;
349     s0 = *src;
350     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
351         isdigit(((const unsigned char *)s0)[1]))
352     {
353         *errors = s0[1] - '0';
354         s0 += 3;
355         if (*errors > 3)
356             *errors = 3;
357     }
358     while (*s0)
359     {
360         if (strchr("^\\()[].*+?|-", *s0))
361         {
362             dst_term[j++] = *s0;
363             wrbuf_putc(term_dict, *s0);
364             s0++;
365             i++;
366         }
367         else
368         {
369             const char *s1 = s0;
370             int q_map_match = 0;
371             map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0), 
372                                     &q_map_match);
373             if (space_split && **map == *CHR_SPACE)
374                 break;
375
376             i++;
377             add_non_space(s1, s0, term_dict, dst_term, &j,
378                           map, q_map_match);
379         }
380     }
381     dst_term[j] = '\0';
382     *src = s0;
383     
384     return i;
385 }
386
387 /* term_103: handle term, where trunc = re-1 (regular expressions) */
388 static int term_102(ZebraMaps zebra_maps, const char *index_type, 
389                     const char **src,
390                     WRBUF term_dict, int space_split, char *dst_term)
391 {
392     return term_103(zebra_maps, index_type, src, term_dict, NULL, space_split,
393                     dst_term);
394 }
395
396
397 /* term_104: handle term, process # and ! */
398 static int term_104(ZebraMaps zebra_maps, const char *index_type,
399                     const char **src, WRBUF term_dict, int space_split,
400                     char *dst_term)
401 {
402     const char *s0;
403     const char **map;
404     int i = 0;
405     int j = 0;
406
407     if (!term_pre(zebra_maps, *index_type, src, "?*#", "?*#", !space_split))
408         return 0;
409     s0 = *src;
410     while (*s0)
411     {
412         if (*s0 == '?')
413         {
414             i++;
415             dst_term[j++] = *s0++;
416             if (*s0 >= '0' && *s0 <= '9')
417             {
418                 int limit = 0;
419                 while (*s0 >= '0' && *s0 <= '9')
420                 {
421                     limit = limit * 10 + (*s0 - '0');
422                     dst_term[j++] = *s0++;
423                 }
424                 if (limit > 20)
425                     limit = 20;
426                 while (--limit >= 0)
427                 {
428                     wrbuf_puts(term_dict, ".?");
429                 }
430             }
431             else
432             {
433                 wrbuf_puts(term_dict, ".*");
434             }
435         }
436         else if (*s0 == '*')
437         {
438             i++;
439             wrbuf_puts(term_dict, ".*");
440             dst_term[j++] = *s0++;
441         }
442         else if (*s0 == '#')
443         {
444             i++;
445             wrbuf_puts(term_dict, ".");
446             dst_term[j++] = *s0++;
447         }
448         else
449         {
450             const char *s1 = s0;
451             int q_map_match = 0;
452             map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0), 
453                                     &q_map_match);
454             if (space_split && **map == *CHR_SPACE)
455                 break;
456
457             i++;
458             add_non_space(s1, s0, term_dict, dst_term, &j,
459                           map, q_map_match);
460         }
461     }
462     dst_term[j++] = '\0';
463     *src = s0;
464     return i;
465 }
466
467 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
468 static int term_105(ZebraMaps zebra_maps, const char *index_type,
469                     const char **src, WRBUF term_dict, int space_split,
470                     char *dst_term, int right_truncate)
471 {
472     const char *s0;
473     const char **map;
474     int i = 0;
475     int j = 0;
476
477     if (!term_pre(zebra_maps, *index_type, src, "*!", "*!", !space_split))
478         return 0;
479     s0 = *src;
480     while (*s0)
481     {
482         if (*s0 == '*')
483         {
484             i++;
485             wrbuf_puts(term_dict, ".*");
486             dst_term[j++] = *s0++;
487         }
488         else if (*s0 == '!')
489         {
490             i++;
491             wrbuf_putc(term_dict, '.');
492             dst_term[j++] = *s0++;
493         }
494         else
495         {
496             const char *s1 = s0;
497             int q_map_match = 0;
498             map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0), 
499                                     &q_map_match);
500             if (space_split && **map == *CHR_SPACE)
501                 break;
502
503             i++;
504             add_non_space(s1, s0, term_dict, dst_term, &j,
505                           map, q_map_match);
506         }
507     }
508     if (right_truncate)
509         wrbuf_puts(term_dict, ".*");
510     dst_term[j++] = '\0';
511     *src = s0;
512     return i;
513 }
514
515
516 /* gen_regular_rel - generate regular expression from relation
517  *  val:     border value (inclusive)
518  *  islt:    1 if <=; 0 if >=.
519  */
520 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
521 {
522     char dst_buf[20*5*20]; /* assuming enough for expansion */
523     char *dst = dst_buf;
524     int dst_p;
525     int w, d, i;
526     int pos = 0;
527     char numstr[20];
528
529     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
530     if (val >= 0)
531     {
532         if (islt)
533             strcpy(dst, "(-[0-9]+|(");
534         else
535             strcpy(dst, "((");
536     } 
537     else
538     {
539         if (!islt)
540         {
541             strcpy(dst, "([0-9]+|-(");
542             islt = 1;
543         }
544         else
545         {
546             strcpy(dst, "(-(");
547             islt = 0;
548         }
549         val = -val;
550     }
551     dst_p = strlen(dst);
552     sprintf(numstr, "%d", val);
553     for (w = strlen(numstr); --w >= 0; pos++)
554     {
555         d = numstr[w];
556         if (pos > 0)
557         {
558             if (islt)
559             {
560                 if (d == '0')
561                     continue;
562                 d--;
563             } 
564             else
565             {
566                 if (d == '9')
567                     continue;
568                 d++;
569             }
570         }
571         
572         strcpy(dst + dst_p, numstr);
573         dst_p = strlen(dst) - pos - 1;
574
575         if (islt)
576         {
577             if (d != '0')
578             {
579                 dst[dst_p++] = '[';
580                 dst[dst_p++] = '0';
581                 dst[dst_p++] = '-';
582                 dst[dst_p++] = d;
583                 dst[dst_p++] = ']';
584             }
585             else
586                 dst[dst_p++] = d;
587         }
588         else
589         {
590             if (d != '9')
591             { 
592                 dst[dst_p++] = '[';
593                 dst[dst_p++] = d;
594                 dst[dst_p++] = '-';
595                 dst[dst_p++] = '9';
596                 dst[dst_p++] = ']';
597             }
598             else
599                 dst[dst_p++] = d;
600         }
601         for (i = 0; i<pos; i++)
602         {
603             dst[dst_p++] = '[';
604             dst[dst_p++] = '0';
605             dst[dst_p++] = '-';
606             dst[dst_p++] = '9';
607             dst[dst_p++] = ']';
608         }
609         dst[dst_p++] = '|';
610     }
611     dst[dst_p] = '\0';
612     if (islt)
613     {
614         /* match everything less than 10^(pos-1) */
615         strcat(dst, "0*");
616         for (i = 1; i<pos; i++)
617             strcat(dst, "[0-9]?");
618     }
619     else
620     {
621         /* match everything greater than 10^pos */
622         for (i = 0; i <= pos; i++)
623             strcat(dst, "[0-9]");
624         strcat(dst, "[0-9]*");
625     }
626     strcat(dst, "))");
627     wrbuf_puts(term_dict, dst);
628 }
629
630 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
631 {
632     const char *src = wrbuf_cstr(wsrc);
633     if (src[*indx] == '\\')
634     {
635         wrbuf_putc(term_p, src[*indx]);
636         (*indx)++;
637     }
638     wrbuf_putc(term_p, src[*indx]);
639     (*indx)++;
640 }
641
642 /*
643  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
644  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
645  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
646  *              ([^-a].*|a[^-b].*|ab[c-].*)
647  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
648  *              ([^a-].*|a[^b-].*|ab[^c-].*)
649  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
650  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
651  */
652 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
653                            const char **term_sub, WRBUF term_dict,
654                            const Odr_oid *attributeSet,
655                            const char *index_type, int space_split, char *term_dst,
656                            int *error_code)
657 {
658     AttrType relation;
659     int relation_value;
660     int i;
661     WRBUF term_component = wrbuf_alloc();
662
663     attr_init_APT(&relation, zapt, 2);
664     relation_value = attr_find(&relation, NULL);
665
666     *error_code = 0;
667     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
668     switch (relation_value)
669     {
670     case 1:
671         if (!term_100(zh->reg->zebra_maps, index_type,
672                       term_sub, term_component,
673                       space_split, term_dst))
674         {
675             wrbuf_destroy(term_component);
676             return 0;
677         }
678         yaz_log(log_level_rpn, "Relation <");
679         
680         wrbuf_putc(term_dict, '(');
681         for (i = 0; i < wrbuf_len(term_component); )
682         {
683             int j = 0;
684             
685             if (i)
686                 wrbuf_putc(term_dict, '|');
687             while (j < i)
688                 string_rel_add_char(term_dict, term_component, &j);
689
690             wrbuf_putc(term_dict, '[');
691
692             wrbuf_putc(term_dict, '^');
693             
694             wrbuf_putc(term_dict, 1);
695             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
696             
697             string_rel_add_char(term_dict, term_component, &i);
698             wrbuf_putc(term_dict, '-');
699             
700             wrbuf_putc(term_dict, ']');
701             wrbuf_putc(term_dict, '.');
702             wrbuf_putc(term_dict, '*');
703         }
704         wrbuf_putc(term_dict, ')');
705         break;
706     case 2:
707         if (!term_100(zh->reg->zebra_maps, index_type,
708                       term_sub, term_component,
709                       space_split, term_dst))
710         {
711             wrbuf_destroy(term_component);
712             return 0;
713         }
714         yaz_log(log_level_rpn, "Relation <=");
715
716         wrbuf_putc(term_dict, '(');
717         for (i = 0; i < wrbuf_len(term_component); )
718         {
719             int j = 0;
720
721             while (j < i)
722                 string_rel_add_char(term_dict, term_component, &j);
723             wrbuf_putc(term_dict, '[');
724
725             wrbuf_putc(term_dict, '^');
726
727             wrbuf_putc(term_dict, 1);
728             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
729
730             string_rel_add_char(term_dict, term_component, &i);
731             wrbuf_putc(term_dict, '-');
732
733             wrbuf_putc(term_dict, ']');
734             wrbuf_putc(term_dict, '.');
735             wrbuf_putc(term_dict, '*');
736
737             wrbuf_putc(term_dict, '|');
738         }
739         for (i = 0; i < wrbuf_len(term_component); )
740             string_rel_add_char(term_dict, term_component, &i);
741         wrbuf_putc(term_dict, ')');
742         break;
743     case 5:
744         if (!term_100(zh->reg->zebra_maps, index_type,
745                       term_sub, term_component, space_split, term_dst))
746         {
747             wrbuf_destroy(term_component);
748             return 0;
749         }
750         yaz_log(log_level_rpn, "Relation >");
751
752         wrbuf_putc(term_dict, '(');
753         for (i = 0; i < wrbuf_len(term_component); )
754         {
755             int j = 0;
756
757             while (j < i)
758                 string_rel_add_char(term_dict, term_component, &j);
759             wrbuf_putc(term_dict, '[');
760             
761             wrbuf_putc(term_dict, '^');
762             wrbuf_putc(term_dict, '-');
763             string_rel_add_char(term_dict, term_component, &i);
764
765             wrbuf_putc(term_dict, ']');
766             wrbuf_putc(term_dict, '.');
767             wrbuf_putc(term_dict, '*');
768
769             wrbuf_putc(term_dict, '|');
770         }
771         for (i = 0; i < wrbuf_len(term_component); )
772             string_rel_add_char(term_dict, term_component, &i);
773         wrbuf_putc(term_dict, '.');
774         wrbuf_putc(term_dict, '+');
775         wrbuf_putc(term_dict, ')');
776         break;
777     case 4:
778         if (!term_100(zh->reg->zebra_maps, index_type, term_sub,
779                       term_component, space_split, term_dst))
780         {
781             wrbuf_destroy(term_component);
782             return 0;
783         }
784         yaz_log(log_level_rpn, "Relation >=");
785
786         wrbuf_putc(term_dict, '(');
787         for (i = 0; i < wrbuf_len(term_component); )
788         {
789             int j = 0;
790
791             if (i)
792                 wrbuf_putc(term_dict, '|');
793             while (j < i)
794                 string_rel_add_char(term_dict, term_component, &j);
795             wrbuf_putc(term_dict, '[');
796
797             if (i < wrbuf_len(term_component)-1)
798             {
799                 wrbuf_putc(term_dict, '^');
800                 wrbuf_putc(term_dict, '-');
801                 string_rel_add_char(term_dict, term_component, &i);
802             }
803             else
804             {
805                 string_rel_add_char(term_dict, term_component, &i);
806                 wrbuf_putc(term_dict, '-');
807             }
808             wrbuf_putc(term_dict, ']');
809             wrbuf_putc(term_dict, '.');
810             wrbuf_putc(term_dict, '*');
811         }
812         wrbuf_putc(term_dict, ')');
813         break;
814     case 3:
815     case 102:
816     case -1:
817         if (!**term_sub)
818             return 1;
819         yaz_log(log_level_rpn, "Relation =");
820         if (!term_100(zh->reg->zebra_maps, index_type, term_sub,
821                       term_component, space_split, term_dst))
822         {
823             wrbuf_destroy(term_component);
824             return 0;
825         }
826         wrbuf_puts(term_dict, "(");
827         wrbuf_puts(term_dict, wrbuf_cstr(term_component));
828         wrbuf_puts(term_dict, ")");
829         break;
830     case 103:
831         yaz_log(log_level_rpn, "Relation always matches");
832         /* skip to end of term (we don't care what it is) */
833         while (**term_sub != '\0')
834             (*term_sub)++;
835         break;
836     default:
837         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
838         wrbuf_destroy(term_component);
839         return 0;
840     }
841     wrbuf_destroy(term_component);
842     return 1;
843 }
844
845 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
846                              const char **term_sub, 
847                              WRBUF term_dict,
848                              const Odr_oid *attributeSet, NMEM stream,
849                              struct grep_info *grep_info,
850                              const char *index_type, int complete_flag,
851                              int num_bases, char **basenames,
852                              char *term_dst,
853                              const char *xpath_use,
854                              struct ord_list **ol);
855
856 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
857                                  Z_AttributesPlusTerm *zapt,
858                                  zint *hits_limit_value,
859                                  const char **term_ref_id_str,
860                                  NMEM nmem)
861 {
862     AttrType term_ref_id_attr;
863     AttrType hits_limit_attr;
864     int term_ref_id_int;
865  
866     attr_init_APT(&hits_limit_attr, zapt, 11);
867     *hits_limit_value  = attr_find(&hits_limit_attr, NULL);
868
869     attr_init_APT(&term_ref_id_attr, zapt, 10);
870     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
871     if (term_ref_id_int >= 0)
872     {
873         char *res = nmem_malloc(nmem, 20);
874         sprintf(res, "%d", term_ref_id_int);
875         *term_ref_id_str = res;
876     }
877
878     /* no limit given ? */
879     if (*hits_limit_value == -1)
880     {
881         if (*term_ref_id_str)
882         {
883             /* use global if term_ref is present */
884             *hits_limit_value = zh->approx_limit;
885         }
886         else
887         {
888             /* no counting if term_ref is not present */
889             *hits_limit_value = 0;
890         }
891     }
892     else if (*hits_limit_value == 0)
893     {
894         /* 0 is the same as global limit */
895         *hits_limit_value = zh->approx_limit;
896     }
897     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
898             *term_ref_id_str ? *term_ref_id_str : "none",
899             *hits_limit_value);
900     return ZEBRA_OK;
901 }
902
903 static ZEBRA_RES term_trunc(ZebraHandle zh,
904                             Z_AttributesPlusTerm *zapt,
905                             const char **term_sub, 
906                             const Odr_oid *attributeSet, NMEM stream,
907                             struct grep_info *grep_info,
908                             const char *index_type, int complete_flag,
909                             int num_bases, char **basenames,
910                             char *term_dst,
911                             const char *rank_type, 
912                             const char *xpath_use,
913                             NMEM rset_nmem,
914                             RSET *rset,
915                             struct rset_key_control *kc)
916 {
917     ZEBRA_RES res;
918     struct ord_list *ol;
919     zint hits_limit_value;
920     const char *term_ref_id_str = 0;
921     WRBUF term_dict = wrbuf_alloc();
922
923     *rset = 0;
924     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
925     grep_info->isam_p_indx = 0;
926     res = string_term(zh, zapt, term_sub, term_dict,
927                       attributeSet, stream, grep_info,
928                       index_type, complete_flag, num_bases, basenames,
929                       term_dst, xpath_use, &ol);
930     wrbuf_destroy(term_dict);
931     if (res != ZEBRA_OK)
932         return res;
933     if (!*term_sub)  /* no more terms ? */
934         return res;
935     yaz_log(log_level_rpn, "term: %s", term_dst);
936     *rset = rset_trunc(zh, grep_info->isam_p_buf,
937                        grep_info->isam_p_indx, term_dst,
938                        strlen(term_dst), rank_type, 1 /* preserve pos */,
939                        zapt->term->which, rset_nmem,
940                        kc, kc->scope, ol, index_type, hits_limit_value,
941                        term_ref_id_str);
942     if (!*rset)
943         return ZEBRA_FAIL;
944     return ZEBRA_OK;
945 }
946
947 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
948                              const char **term_sub, 
949                              WRBUF term_dict,
950                              const Odr_oid *attributeSet, NMEM stream,
951                              struct grep_info *grep_info,
952                              const char *index_type, int complete_flag,
953                              int num_bases, char **basenames,
954                              char *term_dst,
955                              const char *xpath_use,
956                              struct ord_list **ol)
957 {
958     int r, base_no;
959     AttrType truncation;
960     int truncation_value;
961     const char *termp;
962     struct rpn_char_map_info rcmi;
963     int space_split = complete_flag ? 0 : 1;
964
965     int bases_ok = 0;     /* no of databases with OK attribute */
966
967     *ol = ord_list_create(stream);
968
969     rpn_char_map_prepare(zh->reg, *index_type, &rcmi);
970     attr_init_APT(&truncation, zapt, 5);
971     truncation_value = attr_find(&truncation, NULL);
972     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
973
974     for (base_no = 0; base_no < num_bases; base_no++)
975     {
976         int ord = -1;
977         int regex_range = 0;
978         int max_pos, prefix_len = 0;
979         int relation_error;
980         char ord_buf[32];
981         int ord_len, i;
982
983         termp = *term_sub; /* start of term for each database */
984
985
986         if (zebraExplain_curDatabase(zh->reg->zei, basenames[base_no]))
987         {
988             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
989                            basenames[base_no]);
990             return ZEBRA_FAIL;
991         }
992         
993         if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
994                               attributeSet, &ord) != ZEBRA_OK)
995             continue;
996
997
998         wrbuf_rewind(term_dict); /* new dictionary regexp term */
999
1000         bases_ok++;
1001
1002         *ol = ord_list_append(stream, *ol, ord);
1003         ord_len = key_SU_encode(ord, ord_buf);
1004         
1005         wrbuf_putc(term_dict, '(');
1006
1007         for (i = 0; i<ord_len; i++)
1008         {
1009             wrbuf_putc(term_dict, 1);  /* our internal regexp escape char */
1010             wrbuf_putc(term_dict, ord_buf[i]);
1011         }
1012         wrbuf_putc(term_dict, ')');
1013
1014         prefix_len = wrbuf_len(term_dict);
1015
1016         switch (truncation_value)
1017         {
1018         case -1:         /* not specified */
1019         case 100:        /* do not truncate */
1020             if (!string_relation(zh, zapt, &termp, term_dict,
1021                                  attributeSet,
1022                                  index_type, space_split, term_dst,
1023                                  &relation_error))
1024             {
1025                 if (relation_error)
1026                 {
1027                     zebra_setError(zh, relation_error, 0);
1028                     return ZEBRA_FAIL;
1029                 }
1030                 *term_sub = 0;
1031                 return ZEBRA_OK;
1032             }
1033             break;
1034         case 1:          /* right truncation */
1035             wrbuf_putc(term_dict, '(');
1036             if (!term_100(zh->reg->zebra_maps, index_type,
1037                           &termp, term_dict, space_split, term_dst))
1038             {
1039                 *term_sub = 0;
1040                 return ZEBRA_OK;
1041             }
1042             wrbuf_puts(term_dict, ".*)");
1043             break;
1044         case 2:          /* keft truncation */
1045             wrbuf_puts(term_dict, "(.*");
1046             if (!term_100(zh->reg->zebra_maps, index_type,
1047                           &termp, term_dict, space_split, term_dst))
1048             {
1049                 *term_sub = 0;
1050                 return ZEBRA_OK;
1051             }
1052             wrbuf_putc(term_dict, ')');
1053             break;
1054         case 3:          /* left&right truncation */
1055             wrbuf_puts(term_dict, "(.*");
1056             if (!term_100(zh->reg->zebra_maps, index_type,
1057                           &termp, term_dict, space_split, term_dst))
1058             {
1059                 *term_sub = 0;
1060                 return ZEBRA_OK;
1061             }
1062             wrbuf_puts(term_dict, ".*)");
1063             break;
1064         case 101:        /* process # in term */
1065             wrbuf_putc(term_dict, '(');
1066             if (!term_101(zh->reg->zebra_maps, index_type,
1067                           &termp, term_dict, space_split, term_dst))
1068             {
1069                 *term_sub = 0;
1070                 return ZEBRA_OK;
1071             }
1072             wrbuf_puts(term_dict, ")");
1073             break;
1074         case 102:        /* Regexp-1 */
1075             wrbuf_putc(term_dict, '(');
1076             if (!term_102(zh->reg->zebra_maps, index_type,
1077                           &termp, term_dict, space_split, term_dst))
1078             {
1079                 *term_sub = 0;
1080                 return ZEBRA_OK;
1081             }
1082             wrbuf_putc(term_dict, ')');
1083             break;
1084         case 103:       /* Regexp-2 */
1085             regex_range = 1;
1086             wrbuf_putc(term_dict, '(');
1087             if (!term_103(zh->reg->zebra_maps, index_type,
1088                           &termp, term_dict, &regex_range,
1089                           space_split, term_dst))
1090             {
1091                 *term_sub = 0;
1092                 return ZEBRA_OK;
1093             }
1094             wrbuf_putc(term_dict, ')');
1095             break;
1096         case 104:        /* process # and ! in term */
1097             wrbuf_putc(term_dict, '(');
1098             if (!term_104(zh->reg->zebra_maps, index_type,
1099                           &termp, term_dict, space_split, term_dst))
1100             {
1101                 *term_sub = 0;
1102                 return ZEBRA_OK;
1103             }
1104             wrbuf_putc(term_dict, ')');
1105             break;
1106         case 105:        /* process * and ! in term */
1107             wrbuf_putc(term_dict, '(');
1108             if (!term_105(zh->reg->zebra_maps, index_type,
1109                           &termp, term_dict, space_split, term_dst, 1))
1110             {
1111                 *term_sub = 0;
1112                 return ZEBRA_OK;
1113             }
1114             wrbuf_putc(term_dict, ')');
1115             break;
1116         case 106:        /* process * and ! in term */
1117             wrbuf_putc(term_dict, '(');
1118             if (!term_105(zh->reg->zebra_maps, index_type,
1119                           &termp, term_dict, space_split, term_dst, 0))
1120             {
1121                 *term_sub = 0;
1122                 return ZEBRA_OK;
1123             }
1124             wrbuf_putc(term_dict, ')');
1125             break;
1126         default:
1127             zebra_setError_zint(zh,
1128                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1129                                 truncation_value);
1130             return ZEBRA_FAIL;
1131         }
1132         if (1)
1133         {
1134             char buf[1000];
1135             const char *input = wrbuf_cstr(term_dict) + prefix_len;
1136             esc_str(buf, sizeof(buf), input, strlen(input));
1137         }
1138         yaz_log(log_level_rpn, "dict_lookup_grep: %s",
1139                 wrbuf_cstr(term_dict) + prefix_len);
1140         r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1141                              grep_info, &max_pos, 
1142                              ord_len /* number of "exact" chars */,
1143                              grep_handle);
1144         if (r == 1)
1145             zebra_set_partial_result(zh);
1146         else if (r)
1147             yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1148     }
1149     if (!bases_ok)
1150         return ZEBRA_FAIL;
1151     *term_sub = termp;
1152     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1153     return ZEBRA_OK;
1154 }
1155
1156
1157
1158 static void grep_info_delete(struct grep_info *grep_info)
1159 {
1160 #ifdef TERM_COUNT
1161     xfree(grep_info->term_no);
1162 #endif
1163     xfree(grep_info->isam_p_buf);
1164 }
1165
1166 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1167                                    Z_AttributesPlusTerm *zapt,
1168                                    struct grep_info *grep_info,
1169                                    int reg_type)
1170 {
1171 #ifdef TERM_COUNT
1172     grep_info->term_no = 0;
1173 #endif
1174     grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1175     grep_info->isam_p_size = 0;
1176     grep_info->isam_p_buf = NULL;
1177     grep_info->zh = zh;
1178     grep_info->reg_type = reg_type;
1179     grep_info->termset = 0;
1180     if (zapt)
1181     {
1182         AttrType truncmax;
1183         int truncmax_value;
1184
1185         attr_init_APT(&truncmax, zapt, 13);
1186         truncmax_value = attr_find(&truncmax, NULL);
1187         if (truncmax_value != -1)
1188             grep_info->trunc_max = truncmax_value;
1189     }
1190     if (zapt)
1191     {
1192         AttrType termset;
1193         int termset_value_numeric;
1194         const char *termset_value_string;
1195
1196         attr_init_APT(&termset, zapt, 8);
1197         termset_value_numeric =
1198             attr_find_ex(&termset, NULL, &termset_value_string);
1199         if (termset_value_numeric != -1)
1200         {
1201 #if TERMSET_DISABLE
1202             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1203             return ZEBRA_FAIL;
1204 #else
1205             char resname[32];
1206             const char *termset_name = 0;
1207             if (termset_value_numeric != -2)
1208             {
1209                 
1210                 sprintf(resname, "%d", termset_value_numeric);
1211                 termset_name = resname;
1212             }
1213             else
1214             termset_name = termset_value_string;
1215             yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1216             grep_info->termset = resultSetAdd(zh, termset_name, 1);
1217             if (!grep_info->termset)
1218             {
1219                 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1220                 return ZEBRA_FAIL;
1221             }
1222 #endif
1223         }
1224     }
1225     return ZEBRA_OK;
1226 }
1227                                
1228 /**
1229   \brief Create result set(s) for list of terms
1230   \param zh Zebra Handle
1231   \param zapt Attributes Plust Term (RPN leaf)
1232   \param termz term as used in query but converted to UTF-8
1233   \param attributeSet default attribute set
1234   \param stream memory for result
1235   \param reg_type register type ('w', 'p',..)
1236   \param complete_flag whether it's phrases or not
1237   \param rank_type term flags for ranking
1238   \param xpath_use use attribute for X-Path (-1 for no X-path)
1239   \param num_bases number of databases
1240   \param basenames array of databases
1241   \param rset_nmem memory for result sets
1242   \param result_sets output result set for each term in list (output)
1243   \param num_result_sets number of output result sets
1244   \param kc rset key control to be used for created result sets
1245 */
1246 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1247                                  Z_AttributesPlusTerm *zapt,
1248                                  const char *termz,
1249                                  const Odr_oid *attributeSet,
1250                                  NMEM stream,
1251                                  const char *index_type, int complete_flag,
1252                                  const char *rank_type,
1253                                  const char *xpath_use,
1254                                  int num_bases, char **basenames, 
1255                                  NMEM rset_nmem,
1256                                  RSET **result_sets, int *num_result_sets,
1257                                  struct rset_key_control *kc)
1258 {
1259     char term_dst[IT_MAX_WORD+1];
1260     struct grep_info grep_info;
1261     const char *termp = termz;
1262     int alloc_sets = 0;
1263
1264     *num_result_sets = 0;
1265     *term_dst = 0;
1266     if (grep_info_prepare(zh, zapt, &grep_info, *index_type) == ZEBRA_FAIL)
1267         return ZEBRA_FAIL;
1268     while(1)
1269     { 
1270         ZEBRA_RES res;
1271
1272         if (alloc_sets == *num_result_sets)
1273         {
1274             int add = 10;
1275             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1276                                               sizeof(*rnew));
1277             if (alloc_sets)
1278                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1279             alloc_sets = alloc_sets + add;
1280             *result_sets = rnew;
1281         }
1282         res = term_trunc(zh, zapt, &termp, attributeSet,
1283                          stream, &grep_info,
1284                          index_type, complete_flag,
1285                          num_bases, basenames,
1286                          term_dst, rank_type,
1287                          xpath_use, rset_nmem,
1288                          &(*result_sets)[*num_result_sets],
1289                          kc);
1290         if (res != ZEBRA_OK)
1291         {
1292             int i;
1293             for (i = 0; i < *num_result_sets; i++)
1294                 rset_delete((*result_sets)[i]);
1295             grep_info_delete(&grep_info);
1296             return res;
1297         }
1298         if ((*result_sets)[*num_result_sets] == 0)
1299             break;
1300         (*num_result_sets)++;
1301
1302         if (!*termp)
1303             break;
1304     }
1305     grep_info_delete(&grep_info);
1306     return ZEBRA_OK;
1307 }
1308
1309 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1310                                          Z_AttributesPlusTerm *zapt,
1311                                          const Odr_oid *attributeSet,
1312                                          const char *index_type,
1313                                          int num_bases, char **basenames,
1314                                          NMEM rset_nmem,
1315                                          RSET *rset,
1316                                          struct rset_key_control *kc)
1317 {
1318     RSET *f_set;
1319     int base_no;
1320     int position_value;
1321     int num_sets = 0;
1322     AttrType position;
1323
1324     attr_init_APT(&position, zapt, 3);
1325     position_value = attr_find(&position, NULL);
1326     switch(position_value)
1327     {
1328     case 3:
1329     case -1:
1330         return ZEBRA_OK;
1331     case 1:
1332     case 2:
1333         break;
1334     default:
1335         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1336                             position_value);
1337         return ZEBRA_FAIL;
1338     }
1339
1340     if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, *index_type))
1341     {
1342         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1343                             position_value);
1344         return ZEBRA_FAIL;
1345     }
1346
1347     if (!zh->reg->isamb && !zh->reg->isamc)
1348     {
1349         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1350                             position_value);
1351         return ZEBRA_FAIL;
1352     }
1353     f_set = xmalloc(sizeof(RSET) * num_bases);
1354     for (base_no = 0; base_no < num_bases; base_no++)
1355     {
1356         int ord = -1;
1357         char ord_buf[32];
1358         char term_dict[100];
1359         int ord_len;
1360         char *val;
1361         ISAM_P isam_p;
1362
1363         if (zebraExplain_curDatabase(zh->reg->zei, basenames[base_no]))
1364         {
1365             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1366                            basenames[base_no]);
1367             return ZEBRA_FAIL;
1368         }
1369         
1370         if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1371                               attributeSet, &ord) != ZEBRA_OK)
1372             continue;
1373
1374         ord_len = key_SU_encode(ord, ord_buf);
1375         memcpy(term_dict, ord_buf, ord_len);
1376         strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1377         val = dict_lookup(zh->reg->dict, term_dict);
1378         if (!val)
1379             continue;
1380         assert(*val == sizeof(ISAM_P));
1381         memcpy(&isam_p, val+1, sizeof(isam_p));
1382         
1383
1384         if (zh->reg->isamb)
1385             f_set[num_sets++] = rsisamb_create(rset_nmem, kc, kc->scope,
1386                                                zh->reg->isamb, isam_p, 0);
1387         else if (zh->reg->isamc)
1388             f_set[num_sets++] = rsisamc_create(rset_nmem, kc, kc->scope,
1389                                                zh->reg->isamc, isam_p, 0);
1390     }
1391     if (num_sets)
1392     {
1393         *rset = rset_create_or(rset_nmem, kc, kc->scope,
1394                                0 /* termid */, num_sets, f_set);
1395     }
1396     xfree(f_set);
1397     return ZEBRA_OK;
1398 }
1399                                          
1400 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1401                                        Z_AttributesPlusTerm *zapt,
1402                                        const char *termz_org,
1403                                        const Odr_oid *attributeSet,
1404                                        NMEM stream,
1405                                        const char *index_type, int complete_flag,
1406                                        const char *rank_type,
1407                                        const char *xpath_use,
1408                                        int num_bases, char **basenames, 
1409                                        NMEM rset_nmem,
1410                                        RSET *rset,
1411                                        struct rset_key_control *kc)
1412 {
1413     RSET *result_sets = 0;
1414     int num_result_sets = 0;
1415     ZEBRA_RES res =
1416         term_list_trunc(zh, zapt, termz_org, attributeSet,
1417                         stream, index_type, complete_flag,
1418                         rank_type, xpath_use,
1419                         num_bases, basenames,
1420                         rset_nmem,
1421                         &result_sets, &num_result_sets, kc);
1422
1423     if (res != ZEBRA_OK)
1424         return res;
1425
1426     if (num_result_sets > 0)
1427     {
1428         RSET first_set = 0;
1429         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1430                                       index_type,
1431                                       num_bases, basenames,
1432                                       rset_nmem, &first_set,
1433                                       kc);
1434         if (res != ZEBRA_OK)
1435             return res;
1436         if (first_set)
1437         {
1438             RSET *nsets = nmem_malloc(stream,
1439                                       sizeof(RSET) * (num_result_sets+1));
1440             nsets[0] = first_set;
1441             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1442             result_sets = nsets;
1443             num_result_sets++;
1444         }
1445     }
1446     if (num_result_sets == 0)
1447         *rset = rset_create_null(rset_nmem, kc, 0); 
1448     else if (num_result_sets == 1)
1449         *rset = result_sets[0];
1450     else
1451         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1452                                  num_result_sets, result_sets,
1453                                  1 /* ordered */, 0 /* exclusion */,
1454                                  3 /* relation */, 1 /* distance */);
1455     if (!*rset)
1456         return ZEBRA_FAIL;
1457     return ZEBRA_OK;
1458 }
1459
1460 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1461                                         Z_AttributesPlusTerm *zapt,
1462                                         const char *termz_org,
1463                                         const Odr_oid *attributeSet,
1464                                         NMEM stream,
1465                                         const char *index_type, 
1466                                         int complete_flag,
1467                                         const char *rank_type,
1468                                         const char *xpath_use,
1469                                         int num_bases, char **basenames,
1470                                         NMEM rset_nmem,
1471                                         RSET *rset,
1472                                         struct rset_key_control *kc)
1473 {
1474     RSET *result_sets = 0;
1475     int num_result_sets = 0;
1476     int i;
1477     ZEBRA_RES res =
1478         term_list_trunc(zh, zapt, termz_org, attributeSet,
1479                         stream, index_type, complete_flag,
1480                         rank_type, xpath_use,
1481                         num_bases, basenames,
1482                         rset_nmem,
1483                         &result_sets, &num_result_sets, kc);
1484     if (res != ZEBRA_OK)
1485         return res;
1486
1487     for (i = 0; i<num_result_sets; i++)
1488     {
1489         RSET first_set = 0;
1490         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1491                                       index_type,
1492                                       num_bases, basenames,
1493                                       rset_nmem, &first_set,
1494                                       kc);
1495         if (res != ZEBRA_OK)
1496         {
1497             for (i = 0; i<num_result_sets; i++)
1498                 rset_delete(result_sets[i]);
1499             return res;
1500         }
1501
1502         if (first_set)
1503         {
1504             RSET tmp_set[2];
1505
1506             tmp_set[0] = first_set;
1507             tmp_set[1] = result_sets[i];
1508             
1509             result_sets[i] = rset_create_prox(
1510                 rset_nmem, kc, kc->scope,
1511                 2, tmp_set,
1512                 1 /* ordered */, 0 /* exclusion */,
1513                 3 /* relation */, 1 /* distance */);
1514         }
1515     }
1516     if (num_result_sets == 0)
1517         *rset = rset_create_null(rset_nmem, kc, 0); 
1518     else if (num_result_sets == 1)
1519         *rset = result_sets[0];
1520     else
1521         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1522                                num_result_sets, result_sets);
1523     if (!*rset)
1524         return ZEBRA_FAIL;
1525     return ZEBRA_OK;
1526 }
1527
1528 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1529                                          Z_AttributesPlusTerm *zapt,
1530                                          const char *termz_org,
1531                                          const Odr_oid *attributeSet,
1532                                          NMEM stream,
1533                                          const char *index_type, 
1534                                          int complete_flag,
1535                                          const char *rank_type, 
1536                                          const char *xpath_use,
1537                                          int num_bases, char **basenames,
1538                                          NMEM rset_nmem,
1539                                          RSET *rset,
1540                                          struct rset_key_control *kc)
1541 {
1542     RSET *result_sets = 0;
1543     int num_result_sets = 0;
1544     int i;
1545     ZEBRA_RES res =
1546         term_list_trunc(zh, zapt, termz_org, attributeSet,
1547                         stream, index_type, complete_flag,
1548                         rank_type, xpath_use,
1549                         num_bases, basenames,
1550                         rset_nmem,
1551                         &result_sets, &num_result_sets,
1552                         kc);
1553     if (res != ZEBRA_OK)
1554         return res;
1555     for (i = 0; i<num_result_sets; i++)
1556     {
1557         RSET first_set = 0;
1558         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1559                                       index_type,
1560                                       num_bases, basenames,
1561                                       rset_nmem, &first_set,
1562                                       kc);
1563         if (res != ZEBRA_OK)
1564         {
1565             for (i = 0; i<num_result_sets; i++)
1566                 rset_delete(result_sets[i]);
1567             return res;
1568         }
1569
1570         if (first_set)
1571         {
1572             RSET tmp_set[2];
1573
1574             tmp_set[0] = first_set;
1575             tmp_set[1] = result_sets[i];
1576             
1577             result_sets[i] = rset_create_prox(
1578                 rset_nmem, kc, kc->scope,
1579                 2, tmp_set,
1580                 1 /* ordered */, 0 /* exclusion */,
1581                 3 /* relation */, 1 /* distance */);
1582         }
1583     }
1584
1585
1586     if (num_result_sets == 0)
1587         *rset = rset_create_null(rset_nmem, kc, 0); 
1588     else if (num_result_sets == 1)
1589         *rset = result_sets[0];
1590     else
1591         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1592                                 num_result_sets, result_sets);
1593     if (!*rset)
1594         return ZEBRA_FAIL;
1595     return ZEBRA_OK;
1596 }
1597
1598 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1599                             const char **term_sub,
1600                             WRBUF term_dict,
1601                             const Odr_oid *attributeSet,
1602                             struct grep_info *grep_info,
1603                             int *max_pos,
1604                             const char *index_type,
1605                             char *term_dst,
1606                             int *error_code)
1607 {
1608     AttrType relation;
1609     int relation_value;
1610     int term_value;
1611     int r;
1612     WRBUF term_num = wrbuf_alloc();
1613
1614     *error_code = 0;
1615     attr_init_APT(&relation, zapt, 2);
1616     relation_value = attr_find(&relation, NULL);
1617
1618     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1619
1620     switch (relation_value)
1621     {
1622     case 1:
1623         yaz_log(log_level_rpn, "Relation <");
1624         if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1,
1625                       term_dst))
1626         { 
1627             wrbuf_destroy(term_num);
1628             return 0;
1629         }
1630         term_value = atoi(wrbuf_cstr(term_num));
1631         gen_regular_rel(term_dict, term_value-1, 1);
1632         break;
1633     case 2:
1634         yaz_log(log_level_rpn, "Relation <=");
1635         if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1,
1636                       term_dst))
1637         {
1638             wrbuf_destroy(term_num);
1639             return 0;
1640         }
1641         term_value = atoi(wrbuf_cstr(term_num));
1642         gen_regular_rel(term_dict, term_value, 1);
1643         break;
1644     case 4:
1645         yaz_log(log_level_rpn, "Relation >=");
1646         if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1,
1647                       term_dst))
1648         {
1649             wrbuf_destroy(term_num);
1650             return 0;
1651         }
1652         term_value = atoi(wrbuf_cstr(term_num));
1653         gen_regular_rel(term_dict, term_value, 0);
1654         break;
1655     case 5:
1656         yaz_log(log_level_rpn, "Relation >");
1657         if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1,
1658                       term_dst))
1659         {
1660             wrbuf_destroy(term_num);
1661             return 0;
1662         }
1663         term_value = atoi(wrbuf_cstr(term_num));
1664         gen_regular_rel(term_dict, term_value+1, 0);
1665         break;
1666     case -1:
1667     case 3:
1668         yaz_log(log_level_rpn, "Relation =");
1669         if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1,
1670                       term_dst))
1671         {
1672             wrbuf_destroy(term_num);
1673             return 0; 
1674         }
1675         term_value = atoi(wrbuf_cstr(term_num));
1676         wrbuf_printf(term_dict, "(0*%d)", term_value);
1677         break;
1678     case 103:
1679         /* term_tmp untouched.. */
1680         while (**term_sub != '\0')
1681             (*term_sub)++;
1682         break;
1683     default:
1684         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1685         wrbuf_destroy(term_num); 
1686         return 0;
1687     }
1688     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 
1689                          0, grep_info, max_pos, 0, grep_handle);
1690
1691     if (r == 1)
1692         zebra_set_partial_result(zh);
1693     else if (r)
1694         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1695     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1696     wrbuf_destroy(term_num);
1697     return 1;
1698 }
1699
1700 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1701                               const char **term_sub, 
1702                               WRBUF term_dict,
1703                               const Odr_oid *attributeSet, NMEM stream,
1704                               struct grep_info *grep_info,
1705                               const char *index_type, int complete_flag,
1706                               int num_bases, char **basenames,
1707                               char *term_dst, 
1708                               const char *xpath_use,
1709                               struct ord_list **ol)
1710 {
1711     int base_no;
1712     const char *termp;
1713     struct rpn_char_map_info rcmi;
1714
1715     int bases_ok = 0;     /* no of databases with OK attribute */
1716
1717     *ol = ord_list_create(stream);
1718
1719     rpn_char_map_prepare(zh->reg, *index_type, &rcmi);
1720
1721     for (base_no = 0; base_no < num_bases; base_no++)
1722     {
1723         int max_pos;
1724         int relation_error = 0;
1725         int ord, ord_len, i;
1726         char ord_buf[32];
1727
1728         termp = *term_sub;
1729
1730         if (zebraExplain_curDatabase(zh->reg->zei, basenames[base_no]))
1731         {
1732             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1733                            basenames[base_no]);
1734             return ZEBRA_FAIL;
1735         }
1736
1737         if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1738                               attributeSet, &ord) != ZEBRA_OK)
1739             continue;
1740         bases_ok++;
1741
1742         wrbuf_rewind(term_dict);
1743
1744         *ol = ord_list_append(stream, *ol, ord);
1745
1746         ord_len = key_SU_encode(ord, ord_buf);
1747
1748         wrbuf_putc(term_dict, '(');
1749         for (i = 0; i < ord_len; i++)
1750         {
1751             wrbuf_putc(term_dict, 1);
1752             wrbuf_putc(term_dict, ord_buf[i]);
1753         }
1754         wrbuf_putc(term_dict, ')');
1755
1756         if (!numeric_relation(zh, zapt, &termp, term_dict,
1757                               attributeSet, grep_info, &max_pos, index_type,
1758                               term_dst, &relation_error))
1759         {
1760             if (relation_error)
1761             {
1762                 zebra_setError(zh, relation_error, 0);
1763                 return ZEBRA_FAIL;
1764             }
1765             *term_sub = 0;
1766             return ZEBRA_OK;
1767         }
1768     }
1769     if (!bases_ok)
1770         return ZEBRA_FAIL;
1771     *term_sub = termp;
1772     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1773     return ZEBRA_OK;
1774 }
1775
1776                                  
1777 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1778                                         Z_AttributesPlusTerm *zapt,
1779                                         const char *termz,
1780                                         const Odr_oid *attributeSet,
1781                                         NMEM stream,
1782                                         const char *index_type, 
1783                                         int complete_flag,
1784                                         const char *rank_type, 
1785                                         const char *xpath_use,
1786                                         int num_bases, char **basenames,
1787                                         NMEM rset_nmem,
1788                                         RSET *rset,
1789                                         struct rset_key_control *kc)
1790 {
1791     char term_dst[IT_MAX_WORD+1];
1792     const char *termp = termz;
1793     RSET *result_sets = 0;
1794     int num_result_sets = 0;
1795     ZEBRA_RES res;
1796     struct grep_info grep_info;
1797     int alloc_sets = 0;
1798     zint hits_limit_value;
1799     const char *term_ref_id_str = 0;
1800
1801     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1802
1803     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1804     if (grep_info_prepare(zh, zapt, &grep_info, *index_type) == ZEBRA_FAIL)
1805         return ZEBRA_FAIL;
1806     while (1)
1807     { 
1808         struct ord_list *ol;
1809         WRBUF term_dict = wrbuf_alloc();
1810         if (alloc_sets == num_result_sets)
1811         {
1812             int add = 10;
1813             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1814                                               sizeof(*rnew));
1815             if (alloc_sets)
1816                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1817             alloc_sets = alloc_sets + add;
1818             result_sets = rnew;
1819         }
1820         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1821         grep_info.isam_p_indx = 0;
1822         res = numeric_term(zh, zapt, &termp, term_dict,
1823                            attributeSet, stream, &grep_info,
1824                            index_type, complete_flag, num_bases, basenames,
1825                            term_dst, xpath_use, &ol);
1826         wrbuf_destroy(term_dict);
1827         if (res == ZEBRA_FAIL || termp == 0)
1828             break;
1829         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1830         result_sets[num_result_sets] =
1831             rset_trunc(zh, grep_info.isam_p_buf,
1832                        grep_info.isam_p_indx, term_dst,
1833                        strlen(term_dst), rank_type,
1834                        0 /* preserve position */,
1835                        zapt->term->which, rset_nmem, 
1836                        kc, kc->scope, ol, index_type,
1837                        hits_limit_value,
1838                        term_ref_id_str);
1839         if (!result_sets[num_result_sets])
1840             break;
1841         num_result_sets++;
1842         if (!*termp)
1843             break;
1844     }
1845     grep_info_delete(&grep_info);
1846
1847     if (res != ZEBRA_OK)
1848         return res;
1849     if (num_result_sets == 0)
1850         *rset = rset_create_null(rset_nmem, kc, 0);
1851     else if (num_result_sets == 1)
1852         *rset = result_sets[0];
1853     else
1854         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1855                                 num_result_sets, result_sets);
1856     if (!*rset)
1857         return ZEBRA_FAIL;
1858     return ZEBRA_OK;
1859 }
1860
1861 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1862                                       Z_AttributesPlusTerm *zapt,
1863                                       const char *termz,
1864                                       const Odr_oid *attributeSet,
1865                                       NMEM stream,
1866                                       const char *rank_type, NMEM rset_nmem,
1867                                       RSET *rset,
1868                                       struct rset_key_control *kc)
1869 {
1870     Record rec;
1871     zint sysno = atozint(termz);
1872     
1873     if (sysno <= 0)
1874         sysno = 0;
1875     rec = rec_get(zh->reg->records, sysno);
1876     if (!rec)
1877         sysno = 0;
1878
1879     rec_free(&rec);
1880
1881     if (sysno <= 0)
1882     {
1883         *rset = rset_create_null(rset_nmem, kc, 0);
1884     }
1885     else
1886     {
1887         RSFD rsfd;
1888         struct it_key key;
1889         *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1890                                  res_get(zh->res, "setTmpDir"), 0);
1891         rsfd = rset_open(*rset, RSETF_WRITE);
1892         
1893         key.mem[0] = sysno;
1894         key.mem[1] = 1;
1895         key.len = 2;
1896         rset_write(rsfd, &key);
1897         rset_close(rsfd);
1898     }
1899     return ZEBRA_OK;
1900 }
1901
1902 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1903                                const Odr_oid *attributeSet, NMEM stream,
1904                                Z_SortKeySpecList *sort_sequence,
1905                                const char *rank_type,
1906                                NMEM rset_nmem,
1907                                RSET *rset,
1908                                struct rset_key_control *kc)
1909 {
1910     int i;
1911     int sort_relation_value;
1912     AttrType sort_relation_type;
1913     Z_SortKeySpec *sks;
1914     Z_SortKey *sk;
1915     char termz[20];
1916     
1917     attr_init_APT(&sort_relation_type, zapt, 7);
1918     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1919
1920     if (!sort_sequence->specs)
1921     {
1922         sort_sequence->num_specs = 10;
1923         sort_sequence->specs = (Z_SortKeySpec **)
1924             nmem_malloc(stream, sort_sequence->num_specs *
1925                          sizeof(*sort_sequence->specs));
1926         for (i = 0; i<sort_sequence->num_specs; i++)
1927             sort_sequence->specs[i] = 0;
1928     }
1929     if (zapt->term->which != Z_Term_general)
1930         i = 0;
1931     else
1932         i = atoi_n((char *) zapt->term->u.general->buf,
1933                     zapt->term->u.general->len);
1934     if (i >= sort_sequence->num_specs)
1935         i = 0;
1936     sprintf(termz, "%d", i);
1937
1938     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1939     sks->sortElement = (Z_SortElement *)
1940         nmem_malloc(stream, sizeof(*sks->sortElement));
1941     sks->sortElement->which = Z_SortElement_generic;
1942     sk = sks->sortElement->u.generic = (Z_SortKey *)
1943         nmem_malloc(stream, sizeof(*sk));
1944     sk->which = Z_SortKey_sortAttributes;
1945     sk->u.sortAttributes = (Z_SortAttributes *)
1946         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1947
1948     sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1949     sk->u.sortAttributes->list = zapt->attributes;
1950
1951     sks->sortRelation = (int *)
1952         nmem_malloc(stream, sizeof(*sks->sortRelation));
1953     if (sort_relation_value == 1)
1954         *sks->sortRelation = Z_SortKeySpec_ascending;
1955     else if (sort_relation_value == 2)
1956         *sks->sortRelation = Z_SortKeySpec_descending;
1957     else 
1958         *sks->sortRelation = Z_SortKeySpec_ascending;
1959
1960     sks->caseSensitivity = (int *)
1961         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1962     *sks->caseSensitivity = 0;
1963
1964     sks->which = Z_SortKeySpec_null;
1965     sks->u.null = odr_nullval ();
1966     sort_sequence->specs[i] = sks;
1967     *rset = rset_create_null(rset_nmem, kc, 0);
1968     return ZEBRA_OK;
1969 }
1970
1971
1972 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1973                            const Odr_oid *attributeSet,
1974                            struct xpath_location_step *xpath, int max,
1975                            NMEM mem)
1976 {
1977     const Odr_oid *curAttributeSet = attributeSet;
1978     AttrType use;
1979     const char *use_string = 0;
1980     
1981     attr_init_APT(&use, zapt, 1);
1982     attr_find_ex(&use, &curAttributeSet, &use_string);
1983
1984     if (!use_string || *use_string != '/')
1985         return -1;
1986
1987     return zebra_parse_xpath_str(use_string, xpath, max, mem);
1988 }
1989  
1990                
1991
1992 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1993                         const char *index_type, const char *term, 
1994                         const char *xpath_use,
1995                         NMEM rset_nmem,
1996                         struct rset_key_control *kc)
1997 {
1998     struct grep_info grep_info;
1999     int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
2000                                            zinfo_index_category_index,
2001                                            index_type, xpath_use);
2002     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2003         return rset_create_null(rset_nmem, kc, 0);
2004     
2005     if (ord < 0)
2006         return rset_create_null(rset_nmem, kc, 0);
2007     else
2008     {
2009         int i, r, max_pos;
2010         char ord_buf[32];
2011         RSET rset;
2012         WRBUF term_dict = wrbuf_alloc();
2013         int ord_len = key_SU_encode(ord, ord_buf);
2014         int term_type = Z_Term_characterString;
2015         const char *flags = "void";
2016
2017         wrbuf_putc(term_dict, '(');
2018         for (i = 0; i<ord_len; i++)
2019         {
2020             wrbuf_putc(term_dict, 1);
2021             wrbuf_putc(term_dict, ord_buf[i]);
2022         }
2023         wrbuf_putc(term_dict, ')');
2024         wrbuf_puts(term_dict, term);
2025         
2026         grep_info.isam_p_indx = 0;
2027         r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2028                              &grep_info, &max_pos, 0, grep_handle);
2029         yaz_log(YLOG_DEBUG, "%s %d positions", term,
2030                 grep_info.isam_p_indx);
2031         rset = rset_trunc(zh, grep_info.isam_p_buf,
2032                           grep_info.isam_p_indx, term, strlen(term),
2033                           flags, 1, term_type, rset_nmem,
2034                           kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2035                           0 /* term_ref_id_str */);
2036         grep_info_delete(&grep_info);
2037         wrbuf_destroy(term_dict);
2038         return rset;
2039     }
2040 }
2041
2042 static
2043 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2044                            int num_bases, char **basenames,
2045                            NMEM stream, const char *rank_type, RSET rset,
2046                            int xpath_len, struct xpath_location_step *xpath,
2047                            NMEM rset_nmem,
2048                            RSET *rset_out,
2049                            struct rset_key_control *kc)
2050 {
2051     int base_no;
2052     int i;
2053     int always_matches = rset ? 0 : 1;
2054
2055     if (xpath_len < 0)
2056     {
2057         *rset_out = rset;
2058         return ZEBRA_OK;
2059     }
2060
2061     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2062     for (i = 0; i<xpath_len; i++)
2063     {
2064         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2065
2066     }
2067
2068     /*
2069       //a    ->    a/.*
2070       //a/b  ->    b/a/.*
2071       /a     ->    a/
2072       /a/b   ->    b/a/
2073
2074       /      ->    none
2075
2076    a[@attr = value]/b[@other = othervalue]
2077
2078  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2079  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2080  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2081  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2082  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2083  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2084       
2085     */
2086
2087     dict_grep_cmap(zh->reg->dict, 0, 0);
2088
2089     for (base_no = 0; base_no < num_bases; base_no++)
2090     {
2091         int level = xpath_len;
2092         int first_path = 1;
2093         
2094         if (zebraExplain_curDatabase(zh->reg->zei, basenames[base_no]))
2095         {
2096             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2097                            basenames[base_no]);
2098             *rset_out = rset;
2099             return ZEBRA_FAIL;
2100         }
2101         while (--level >= 0)
2102         {
2103             WRBUF xpath_rev = wrbuf_alloc();
2104             int i;
2105             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2106
2107             for (i = level; i >= 1; --i)
2108             {
2109                 const char *cp = xpath[i].part;
2110                 if (*cp)
2111                 {
2112                     for (; *cp; cp++)
2113                     {
2114                         if (*cp == '*')
2115                             wrbuf_puts(xpath_rev, "[^/]*");
2116                         else if (*cp == ' ')
2117                             wrbuf_puts(xpath_rev, "\001 ");
2118                         else
2119                             wrbuf_putc(xpath_rev, *cp);
2120
2121                         /* wrbuf_putc does not null-terminate , but
2122                            wrbuf_puts below ensures it does.. so xpath_rev
2123                            is OK iff length is > 0 */
2124                     }
2125                     wrbuf_puts(xpath_rev, "/");
2126                 }
2127                 else if (i == 1)  /* // case */
2128                     wrbuf_puts(xpath_rev, ".*");
2129             }
2130             if (xpath[level].predicate &&
2131                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2132                 xpath[level].predicate->u.relation.name[0])
2133             {
2134                 WRBUF wbuf = wrbuf_alloc();
2135                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2136                 if (xpath[level].predicate->u.relation.value)
2137                 {
2138                     const char *cp = xpath[level].predicate->u.relation.value;
2139                     wrbuf_putc(wbuf, '=');
2140                     
2141                     while (*cp)
2142                     {
2143                         if (strchr(REGEX_CHARS, *cp))
2144                             wrbuf_putc(wbuf, '\\');
2145                         wrbuf_putc(wbuf, *cp);
2146                         cp++;
2147                     }
2148                 }
2149                 rset_attr = xpath_trunc(
2150                     zh, stream, "0", wrbuf_cstr(wbuf), 
2151                     ZEBRA_XPATH_ATTR_NAME, 
2152                     rset_nmem, kc);
2153                 wrbuf_destroy(wbuf);
2154             } 
2155             else 
2156             {
2157                 if (!first_path)
2158                 {
2159                     wrbuf_destroy(xpath_rev);
2160                     continue;
2161                 }
2162             }
2163             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, 
2164                     wrbuf_cstr(xpath_rev));
2165             if (wrbuf_len(xpath_rev))
2166             {
2167                 rset_start_tag = xpath_trunc(zh, stream, "0", 
2168                                              wrbuf_cstr(xpath_rev),
2169                                              ZEBRA_XPATH_ELM_BEGIN, 
2170                                              rset_nmem, kc);
2171                 if (always_matches)
2172                     rset = rset_start_tag;
2173                 else
2174                 {
2175                     rset_end_tag = xpath_trunc(zh, stream, "0", 
2176                                                wrbuf_cstr(xpath_rev),
2177                                                ZEBRA_XPATH_ELM_END, 
2178                                                rset_nmem, kc);
2179                     
2180                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2181                                                rset_start_tag, rset,
2182                                                rset_end_tag, rset_attr);
2183                 }
2184             }
2185             wrbuf_destroy(xpath_rev);
2186             first_path = 0;
2187         }
2188     }
2189     *rset_out = rset;
2190     return ZEBRA_OK;
2191 }
2192
2193 #define MAX_XPATH_STEPS 10
2194
2195 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2196                                      Z_AttributesPlusTerm *zapt,
2197                                      const Odr_oid *attributeSet, NMEM stream,
2198                                      Z_SortKeySpecList *sort_sequence,
2199                                      int num_bases, char **basenames, 
2200                                      NMEM rset_nmem,
2201                                      RSET *rset,
2202                                      struct rset_key_control *kc);
2203
2204 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2205                                 const Odr_oid *attributeSet, NMEM stream,
2206                                 Z_SortKeySpecList *sort_sequence,
2207                                 int num_bases, char **basenames, 
2208                                 NMEM rset_nmem,
2209                                 RSET *rset,
2210                                 struct rset_key_control *kc)
2211 {
2212     RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2213     ZEBRA_RES res = ZEBRA_OK;
2214     int i;
2215     for (i = 0; i < num_bases; i++)
2216     {
2217
2218         if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2219         {
2220             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2221                            basenames[i]);
2222             res = ZEBRA_FAIL;
2223             break;
2224         }
2225         res = rpn_search_database(zh, zapt, attributeSet, stream,
2226                                   sort_sequence, 1, basenames+i,
2227                                   rset_nmem, rsets+i, kc);
2228         if (res != ZEBRA_OK)
2229             break;
2230     }
2231     if (res != ZEBRA_OK)
2232     {   /* must clean up the already created sets */
2233         int j;
2234         for (i = 0; j < i; j++)
2235             rset_delete(rsets[j]);
2236         *rset = 0;
2237     }
2238     else 
2239     {
2240         if (num_bases == 1)
2241             *rset = rsets[0];
2242         else if (num_bases == 0)
2243             *rset = rset_create_null(rset_nmem, kc, 0); 
2244         else
2245             *rset = rset_create_and(rset_nmem, kc, kc->scope,
2246                                     num_bases, rsets);
2247     }
2248     return res;
2249 }
2250
2251 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2252                                      Z_AttributesPlusTerm *zapt,
2253                                      const Odr_oid *attributeSet, NMEM stream,
2254                                      Z_SortKeySpecList *sort_sequence,
2255                                      int num_bases, char **basenames, 
2256                                      NMEM rset_nmem,
2257                                      RSET *rset,
2258                                      struct rset_key_control *kc)
2259 {
2260     ZEBRA_RES res = ZEBRA_OK;
2261     const char *index_type;
2262     char *search_type = NULL;
2263     char rank_type[128];
2264     int complete_flag;
2265     int sort_flag;
2266     char termz[IT_MAX_WORD+1];
2267     int xpath_len;
2268     const char *xpath_use = 0;
2269     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2270
2271     if (!log_level_set)
2272     {
2273         log_level_rpn = yaz_log_module_level("rpn");
2274         log_level_set = 1;
2275     }
2276     zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2277                     rank_type, &complete_flag, &sort_flag);
2278     
2279     yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2280     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2281     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2282     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2283
2284     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2285         return ZEBRA_FAIL;
2286
2287     if (sort_flag)
2288         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2289                              rank_type, rset_nmem, rset, kc);
2290     /* consider if an X-Path query is used */
2291     xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
2292                                 xpath, MAX_XPATH_STEPS, stream);
2293     if (xpath_len >= 0)
2294     {
2295         if (xpath[xpath_len-1].part[0] == '@') 
2296             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2297         else
2298             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
2299
2300         if (1)
2301         {
2302             AttrType relation;
2303             int relation_value;
2304
2305             attr_init_APT(&relation, zapt, 2);
2306             relation_value = attr_find(&relation, NULL);
2307
2308             if (relation_value == 103) /* alwaysmatches */
2309             {
2310                 *rset = 0; /* signal no "term" set */
2311                 return rpn_search_xpath(zh, num_bases, basenames,
2312                                         stream, rank_type, *rset, 
2313                                         xpath_len, xpath, rset_nmem, rset, kc);
2314             }
2315         }
2316     }
2317
2318     /* search using one of the various search type strategies
2319        termz is our UTF-8 search term
2320        attributeSet is top-level default attribute set 
2321        stream is ODR for search
2322        reg_id is the register type
2323        complete_flag is 1 for complete subfield, 0 for incomplete
2324        xpath_use is use-attribute to be used for X-Path search, 0 for none
2325     */
2326     if (!strcmp(search_type, "phrase"))
2327     {
2328         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2329                                     index_type, complete_flag, rank_type,
2330                                     xpath_use,
2331                                     num_bases, basenames, rset_nmem,
2332                                     rset, kc);
2333     }
2334     else if (!strcmp(search_type, "and-list"))
2335     {
2336         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2337                                       index_type, complete_flag, rank_type,
2338                                       xpath_use,
2339                                       num_bases, basenames, rset_nmem,
2340                                       rset, kc);
2341     }
2342     else if (!strcmp(search_type, "or-list"))
2343     {
2344         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2345                                      index_type, complete_flag, rank_type,
2346                                      xpath_use,
2347                                      num_bases, basenames, rset_nmem,
2348                                      rset, kc);
2349     }
2350     else if (!strcmp(search_type, "local"))
2351     {
2352         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2353                                    rank_type, rset_nmem, rset, kc);
2354     }
2355     else if (!strcmp(search_type, "numeric"))
2356     {
2357         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2358                                      index_type, complete_flag, rank_type,
2359                                      xpath_use,
2360                                      num_bases, basenames, rset_nmem,
2361                                      rset, kc);
2362     }
2363     else
2364     {
2365         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2366         res = ZEBRA_FAIL;
2367     }
2368     if (res != ZEBRA_OK)
2369         return res;
2370     if (!*rset)
2371         return ZEBRA_FAIL;
2372     return rpn_search_xpath(zh, num_bases, basenames,
2373                             stream, rank_type, *rset, 
2374                             xpath_len, xpath, rset_nmem, rset, kc);
2375 }
2376
2377 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2378                                       const Odr_oid *attributeSet, 
2379                                       NMEM stream, NMEM rset_nmem,
2380                                       Z_SortKeySpecList *sort_sequence,
2381                                       int num_bases, char **basenames,
2382                                       RSET **result_sets, int *num_result_sets,
2383                                       Z_Operator *parent_op,
2384                                       struct rset_key_control *kc);
2385
2386 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2387                                    zint *approx_limit)
2388 {
2389     ZEBRA_RES res = ZEBRA_OK;
2390     if (zs->which == Z_RPNStructure_complex)
2391     {
2392         if (res == ZEBRA_OK)
2393             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2394                                            approx_limit);
2395         if (res == ZEBRA_OK)
2396             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2397                                            approx_limit);
2398     }
2399     else if (zs->which == Z_RPNStructure_simple)
2400     {
2401         if (zs->u.simple->which == Z_Operand_APT)
2402         {
2403             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2404             AttrType global_hits_limit_attr;
2405             int l;
2406             
2407             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2408             
2409             l = attr_find(&global_hits_limit_attr, NULL);
2410             if (l != -1)
2411                 *approx_limit = l;
2412         }
2413     }
2414     return res;
2415 }
2416
2417 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2418                          const Odr_oid *attributeSet, 
2419                          NMEM stream, NMEM rset_nmem,
2420                          Z_SortKeySpecList *sort_sequence,
2421                          int num_bases, char **basenames,
2422                          RSET *result_set)
2423 {
2424     RSET *result_sets = 0;
2425     int num_result_sets = 0;
2426     ZEBRA_RES res;
2427     struct rset_key_control *kc = zebra_key_control_create(zh);
2428
2429     res = rpn_search_structure(zh, zs, attributeSet,
2430                                stream, rset_nmem,
2431                                sort_sequence, 
2432                                num_bases, basenames,
2433                                &result_sets, &num_result_sets,
2434                                0 /* no parent op */,
2435                                kc);
2436     if (res != ZEBRA_OK)
2437     {
2438         int i;
2439         for (i = 0; i<num_result_sets; i++)
2440             rset_delete(result_sets[i]);
2441         *result_set = 0;
2442     }
2443     else
2444     {
2445         assert(num_result_sets == 1);
2446         assert(result_sets);
2447         assert(*result_sets);
2448         *result_set = *result_sets;
2449     }
2450     (*kc->dec)(kc);
2451     return res;
2452 }
2453
2454 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2455                                const Odr_oid *attributeSet, 
2456                                NMEM stream, NMEM rset_nmem,
2457                                Z_SortKeySpecList *sort_sequence,
2458                                int num_bases, char **basenames,
2459                                RSET **result_sets, int *num_result_sets,
2460                                Z_Operator *parent_op,
2461                                struct rset_key_control *kc)
2462 {
2463     *num_result_sets = 0;
2464     if (zs->which == Z_RPNStructure_complex)
2465     {
2466         ZEBRA_RES res;
2467         Z_Operator *zop = zs->u.complex->roperator;
2468         RSET *result_sets_l = 0;
2469         int num_result_sets_l = 0;
2470         RSET *result_sets_r = 0;
2471         int num_result_sets_r = 0;
2472
2473         res = rpn_search_structure(zh, zs->u.complex->s1,
2474                                    attributeSet, stream, rset_nmem,
2475                                    sort_sequence,
2476                                    num_bases, basenames,
2477                                    &result_sets_l, &num_result_sets_l,
2478                                    zop, kc);
2479         if (res != ZEBRA_OK)
2480         {
2481             int i;
2482             for (i = 0; i<num_result_sets_l; i++)
2483                 rset_delete(result_sets_l[i]);
2484             return res;
2485         }
2486         res = rpn_search_structure(zh, zs->u.complex->s2,
2487                                    attributeSet, stream, rset_nmem,
2488                                    sort_sequence,
2489                                    num_bases, basenames,
2490                                    &result_sets_r, &num_result_sets_r,
2491                                    zop, kc);
2492         if (res != ZEBRA_OK)
2493         {
2494             int i;
2495             for (i = 0; i<num_result_sets_l; i++)
2496                 rset_delete(result_sets_l[i]);
2497             for (i = 0; i<num_result_sets_r; i++)
2498                 rset_delete(result_sets_r[i]);
2499             return res;
2500         }
2501
2502         /* make a new list of result for all children */
2503         *num_result_sets = num_result_sets_l + num_result_sets_r;
2504         *result_sets = nmem_malloc(stream, *num_result_sets * 
2505                                    sizeof(**result_sets));
2506         memcpy(*result_sets, result_sets_l, 
2507                num_result_sets_l * sizeof(**result_sets));
2508         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2509                num_result_sets_r * sizeof(**result_sets));
2510
2511         if (!parent_op || parent_op->which != zop->which
2512             || (zop->which != Z_Operator_and &&
2513                 zop->which != Z_Operator_or))
2514         {
2515             /* parent node different from this one (or non-present) */
2516             /* we must combine result sets now */
2517             RSET rset;
2518             switch (zop->which)
2519             {
2520             case Z_Operator_and:
2521                 rset = rset_create_and(rset_nmem, kc,
2522                                        kc->scope,
2523                                        *num_result_sets, *result_sets);
2524                 break;
2525             case Z_Operator_or:
2526                 rset = rset_create_or(rset_nmem, kc,
2527                                       kc->scope, 0, /* termid */
2528                                       *num_result_sets, *result_sets);
2529                 break;
2530             case Z_Operator_and_not:
2531                 rset = rset_create_not(rset_nmem, kc,
2532                                        kc->scope,
2533                                        (*result_sets)[0],
2534                                        (*result_sets)[1]);
2535                 break;
2536             case Z_Operator_prox:
2537                 if (zop->u.prox->which != Z_ProximityOperator_known)
2538                 {
2539                     zebra_setError(zh, 
2540                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2541                                    0);
2542                     return ZEBRA_FAIL;
2543                 }
2544                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2545                 {
2546                     zebra_setError_zint(zh,
2547                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2548                                         *zop->u.prox->u.known);
2549                     return ZEBRA_FAIL;
2550                 }
2551                 else
2552                 {
2553                     rset = rset_create_prox(rset_nmem, kc,
2554                                             kc->scope,
2555                                             *num_result_sets, *result_sets, 
2556                                             *zop->u.prox->ordered,
2557                                             (!zop->u.prox->exclusion ? 
2558                                              0 : *zop->u.prox->exclusion),
2559                                             *zop->u.prox->relationType,
2560                                             *zop->u.prox->distance );
2561                 }
2562                 break;
2563             default:
2564                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2565                 return ZEBRA_FAIL;
2566             }
2567             *num_result_sets = 1;
2568             *result_sets = nmem_malloc(stream, *num_result_sets * 
2569                                        sizeof(**result_sets));
2570             (*result_sets)[0] = rset;
2571         }
2572     }
2573     else if (zs->which == Z_RPNStructure_simple)
2574     {
2575         RSET rset;
2576         ZEBRA_RES res;
2577
2578         if (zs->u.simple->which == Z_Operand_APT)
2579         {
2580             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2581             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2582                                  attributeSet, stream, sort_sequence,
2583                                  num_bases, basenames, rset_nmem, &rset,
2584                                  kc);
2585             if (res != ZEBRA_OK)
2586                 return res;
2587         }
2588         else if (zs->u.simple->which == Z_Operand_resultSetId)
2589         {
2590             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2591             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2592             if (!rset)
2593             {
2594                 zebra_setError(zh, 
2595                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2596                                zs->u.simple->u.resultSetId);
2597                 return ZEBRA_FAIL;
2598             }
2599             rset_dup(rset);
2600         }
2601         else
2602         {
2603             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2604             return ZEBRA_FAIL;
2605         }
2606         *num_result_sets = 1;
2607         *result_sets = nmem_malloc(stream, *num_result_sets * 
2608                                    sizeof(**result_sets));
2609         (*result_sets)[0] = rset;
2610     }
2611     else
2612     {
2613         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2614         return ZEBRA_FAIL;
2615     }
2616     return ZEBRA_OK;
2617 }
2618
2619
2620
2621 /*
2622  * Local variables:
2623  * c-basic-offset: 4
2624  * indent-tabs-mode: nil
2625  * End:
2626  * vim: shiftwidth=4 tabstop=8 expandtab
2627  */
2628