Added support for specification of approximative limits for whole query.
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* $Id: rpnsearch.c,v 1.3 2006-11-30 10:33:19 adam Exp $
2    Copyright (C) 1995-2006
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
42
43 #define TERMSET_DISABLE 1
44
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
46 {
47     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
49 #if 0
50     if (out && *out)
51     {
52         const char *outp = *out;
53         yaz_log(YLOG_LOG, "---");
54         while (*outp)
55         {
56             yaz_log(YLOG_LOG, "%02X", *outp);
57             outp++;
58         }
59     }
60 #endif
61     return out;
62 }
63
64 void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
65                           struct rpn_char_map_info *map_info)
66 {
67     map_info->zm = reg->zebra_maps;
68     map_info->reg_type = reg_type;
69     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
70 }
71
72 #define TERM_COUNT        
73        
74 struct grep_info {        
75 #ifdef TERM_COUNT        
76     int *term_no;        
77 #endif        
78     ISAM_P *isam_p_buf;
79     int isam_p_size;        
80     int isam_p_indx;
81     ZebraHandle zh;
82     int reg_type;
83     ZebraSet termset;
84 };        
85
86 static void add_isam_p(const char *name, const char *info,
87                        struct grep_info *p)
88 {
89     if (!log_level_set)
90     {
91         log_level_rpn = yaz_log_module_level("rpn");
92         log_level_set = 1;
93     }
94     if (p->isam_p_indx == p->isam_p_size)
95     {
96         ISAM_P *new_isam_p_buf;
97 #ifdef TERM_COUNT        
98         int *new_term_no;        
99 #endif
100         p->isam_p_size = 2*p->isam_p_size + 100;
101         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
102                                             p->isam_p_size);
103         if (p->isam_p_buf)
104         {
105             memcpy(new_isam_p_buf, p->isam_p_buf,
106                     p->isam_p_indx * sizeof(*p->isam_p_buf));
107             xfree(p->isam_p_buf);
108         }
109         p->isam_p_buf = new_isam_p_buf;
110
111 #ifdef TERM_COUNT
112         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
113         if (p->term_no)
114         {
115             memcpy(new_term_no, p->isam_p_buf,
116                     p->isam_p_indx * sizeof(*p->term_no));
117             xfree(p->term_no);
118         }
119         p->term_no = new_term_no;
120 #endif
121     }
122     assert(*info == sizeof(*p->isam_p_buf));
123     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
124
125     if (p->termset)
126     {
127         const char *db;
128         char term_tmp[IT_MAX_WORD];
129         int ord = 0;
130         const char *index_name;
131         int len = key_SU_decode (&ord, (const unsigned char *) name);
132         
133         zebra_term_untrans  (p->zh, p->reg_type, term_tmp, name+len);
134         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
135         zebraExplain_lookup_ord(p->zh->reg->zei,
136                                 ord, 0 /* index_type */, &db, &index_name);
137         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
138         
139         resultSetAddTerm(p->zh, p->termset, name[len], db,
140                          index_name, term_tmp);
141     }
142     (p->isam_p_indx)++;
143 }
144
/* Dictionary grep callback: p is the struct grep_info accumulator.
 * Hands the hit to add_isam_p() and always returns 0. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *gi = (struct grep_info *) p;

    add_isam_p(name, info, gi);
    return 0;
}
150
151 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
152                     const char *ct1, const char *ct2, int first)
153 {
154     const char *s1, *s0 = *src;
155     const char **map;
156
157     /* skip white space */
158     while (*s0)
159     {
160         if (ct1 && strchr(ct1, *s0))
161             break;
162         if (ct2 && strchr(ct2, *s0))
163             break;
164         s1 = s0;
165         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
166         if (**map != *CHR_SPACE)
167             break;
168         s0 = s1;
169     }
170     *src = s0;
171     return *s0;
172 }
173
174
/* esc_str: render in_buf[0..in_size) into out_buf as a sequence of
 * "HH:c  " items (hex value, then the character itself, or '?' when it
 * is outside 32..126).  Once the text grows beyond out_size-20 bytes,
 * ".." is appended and the remaining input is dropped.
 * out_size must be greater than 20. */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;   /* length of the text written so far */
    int idx = 0;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);

    out_buf[0] = '\0';
    while (idx < in_size)
    {
        int byte = in_buf[idx++] & 0xff;
        int shown = (byte >= 32 && byte <= 126) ? byte : '?';

        used += sprintf(out_buf + used, "%02X:%c  ", byte, shown);
        if (used > out_size-20)
        {
            strcpy(out_buf + used, "..");
            break;
        }
    }
}
200
201 #define REGEX_CHARS " []()|.*+?!"
202
203 /* term_100: handle term, where trunc = none(no operators at all) */
204 static int term_100(ZebraMaps zebra_maps, int reg_type,
205                     const char **src, char *dst, int space_split,
206                     char *dst_term)
207 {
208     const char *s0;
209     const char **map;
210     int i = 0;
211     int j = 0;
212
213     const char *space_start = 0;
214     const char *space_end = 0;
215
216     if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
217         return 0;
218     s0 = *src;
219     while (*s0)
220     {
221         const char *s1 = s0;
222         int q_map_match = 0;
223         map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
224                                 &q_map_match);
225         if (space_split)
226         {
227             if (**map == *CHR_SPACE)
228                 break;
229         }
230         else  /* complete subfield only. */
231         {
232             if (**map == *CHR_SPACE)
233             {   /* save space mapping for later  .. */
234                 space_start = s1;
235                 space_end = s0;
236                 continue;
237             }
238             else if (space_start)
239             {   /* reload last space */
240                 while (space_start < space_end)
241                 {
242                     if (strchr(REGEX_CHARS, *space_start))
243                         dst[i++] = '\\';
244                     dst_term[j++] = *space_start;
245                     dst[i++] = *space_start++;
246                 }
247                 /* and reset */
248                 space_start = space_end = 0;
249             }
250         }
251         /* add non-space char */
252         memcpy(dst_term+j, s1, s0 - s1);
253         j += (s0 - s1);
254         if (!q_map_match)
255         {
256             while (s1 < s0)
257             {
258                 if (strchr(REGEX_CHARS, *s1))
259                     dst[i++] = '\\';
260                 dst[i++] = *s1++;
261             }
262         }
263         else
264         {
265             char tmpbuf[80];
266             esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
267             
268             strcpy(dst + i, map[0]);
269             i += strlen(map[0]);
270         }
271     }
272     dst[i] = '\0';
273     dst_term[j] = '\0';
274     *src = s0;
275     return i;
276 }
277
278 /* term_101: handle term, where trunc = Process # */
279 static int term_101(ZebraMaps zebra_maps, int reg_type,
280                     const char **src, char *dst, int space_split,
281                     char *dst_term)
282 {
283     const char *s0;
284     const char **map;
285     int i = 0;
286     int j = 0;
287
288     if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
289         return 0;
290     s0 = *src;
291     while (*s0)
292     {
293         if (*s0 == '#')
294         {
295             dst[i++] = '.';
296             dst[i++] = '*';
297             dst_term[j++] = *s0++;
298         }
299         else
300         {
301             const char *s1 = s0;
302             int q_map_match = 0;
303             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
304                                     &q_map_match);
305             if (space_split && **map == *CHR_SPACE)
306                 break;
307
308             /* add non-space char */
309             memcpy(dst_term+j, s1, s0 - s1);
310             j += (s0 - s1);
311             if (!q_map_match)
312             {
313                 while (s1 < s0)
314                 {
315                     if (strchr(REGEX_CHARS, *s1))
316                         dst[i++] = '\\';
317                     dst[i++] = *s1++;
318                 }
319             }
320             else
321             {
322                 char tmpbuf[80];
323                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
324                 
325                 strcpy(dst + i, map[0]);
326                 i += strlen(map[0]);
327             }
328         }
329     }
330     dst[i] = '\0';
331     dst_term[j++] = '\0';
332     *src = s0;
333     return i;
334 }
335
336 /* term_103: handle term, where trunc = re-2 (regular expressions) */
337 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
338                     char *dst, int *errors, int space_split,
339                     char *dst_term)
340 {
341     int i = 0;
342     int j = 0;
343     const char *s0;
344     const char **map;
345
346     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
347         return 0;
348     s0 = *src;
349     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
350         isdigit(((const unsigned char *)s0)[1]))
351     {
352         *errors = s0[1] - '0';
353         s0 += 3;
354         if (*errors > 3)
355             *errors = 3;
356     }
357     while (*s0)
358     {
359         if (strchr("^\\()[].*+?|-", *s0))
360         {
361             dst_term[j++] = *s0;
362             dst[i++] = *s0++;
363         }
364         else
365         {
366             const char *s1 = s0;
367             int q_map_match = 0;
368             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
369                                     &q_map_match);
370             if (space_split && **map == *CHR_SPACE)
371                 break;
372
373             /* add non-space char */
374             memcpy(dst_term+j, s1, s0 - s1);
375             j += (s0 - s1);
376             if (!q_map_match)
377             {
378                 while (s1 < s0)
379                 {
380                     if (strchr(REGEX_CHARS, *s1))
381                         dst[i++] = '\\';
382                     dst[i++] = *s1++;
383                 }
384             }
385             else
386             {
387                 char tmpbuf[80];
388                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
389                 
390                 strcpy(dst + i, map[0]);
391                 i += strlen(map[0]);
392             }
393         }
394     }
395     dst[i] = '\0';
396     dst_term[j] = '\0';
397     *src = s0;
398     
399     return i;
400 }
401
402 /* term_103: handle term, where trunc = re-1 (regular expressions) */
403 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
404                     char *dst, int space_split, char *dst_term)
405 {
406     return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
407                     dst_term);
408 }
409
410
411 /* term_104: handle term, where trunc = Process # and ! */
412 static int term_104(ZebraMaps zebra_maps, int reg_type,
413                     const char **src, char *dst, int space_split,
414                     char *dst_term)
415 {
416     const char *s0;
417     const char **map;
418     int i = 0;
419     int j = 0;
420
421     if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
422         return 0;
423     s0 = *src;
424     while (*s0)
425     {
426         if (*s0 == '?')
427         {
428             dst_term[j++] = *s0++;
429             if (*s0 >= '0' && *s0 <= '9')
430             {
431                 int limit = 0;
432                 while (*s0 >= '0' && *s0 <= '9')
433                 {
434                     limit = limit * 10 + (*s0 - '0');
435                     dst_term[j++] = *s0++;
436                 }
437                 if (limit > 20)
438                     limit = 20;
439                 while (--limit >= 0)
440                 {
441                     dst[i++] = '.';
442                     dst[i++] = '?';
443                 }
444             }
445             else
446             {
447                 dst[i++] = '.';
448                 dst[i++] = '*';
449             }
450         }
451         else if (*s0 == '*')
452         {
453             dst[i++] = '.';
454             dst[i++] = '*';
455             dst_term[j++] = *s0++;
456         }
457         else if (*s0 == '#')
458         {
459             dst[i++] = '.';
460             dst_term[j++] = *s0++;
461         }
462         else
463         {
464             const char *s1 = s0;
465             int q_map_match = 0;
466             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
467                                     &q_map_match);
468             if (space_split && **map == *CHR_SPACE)
469                 break;
470
471             /* add non-space char */
472             memcpy(dst_term+j, s1, s0 - s1);
473             j += (s0 - s1);
474             if (!q_map_match)
475             {
476                 while (s1 < s0)
477                 {
478                     if (strchr(REGEX_CHARS, *s1))
479                         dst[i++] = '\\';
480                     dst[i++] = *s1++;
481                 }
482             }
483             else
484             {
485                 char tmpbuf[80];
486                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
487                 
488                 strcpy(dst + i, map[0]);
489                 i += strlen(map[0]);
490             }
491         }
492     }
493     dst[i] = '\0';
494     dst_term[j++] = '\0';
495     *src = s0;
496     return i;
497 }
498
499 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
500 static int term_105(ZebraMaps zebra_maps, int reg_type,
501                     const char **src, char *dst, int space_split,
502                     char *dst_term, int right_truncate)
503 {
504     const char *s0;
505     const char **map;
506     int i = 0;
507     int j = 0;
508
509     if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
510         return 0;
511     s0 = *src;
512     while (*s0)
513     {
514         if (*s0 == '*')
515         {
516             dst[i++] = '.';
517             dst[i++] = '*';
518             dst_term[j++] = *s0++;
519         }
520         else if (*s0 == '!')
521         {
522             dst[i++] = '.';
523             dst_term[j++] = *s0++;
524         }
525         else
526         {
527             const char *s1 = s0;
528             int q_map_match = 0;
529             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
530                                     &q_map_match);
531             if (space_split && **map == *CHR_SPACE)
532                 break;
533
534             /* add non-space char */
535             memcpy(dst_term+j, s1, s0 - s1);
536             j += (s0 - s1);
537             if (!q_map_match)
538             {
539                 while (s1 < s0)
540                 {
541                     if (strchr(REGEX_CHARS, *s1))
542                         dst[i++] = '\\';
543                     dst[i++] = *s1++;
544                 }
545             }
546             else
547             {
548                 char tmpbuf[80];
549                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
550                 
551                 strcpy(dst + i, map[0]);
552                 i += strlen(map[0]);
553             }
554         }
555     }
556     if (right_truncate)
557     {
558         dst[i++] = '.';
559         dst[i++] = '*';
560     }
561     dst[i] = '\0';
562     
563     dst_term[j++] = '\0';
564     *src = s0;
565     return i;
566 }
567
568
569 /* gen_regular_rel - generate regular expression from relation
570  *  val:     border value (inclusive)
571  *  islt:    1 if <=; 0 if >=.
572  */
573 static void gen_regular_rel(char *dst, int val, int islt)
574 {
575     int dst_p;
576     int w, d, i;
577     int pos = 0;
578     char numstr[20];
579
580     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
581     if (val >= 0)
582     {
583         if (islt)
584             strcpy(dst, "(-[0-9]+|(");
585         else
586             strcpy(dst, "((");
587     } 
588     else
589     {
590         if (!islt)
591         {
592             strcpy(dst, "([0-9]+|-(");
593             dst_p = strlen(dst);
594             islt = 1;
595         }
596         else
597         {
598             strcpy(dst, "(-(");
599             islt = 0;
600         }
601         val = -val;
602     }
603     dst_p = strlen(dst);
604     sprintf(numstr, "%d", val);
605     for (w = strlen(numstr); --w >= 0; pos++)
606     {
607         d = numstr[w];
608         if (pos > 0)
609         {
610             if (islt)
611             {
612                 if (d == '0')
613                     continue;
614                 d--;
615             } 
616             else
617             {
618                 if (d == '9')
619                     continue;
620                 d++;
621             }
622         }
623         
624         strcpy(dst + dst_p, numstr);
625         dst_p = strlen(dst) - pos - 1;
626
627         if (islt)
628         {
629             if (d != '0')
630             {
631                 dst[dst_p++] = '[';
632                 dst[dst_p++] = '0';
633                 dst[dst_p++] = '-';
634                 dst[dst_p++] = d;
635                 dst[dst_p++] = ']';
636             }
637             else
638                 dst[dst_p++] = d;
639         }
640         else
641         {
642             if (d != '9')
643             { 
644                 dst[dst_p++] = '[';
645                 dst[dst_p++] = d;
646                 dst[dst_p++] = '-';
647                 dst[dst_p++] = '9';
648                 dst[dst_p++] = ']';
649             }
650             else
651                 dst[dst_p++] = d;
652         }
653         for (i = 0; i<pos; i++)
654         {
655             dst[dst_p++] = '[';
656             dst[dst_p++] = '0';
657             dst[dst_p++] = '-';
658             dst[dst_p++] = '9';
659             dst[dst_p++] = ']';
660         }
661         dst[dst_p++] = '|';
662     }
663     dst[dst_p] = '\0';
664     if (islt)
665     {
666         /* match everything less than 10^(pos-1) */
667         strcat(dst, "0*");
668         for (i = 1; i<pos; i++)
669             strcat(dst, "[0-9]?");
670     }
671     else
672     {
673         /* match everything greater than 10^pos */
674         for (i = 0; i <= pos; i++)
675             strcat(dst, "[0-9]");
676         strcat(dst, "[0-9]*");
677     }
678     strcat(dst, "))");
679 }
680
/* Copy one pattern character from src at *indx to *term_p, advancing
 * both.  A backslash is copied together with the character after it. */
void string_rel_add_char(char **term_p, const char *src, int *indx)
{
    char *out = *term_p;
    int at = *indx;

    if (src[at] == '\\')
        *out++ = src[at++];
    *out++ = src[at++];

    *term_p = out;
    *indx = at;
}
687
688 /*
689  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
690  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
691  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
692  *              ([^-a].*|a[^-b].*|ab[c-].*)
693  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
694  *              ([^a-].*|a[^b-].*|ab[^c-].*)
695  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
696  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
697  */
698 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
699                            const char **term_sub, char *term_dict,
700                            oid_value attributeSet,
701                            int reg_type, int space_split, char *term_dst,
702                            int *error_code)
703 {
704     AttrType relation;
705     int relation_value;
706     int i;
707     char *term_tmp = term_dict + strlen(term_dict);
708     char term_component[2*IT_MAX_WORD+20];
709
710     attr_init_APT(&relation, zapt, 2);
711     relation_value = attr_find(&relation, NULL);
712
713     *error_code = 0;
714     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
715     switch (relation_value)
716     {
717     case 1:
718         if (!term_100(zh->reg->zebra_maps, reg_type,
719                       term_sub, term_component,
720                       space_split, term_dst))
721             return 0;
722         yaz_log(log_level_rpn, "Relation <");
723         
724         *term_tmp++ = '(';
725         for (i = 0; term_component[i]; )
726         {
727             int j = 0;
728
729             if (i)
730                 *term_tmp++ = '|';
731             while (j < i)
732                 string_rel_add_char(&term_tmp, term_component, &j);
733
734             *term_tmp++ = '[';
735
736             *term_tmp++ = '^';
737
738             *term_tmp++ = 1;
739             *term_tmp++ = FIRST_IN_FIELD_CHAR;
740
741             string_rel_add_char(&term_tmp, term_component, &i);
742             *term_tmp++ = '-';
743
744             *term_tmp++ = ']';
745             *term_tmp++ = '.';
746             *term_tmp++ = '*';
747
748             if ((term_tmp - term_dict) > IT_MAX_WORD)
749                 break;
750         }
751         *term_tmp++ = ')';
752         *term_tmp = '\0';
753         yaz_log(YLOG_LOG, "term_dict=%s", term_dict);
754         break;
755     case 2:
756         if (!term_100(zh->reg->zebra_maps, reg_type,
757                       term_sub, term_component,
758                       space_split, term_dst))
759             return 0;
760         yaz_log(log_level_rpn, "Relation <=");
761
762         *term_tmp++ = '(';
763         for (i = 0; term_component[i]; )
764         {
765             int j = 0;
766
767             while (j < i)
768                 string_rel_add_char(&term_tmp, term_component, &j);
769             *term_tmp++ = '[';
770
771             *term_tmp++ = '^';
772
773             *term_tmp++ = 1;
774             *term_tmp++ = FIRST_IN_FIELD_CHAR;
775
776             string_rel_add_char(&term_tmp, term_component, &i);
777             *term_tmp++ = '-';
778
779             *term_tmp++ = ']';
780             *term_tmp++ = '.';
781             *term_tmp++ = '*';
782
783             *term_tmp++ = '|';
784
785             if ((term_tmp - term_dict) > IT_MAX_WORD)
786                 break;
787         }
788         for (i = 0; term_component[i]; )
789             string_rel_add_char(&term_tmp, term_component, &i);
790         *term_tmp++ = ')';
791         *term_tmp = '\0';
792         break;
793     case 5:
794         if (!term_100 (zh->reg->zebra_maps, reg_type,
795                        term_sub, term_component, space_split, term_dst))
796             return 0;
797         yaz_log(log_level_rpn, "Relation >");
798
799         *term_tmp++ = '(';
800         for (i = 0; term_component[i];)
801         {
802             int j = 0;
803
804             while (j < i)
805                 string_rel_add_char(&term_tmp, term_component, &j);
806             *term_tmp++ = '[';
807             
808             *term_tmp++ = '^';
809             *term_tmp++ = '-';
810             string_rel_add_char(&term_tmp, term_component, &i);
811
812             *term_tmp++ = ']';
813             *term_tmp++ = '.';
814             *term_tmp++ = '*';
815
816             *term_tmp++ = '|';
817
818             if ((term_tmp - term_dict) > IT_MAX_WORD)
819                 break;
820         }
821         for (i = 0; term_component[i];)
822             string_rel_add_char(&term_tmp, term_component, &i);
823         *term_tmp++ = '.';
824         *term_tmp++ = '+';
825         *term_tmp++ = ')';
826         *term_tmp = '\0';
827         break;
828     case 4:
829         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
830                       term_component, space_split, term_dst))
831             return 0;
832         yaz_log(log_level_rpn, "Relation >=");
833
834         *term_tmp++ = '(';
835         for (i = 0; term_component[i];)
836         {
837             int j = 0;
838
839             if (i)
840                 *term_tmp++ = '|';
841             while (j < i)
842                 string_rel_add_char(&term_tmp, term_component, &j);
843             *term_tmp++ = '[';
844
845             if (term_component[i+1])
846             {
847                 *term_tmp++ = '^';
848                 *term_tmp++ = '-';
849                 string_rel_add_char(&term_tmp, term_component, &i);
850             }
851             else
852             {
853                 string_rel_add_char(&term_tmp, term_component, &i);
854                 *term_tmp++ = '-';
855             }
856             *term_tmp++ = ']';
857             *term_tmp++ = '.';
858             *term_tmp++ = '*';
859
860             if ((term_tmp - term_dict) > IT_MAX_WORD)
861                 break;
862         }
863         *term_tmp++ = ')';
864         *term_tmp = '\0';
865         break;
866     case 3:
867     case 102:
868     case -1:
869         if (!**term_sub)
870             return 1;
871         yaz_log(log_level_rpn, "Relation =");
872         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
873                       term_component, space_split, term_dst))
874             return 0;
875         strcat(term_tmp, "(");
876         strcat(term_tmp, term_component);
877         strcat(term_tmp, ")");
878         break;
879     case 103:
880         yaz_log(log_level_rpn, "Relation always matches");
881         /* skip to end of term (we don't care what it is) */
882         while (**term_sub != '\0')
883             (*term_sub)++;
884         break;
885     default:
886         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
887         return 0;
888     }
889     return 1;
890 }
891
892 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
893                              const char **term_sub, 
894                              oid_value attributeSet, NMEM stream,
895                              struct grep_info *grep_info,
896                              int reg_type, int complete_flag,
897                              int num_bases, char **basenames,
898                              char *term_dst,
899                              const char *xpath_use,
900                              struct ord_list **ol);
901
902 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
903                                  Z_AttributesPlusTerm *zapt,
904                                  zint *hits_limit_value,
905                                  const char **term_ref_id_str,
906                                  NMEM nmem)
907 {
908     AttrType term_ref_id_attr;
909     AttrType hits_limit_attr;
910     int term_ref_id_int;
911  
912     attr_init_APT(&hits_limit_attr, zapt, 11);
913     *hits_limit_value  = attr_find(&hits_limit_attr, NULL);
914
915     attr_init_APT(&term_ref_id_attr, zapt, 10);
916     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
917     if (term_ref_id_int >= 0)
918     {
919         char *res = nmem_malloc(nmem, 20);
920         sprintf(res, "%d", term_ref_id_int);
921         *term_ref_id_str = res;
922     }
923
924     /* no limit given ? */
925     if (*hits_limit_value == -1)
926     {
927         if (*term_ref_id_str)
928         {
929             /* use global if term_ref is present */
930             *hits_limit_value = zh->approx_limit;
931         }
932         else
933         {
934             /* no counting if term_ref is not present */
935             *hits_limit_value = 0;
936         }
937     }
938     else if (*hits_limit_value == 0)
939     {
940         /* 0 is the same as global limit */
941         *hits_limit_value = zh->approx_limit;
942     }
943     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
944             *term_ref_id_str ? *term_ref_id_str : "none",
945             *hits_limit_value);
946     return ZEBRA_OK;
947 }
948
949 static ZEBRA_RES term_trunc(ZebraHandle zh,
950                             Z_AttributesPlusTerm *zapt,
951                             const char **term_sub, 
952                             oid_value attributeSet, NMEM stream,
953                             struct grep_info *grep_info,
954                             int reg_type, int complete_flag,
955                             int num_bases, char **basenames,
956                             char *term_dst,
957                             const char *rank_type, 
958                             const char *xpath_use,
959                             NMEM rset_nmem,
960                             RSET *rset,
961                             struct rset_key_control *kc)
962 {
963     ZEBRA_RES res;
964     struct ord_list *ol;
965     zint hits_limit_value;
966     const char *term_ref_id_str = 0;
967     *rset = 0;
968
969     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
970     grep_info->isam_p_indx = 0;
971     res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
972                       reg_type, complete_flag, num_bases, basenames,
973                       term_dst, xpath_use, &ol);
974     if (res != ZEBRA_OK)
975         return res;
976     if (!*term_sub)  /* no more terms ? */
977         return res;
978     yaz_log(log_level_rpn, "term: %s", term_dst);
979     *rset = rset_trunc(zh, grep_info->isam_p_buf,
980                        grep_info->isam_p_indx, term_dst,
981                        strlen(term_dst), rank_type, 1 /* preserve pos */,
982                        zapt->term->which, rset_nmem,
983                        kc, kc->scope, ol, reg_type, hits_limit_value,
984                        term_ref_id_str);
985     if (!*rset)
986         return ZEBRA_FAIL;
987     return ZEBRA_OK;
988 }
989
990 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
991                              const char **term_sub, 
992                              oid_value attributeSet, NMEM stream,
993                              struct grep_info *grep_info,
994                              int reg_type, int complete_flag,
995                              int num_bases, char **basenames,
996                              char *term_dst,
997                              const char *xpath_use,
998                              struct ord_list **ol)
999 {
1000     char term_dict[2*IT_MAX_WORD+4000];
1001     int j, r, base_no;
1002     AttrType truncation;
1003     int truncation_value;
1004     const char *termp;
1005     struct rpn_char_map_info rcmi;
1006     int space_split = complete_flag ? 0 : 1;
1007
1008     int bases_ok = 0;     /* no of databases with OK attribute */
1009
1010     *ol = ord_list_create(stream);
1011
1012     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1013     attr_init_APT(&truncation, zapt, 5);
1014     truncation_value = attr_find(&truncation, NULL);
1015     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1016
1017     for (base_no = 0; base_no < num_bases; base_no++)
1018     {
1019         int ord = -1;
1020         int regex_range = 0;
1021         int max_pos, prefix_len = 0;
1022         int relation_error;
1023         char ord_buf[32];
1024         int ord_len, i;
1025
1026         termp = *term_sub; /* start of term for each database */
1027
1028         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1029         {
1030             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1031                            basenames[base_no]);
1032             return ZEBRA_FAIL;
1033         }
1034         
1035         if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1036                               attributeSet, &ord) != ZEBRA_OK)
1037             continue;
1038
1039         bases_ok++;
1040
1041         *ol = ord_list_append(stream, *ol, ord);
1042         ord_len = key_SU_encode (ord, ord_buf);
1043         
1044         term_dict[prefix_len++] = '(';
1045         for (i = 0; i<ord_len; i++)
1046         {
1047             term_dict[prefix_len++] = 1;  /* our internal regexp escape char */
1048             term_dict[prefix_len++] = ord_buf[i];
1049         }
1050         term_dict[prefix_len++] = ')';
1051         term_dict[prefix_len] = '\0';
1052         j = prefix_len;
1053         switch (truncation_value)
1054         {
1055         case -1:         /* not specified */
1056         case 100:        /* do not truncate */
1057             if (!string_relation(zh, zapt, &termp, term_dict,
1058                                  attributeSet,
1059                                  reg_type, space_split, term_dst,
1060                                  &relation_error))
1061             {
1062                 if (relation_error)
1063                 {
1064                     zebra_setError(zh, relation_error, 0);
1065                     return ZEBRA_FAIL;
1066                 }
1067                 *term_sub = 0;
1068                 return ZEBRA_OK;
1069             }
1070             break;
1071         case 1:          /* right truncation */
1072             term_dict[j++] = '(';
1073             if (!term_100(zh->reg->zebra_maps, reg_type,
1074                           &termp, term_dict + j, space_split, term_dst))
1075             {
1076                 *term_sub = 0;
1077                 return ZEBRA_OK;
1078             }
1079             strcat(term_dict, ".*)");
1080             break;
1081         case 2:          /* keft truncation */
1082             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1083             if (!term_100(zh->reg->zebra_maps, reg_type,
1084                           &termp, term_dict + j, space_split, term_dst))
1085             {
1086                 *term_sub = 0;
1087                 return ZEBRA_OK;
1088             }
1089             strcat(term_dict, ")");
1090             break;
1091         case 3:          /* left&right truncation */
1092             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1093             if (!term_100(zh->reg->zebra_maps, reg_type,
1094                           &termp, term_dict + j, space_split, term_dst))
1095             {
1096                 *term_sub = 0;
1097                 return ZEBRA_OK;
1098             }
1099             strcat(term_dict, ".*)");
1100             break;
1101         case 101:        /* process # in term */
1102             term_dict[j++] = '(';
1103             if (!term_101(zh->reg->zebra_maps, reg_type,
1104                           &termp, term_dict + j, space_split, term_dst))
1105             {
1106                 *term_sub = 0;
1107                 return ZEBRA_OK;
1108             }
1109             strcat(term_dict, ")");
1110             break;
1111         case 102:        /* Regexp-1 */
1112             term_dict[j++] = '(';
1113             if (!term_102(zh->reg->zebra_maps, reg_type,
1114                           &termp, term_dict + j, space_split, term_dst))
1115             {
1116                 *term_sub = 0;
1117                 return ZEBRA_OK;
1118             }
1119             strcat(term_dict, ")");
1120             break;
1121         case 103:       /* Regexp-2 */
1122             regex_range = 1;
1123             term_dict[j++] = '(';
1124             if (!term_103(zh->reg->zebra_maps, reg_type,
1125                           &termp, term_dict + j, &regex_range,
1126                           space_split, term_dst))
1127             {
1128                 *term_sub = 0;
1129                 return ZEBRA_OK;
1130             }
1131             strcat(term_dict, ")");
1132             break;
1133         case 104:        /* process # and ! in term */
1134             term_dict[j++] = '(';
1135             if (!term_104(zh->reg->zebra_maps, reg_type,
1136                           &termp, term_dict + j, space_split, term_dst))
1137             {
1138                 *term_sub = 0;
1139                 return ZEBRA_OK;
1140             }
1141             strcat(term_dict, ")");
1142             break;
1143         case 105:        /* process * and ! in term */
1144             term_dict[j++] = '(';
1145             if (!term_105(zh->reg->zebra_maps, reg_type,
1146                           &termp, term_dict + j, space_split, term_dst, 1))
1147             {
1148                 *term_sub = 0;
1149                 return ZEBRA_OK;
1150             }
1151             strcat(term_dict, ")");
1152             break;
1153         case 106:        /* process * and ! in term */
1154             term_dict[j++] = '(';
1155             if (!term_105(zh->reg->zebra_maps, reg_type,
1156                           &termp, term_dict + j, space_split, term_dst, 0))
1157             {
1158                 *term_sub = 0;
1159                 return ZEBRA_OK;
1160             }
1161             strcat(term_dict, ")");
1162             break;
1163         default:
1164             zebra_setError_zint(zh,
1165                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1166                                 truncation_value);
1167             return ZEBRA_FAIL;
1168         }
1169         if (1)
1170         {
1171             char buf[80];
1172             const char *input = term_dict + prefix_len;
1173             esc_str(buf, sizeof(buf), input, strlen(input));
1174         }
1175         yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1176         r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1177                              grep_info, &max_pos, 
1178                              ord_len /* number of "exact" chars */,
1179                              grep_handle);
1180         if (r)
1181             yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1182     }
1183     if (!bases_ok)
1184         return ZEBRA_FAIL;
1185     *term_sub = termp;
1186     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1187     return ZEBRA_OK;
1188 }
1189
1190
1191
1192 static void grep_info_delete(struct grep_info *grep_info)
1193 {
1194 #ifdef TERM_COUNT
1195     xfree(grep_info->term_no);
1196 #endif
1197     xfree(grep_info->isam_p_buf);
1198 }
1199
1200 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1201                                    Z_AttributesPlusTerm *zapt,
1202                                    struct grep_info *grep_info,
1203                                    int reg_type)
1204 {
1205     AttrType termset;
1206     int termset_value_numeric;
1207     const char *termset_value_string;
1208
1209 #ifdef TERM_COUNT
1210     grep_info->term_no = 0;
1211 #endif
1212     grep_info->isam_p_size = 0;
1213     grep_info->isam_p_buf = NULL;
1214     grep_info->zh = zh;
1215     grep_info->reg_type = reg_type;
1216     grep_info->termset = 0;
1217     if (!zapt)
1218         return ZEBRA_OK;
1219     attr_init_APT(&termset, zapt, 8);
1220     termset_value_numeric =
1221         attr_find_ex(&termset, NULL, &termset_value_string);
1222     if (termset_value_numeric != -1)
1223     {
1224 #if TERMSET_DISABLE
1225         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1226         return ZEBRA_FAIL;
1227 #else
1228         char resname[32];
1229         const char *termset_name = 0;
1230         if (termset_value_numeric != -2)
1231         {
1232     
1233             sprintf(resname, "%d", termset_value_numeric);
1234             termset_name = resname;
1235         }
1236         else
1237             termset_name = termset_value_string;
1238         yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1239         grep_info->termset = resultSetAdd(zh, termset_name, 1);
1240         if (!grep_info->termset)
1241         {
1242             zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1243             return ZEBRA_FAIL;
1244         }
1245 #endif
1246     }
1247     return ZEBRA_OK;
1248 }
1249                                
1250 /**
1251   \brief Create result set(s) for list of terms
1252   \param zh Zebra Handle
1253   \param zapt Attributes Plust Term (RPN leaf)
1254   \param termz term as used in query but converted to UTF-8
1255   \param attributeSet default attribute set
1256   \param stream memory for result
1257   \param reg_type register type ('w', 'p',..)
1258   \param complete_flag whether it's phrases or not
1259   \param rank_type term flags for ranking
1260   \param xpath_use use attribute for X-Path (-1 for no X-path)
1261   \param num_bases number of databases
1262   \param basenames array of databases
1263   \param rset_nmem memory for result sets
1264   \param result_sets output result set for each term in list (output)
1265   \param num_result_sets number of output result sets
1266   \param kc rset key control to be used for created result sets
1267 */
1268 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1269                                  Z_AttributesPlusTerm *zapt,
1270                                  const char *termz,
1271                                  oid_value attributeSet,
1272                                  NMEM stream,
1273                                  int reg_type, int complete_flag,
1274                                  const char *rank_type,
1275                                  const char *xpath_use,
1276                                  int num_bases, char **basenames, 
1277                                  NMEM rset_nmem,
1278                                  RSET **result_sets, int *num_result_sets,
1279                                  struct rset_key_control *kc)
1280 {
1281     char term_dst[IT_MAX_WORD+1];
1282     struct grep_info grep_info;
1283     const char *termp = termz;
1284     int alloc_sets = 0;
1285
1286     *num_result_sets = 0;
1287     *term_dst = 0;
1288     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1289         return ZEBRA_FAIL;
1290     while(1)
1291     { 
1292         ZEBRA_RES res;
1293
1294         if (alloc_sets == *num_result_sets)
1295         {
1296             int add = 10;
1297             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1298                                               sizeof(*rnew));
1299             if (alloc_sets)
1300                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1301             alloc_sets = alloc_sets + add;
1302             *result_sets = rnew;
1303         }
1304         res = term_trunc(zh, zapt, &termp, attributeSet,
1305                          stream, &grep_info,
1306                          reg_type, complete_flag,
1307                          num_bases, basenames,
1308                          term_dst, rank_type,
1309                          xpath_use, rset_nmem,
1310                          &(*result_sets)[*num_result_sets],
1311                          kc);
1312         if (res != ZEBRA_OK)
1313         {
1314             int i;
1315             for (i = 0; i < *num_result_sets; i++)
1316                 rset_delete((*result_sets)[i]);
1317             grep_info_delete (&grep_info);
1318             return res;
1319         }
1320         if ((*result_sets)[*num_result_sets] == 0)
1321             break;
1322         (*num_result_sets)++;
1323
1324         if (!*termp)
1325             break;
1326     }
1327     grep_info_delete(&grep_info);
1328     return ZEBRA_OK;
1329 }
1330
1331 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1332                                          Z_AttributesPlusTerm *zapt,
1333                                          oid_value attributeSet,
1334                                          int reg_type,
1335                                          int num_bases, char **basenames,
1336                                          NMEM rset_nmem,
1337                                          RSET *rset,
1338                                          struct rset_key_control *kc)
1339 {
1340     RSET *f_set;
1341     int base_no;
1342     int position_value;
1343     int num_sets = 0;
1344     AttrType position;
1345
1346     attr_init_APT(&position, zapt, 3);
1347     position_value = attr_find(&position, NULL);
1348     switch(position_value)
1349     {
1350     case 3:
1351     case -1:
1352         return ZEBRA_OK;
1353     case 1:
1354     case 2:
1355         break;
1356     default:
1357         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1358                             position_value);
1359         return ZEBRA_FAIL;
1360     }
1361
1362     if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, reg_type))
1363     {
1364         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1365                             position_value);
1366         return ZEBRA_FAIL;
1367     }
1368
1369     if (!zh->reg->isamb && !zh->reg->isamc)
1370     {
1371         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1372                             position_value);
1373         return ZEBRA_FAIL;
1374     }
1375     f_set = xmalloc(sizeof(RSET) * num_bases);
1376     for (base_no = 0; base_no < num_bases; base_no++)
1377     {
1378         int ord = -1;
1379         char ord_buf[32];
1380         char term_dict[100];
1381         int ord_len;
1382         char *val;
1383         ISAM_P isam_p;
1384
1385         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1386         {
1387             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1388                            basenames[base_no]);
1389             return ZEBRA_FAIL;
1390         }
1391         
1392         if (zebra_apt_get_ord(zh, zapt, reg_type, 0,
1393                               attributeSet, &ord) != ZEBRA_OK)
1394             continue;
1395
1396         ord_len = key_SU_encode (ord, ord_buf);
1397         memcpy(term_dict, ord_buf, ord_len);
1398         strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1399         val = dict_lookup(zh->reg->dict, term_dict);
1400         if (!val)
1401             continue;
1402         assert(*val == sizeof(ISAM_P));
1403         memcpy(&isam_p, val+1, sizeof(isam_p));
1404         
1405
1406         if (zh->reg->isamb)
1407             f_set[num_sets++] = rsisamb_create(rset_nmem, kc, kc->scope,
1408                                                zh->reg->isamb, isam_p, 0);
1409         else if (zh->reg->isamc)
1410             f_set[num_sets++] = rsisamc_create(rset_nmem, kc, kc->scope,
1411                                                zh->reg->isamc, isam_p, 0);
1412     }
1413     if (num_sets)
1414     {
1415         *rset = rset_create_or(rset_nmem, kc, kc->scope,
1416                                0 /* termid */, num_sets, f_set);
1417     }
1418     xfree(f_set);
1419     return ZEBRA_OK;
1420 }
1421                                          
1422 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1423                                        Z_AttributesPlusTerm *zapt,
1424                                        const char *termz_org,
1425                                        oid_value attributeSet,
1426                                        NMEM stream,
1427                                        int reg_type, int complete_flag,
1428                                        const char *rank_type,
1429                                        const char *xpath_use,
1430                                        int num_bases, char **basenames, 
1431                                        NMEM rset_nmem,
1432                                        RSET *rset,
1433                                        struct rset_key_control *kc)
1434 {
1435     RSET *result_sets = 0;
1436     int num_result_sets = 0;
1437     ZEBRA_RES res =
1438         term_list_trunc(zh, zapt, termz_org, attributeSet,
1439                         stream, reg_type, complete_flag,
1440                         rank_type, xpath_use,
1441                         num_bases, basenames,
1442                         rset_nmem,
1443                         &result_sets, &num_result_sets, kc);
1444
1445     if (res != ZEBRA_OK)
1446         return res;
1447
1448     if (num_result_sets > 0)
1449     {
1450         RSET first_set = 0;
1451         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1452                                       reg_type,
1453                                       num_bases, basenames,
1454                                       rset_nmem, &first_set,
1455                                       kc);
1456         if (res != ZEBRA_OK)
1457             return res;
1458         if (first_set)
1459         {
1460             RSET *nsets = nmem_malloc(stream,
1461                                       sizeof(RSET) * (num_result_sets+1));
1462             nsets[0] = first_set;
1463             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1464             result_sets = nsets;
1465             num_result_sets++;
1466         }
1467     }
1468     if (num_result_sets == 0)
1469         *rset = rset_create_null(rset_nmem, kc, 0); 
1470     else if (num_result_sets == 1)
1471         *rset = result_sets[0];
1472     else
1473         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1474                                  num_result_sets, result_sets,
1475                                  1 /* ordered */, 0 /* exclusion */,
1476                                  3 /* relation */, 1 /* distance */);
1477     if (!*rset)
1478         return ZEBRA_FAIL;
1479     return ZEBRA_OK;
1480 }
1481
1482 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1483                                         Z_AttributesPlusTerm *zapt,
1484                                         const char *termz_org,
1485                                         oid_value attributeSet,
1486                                         NMEM stream,
1487                                         int reg_type, int complete_flag,
1488                                         const char *rank_type,
1489                                         const char *xpath_use,
1490                                         int num_bases, char **basenames,
1491                                         NMEM rset_nmem,
1492                                         RSET *rset,
1493                                         struct rset_key_control *kc)
1494 {
1495     RSET *result_sets = 0;
1496     int num_result_sets = 0;
1497     int i;
1498     ZEBRA_RES res =
1499         term_list_trunc(zh, zapt, termz_org, attributeSet,
1500                         stream, reg_type, complete_flag,
1501                         rank_type, xpath_use,
1502                         num_bases, basenames,
1503                         rset_nmem,
1504                         &result_sets, &num_result_sets, kc);
1505     if (res != ZEBRA_OK)
1506         return res;
1507
1508     for (i = 0; i<num_result_sets; i++)
1509     {
1510         RSET first_set = 0;
1511         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1512                                       reg_type,
1513                                       num_bases, basenames,
1514                                       rset_nmem, &first_set,
1515                                       kc);
1516         if (res != ZEBRA_OK)
1517         {
1518             for (i = 0; i<num_result_sets; i++)
1519                 rset_delete(result_sets[i]);
1520             return res;
1521         }
1522
1523         if (first_set)
1524         {
1525             RSET tmp_set[2];
1526
1527             tmp_set[0] = first_set;
1528             tmp_set[1] = result_sets[i];
1529             
1530             result_sets[i] = rset_create_prox(
1531                 rset_nmem, kc, kc->scope,
1532                 2, tmp_set,
1533                 1 /* ordered */, 0 /* exclusion */,
1534                 3 /* relation */, 1 /* distance */);
1535         }
1536     }
1537     if (num_result_sets == 0)
1538         *rset = rset_create_null(rset_nmem, kc, 0); 
1539     else if (num_result_sets == 1)
1540         *rset = result_sets[0];
1541     else
1542         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1543                                num_result_sets, result_sets);
1544     if (!*rset)
1545         return ZEBRA_FAIL;
1546     return ZEBRA_OK;
1547 }
1548
1549 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1550                                          Z_AttributesPlusTerm *zapt,
1551                                          const char *termz_org,
1552                                          oid_value attributeSet,
1553                                          NMEM stream,
1554                                          int reg_type, int complete_flag,
1555                                          const char *rank_type, 
1556                                          const char *xpath_use,
1557                                          int num_bases, char **basenames,
1558                                          NMEM rset_nmem,
1559                                          RSET *rset,
1560                                          struct rset_key_control *kc)
1561 {
1562     RSET *result_sets = 0;
1563     int num_result_sets = 0;
1564     int i;
1565     ZEBRA_RES res =
1566         term_list_trunc(zh, zapt, termz_org, attributeSet,
1567                         stream, reg_type, complete_flag,
1568                         rank_type, xpath_use,
1569                         num_bases, basenames,
1570                         rset_nmem,
1571                         &result_sets, &num_result_sets,
1572                         kc);
1573     if (res != ZEBRA_OK)
1574         return res;
1575     for (i = 0; i<num_result_sets; i++)
1576     {
1577         RSET first_set = 0;
1578         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1579                                       reg_type,
1580                                       num_bases, basenames,
1581                                       rset_nmem, &first_set,
1582                                       kc);
1583         if (res != ZEBRA_OK)
1584         {
1585             for (i = 0; i<num_result_sets; i++)
1586                 rset_delete(result_sets[i]);
1587             return res;
1588         }
1589
1590         if (first_set)
1591         {
1592             RSET tmp_set[2];
1593
1594             tmp_set[0] = first_set;
1595             tmp_set[1] = result_sets[i];
1596             
1597             result_sets[i] = rset_create_prox(
1598                 rset_nmem, kc, kc->scope,
1599                 2, tmp_set,
1600                 1 /* ordered */, 0 /* exclusion */,
1601                 3 /* relation */, 1 /* distance */);
1602         }
1603     }
1604
1605
1606     if (num_result_sets == 0)
1607         *rset = rset_create_null(rset_nmem, kc, 0); 
1608     else if (num_result_sets == 1)
1609         *rset = result_sets[0];
1610     else
1611         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1612                                 num_result_sets, result_sets);
1613     if (!*rset)
1614         return ZEBRA_FAIL;
1615     return ZEBRA_OK;
1616 }
1617
1618 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1619                             const char **term_sub,
1620                             char *term_dict,
1621                             oid_value attributeSet,
1622                             struct grep_info *grep_info,
1623                             int *max_pos,
1624                             int reg_type,
1625                             char *term_dst,
1626                             int *error_code)
1627 {
1628     AttrType relation;
1629     int relation_value;
1630     int term_value;
1631     int r;
1632     char *term_tmp = term_dict + strlen(term_dict);
1633
1634     *error_code = 0;
1635     attr_init_APT(&relation, zapt, 2);
1636     relation_value = attr_find(&relation, NULL);
1637
1638     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1639
1640     switch (relation_value)
1641     {
1642     case 1:
1643         yaz_log(log_level_rpn, "Relation <");
1644         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1645                       term_dst))
1646             return 0;
1647         term_value = atoi (term_tmp);
1648         gen_regular_rel(term_tmp, term_value-1, 1);
1649         break;
1650     case 2:
1651         yaz_log(log_level_rpn, "Relation <=");
1652         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1653                       term_dst))
1654             return 0;
1655         term_value = atoi (term_tmp);
1656         gen_regular_rel(term_tmp, term_value, 1);
1657         break;
1658     case 4:
1659         yaz_log(log_level_rpn, "Relation >=");
1660         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1661                       term_dst))
1662             return 0;
1663         term_value = atoi (term_tmp);
1664         gen_regular_rel(term_tmp, term_value, 0);
1665         break;
1666     case 5:
1667         yaz_log(log_level_rpn, "Relation >");
1668         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1669                       term_dst))
1670             return 0;
1671         term_value = atoi (term_tmp);
1672         gen_regular_rel(term_tmp, term_value+1, 0);
1673         break;
1674     case -1:
1675     case 3:
1676         yaz_log(log_level_rpn, "Relation =");
1677         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1678                       term_dst))
1679             return 0;
1680         term_value = atoi (term_tmp);
1681         sprintf(term_tmp, "(0*%d)", term_value);
1682         break;
1683     case 103:
1684         /* term_tmp untouched.. */
1685         while (**term_sub != '\0')
1686             (*term_sub)++;
1687         break;
1688     default:
1689         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1690         return 0;
1691     }
1692     yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1693     r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1694                           0, grep_handle);
1695     if (r)
1696         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1697     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1698     return 1;
1699 }
1700
1701 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1702                               const char **term_sub, 
1703                               oid_value attributeSet, NMEM stream,
1704                               struct grep_info *grep_info,
1705                               int reg_type, int complete_flag,
1706                               int num_bases, char **basenames,
1707                               char *term_dst, 
1708                               const char *xpath_use,
1709                               struct ord_list **ol)
1710 {
1711     char term_dict[2*IT_MAX_WORD+2];
1712     int base_no;
1713     const char *termp;
1714     struct rpn_char_map_info rcmi;
1715
1716     int bases_ok = 0;     /* no of databases with OK attribute */
1717
1718     *ol = ord_list_create(stream);
1719
1720     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1721
1722     for (base_no = 0; base_no < num_bases; base_no++)
1723     {
1724         int max_pos, prefix_len = 0;
1725         int relation_error = 0;
1726         int ord, ord_len, i;
1727         char ord_buf[32];
1728
1729         termp = *term_sub;
1730
1731         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1732         {
1733             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1734                            basenames[base_no]);
1735             return ZEBRA_FAIL;
1736         }
1737
1738         if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1739                               attributeSet, &ord) != ZEBRA_OK)
1740             continue;
1741         bases_ok++;
1742
1743         *ol = ord_list_append(stream, *ol, ord);
1744
1745         ord_len = key_SU_encode (ord, ord_buf);
1746
1747         term_dict[prefix_len++] = '(';
1748         for (i = 0; i < ord_len; i++)
1749         {
1750             term_dict[prefix_len++] = 1;
1751             term_dict[prefix_len++] = ord_buf[i];
1752         }
1753         term_dict[prefix_len++] = ')';
1754         term_dict[prefix_len] = '\0';
1755
1756         if (!numeric_relation(zh, zapt, &termp, term_dict,
1757                               attributeSet, grep_info, &max_pos, reg_type,
1758                               term_dst, &relation_error))
1759         {
1760             if (relation_error)
1761             {
1762                 zebra_setError(zh, relation_error, 0);
1763                 return ZEBRA_FAIL;
1764             }
1765             *term_sub = 0;
1766             return ZEBRA_OK;
1767         }
1768     }
1769     if (!bases_ok)
1770         return ZEBRA_FAIL;
1771     *term_sub = termp;
1772     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1773     return ZEBRA_OK;
1774 }
1775
1776                                  
1777 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1778                                         Z_AttributesPlusTerm *zapt,
1779                                         const char *termz,
1780                                         oid_value attributeSet,
1781                                         NMEM stream,
1782                                         int reg_type, int complete_flag,
1783                                         const char *rank_type, 
1784                                         const char *xpath_use,
1785                                         int num_bases, char **basenames,
1786                                         NMEM rset_nmem,
1787                                         RSET *rset,
1788                                         struct rset_key_control *kc)
1789 {
1790     char term_dst[IT_MAX_WORD+1];
1791     const char *termp = termz;
1792     RSET *result_sets = 0;
1793     int num_result_sets = 0;
1794     ZEBRA_RES res;
1795     struct grep_info grep_info;
1796     int alloc_sets = 0;
1797     zint hits_limit_value;
1798     const char *term_ref_id_str = 0;
1799
1800     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1801
1802     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1803     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1804         return ZEBRA_FAIL;
1805     while (1)
1806     { 
1807         struct ord_list *ol;
1808         if (alloc_sets == num_result_sets)
1809         {
1810             int add = 10;
1811             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1812                                               sizeof(*rnew));
1813             if (alloc_sets)
1814                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1815             alloc_sets = alloc_sets + add;
1816             result_sets = rnew;
1817         }
1818         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1819         grep_info.isam_p_indx = 0;
1820         res = numeric_term(zh, zapt, &termp, attributeSet, stream, &grep_info,
1821                            reg_type, complete_flag, num_bases, basenames,
1822                            term_dst, xpath_use, &ol);
1823         if (res == ZEBRA_FAIL || termp == 0)
1824             break;
1825         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1826         result_sets[num_result_sets] =
1827             rset_trunc(zh, grep_info.isam_p_buf,
1828                        grep_info.isam_p_indx, term_dst,
1829                        strlen(term_dst), rank_type,
1830                        0 /* preserve position */,
1831                        zapt->term->which, rset_nmem, 
1832                        kc, kc->scope, ol, reg_type,
1833                        hits_limit_value,
1834                        term_ref_id_str);
1835         if (!result_sets[num_result_sets])
1836             break;
1837         num_result_sets++;
1838         if (!*termp)
1839             break;
1840     }
1841     grep_info_delete(&grep_info);
1842
1843     if (res != ZEBRA_OK)
1844         return res;
1845     if (num_result_sets == 0)
1846         *rset = rset_create_null(rset_nmem, kc, 0);
1847     else if (num_result_sets == 1)
1848         *rset = result_sets[0];
1849     else
1850         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1851                                 num_result_sets, result_sets);
1852     if (!*rset)
1853         return ZEBRA_FAIL;
1854     return ZEBRA_OK;
1855 }
1856
1857 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1858                                       Z_AttributesPlusTerm *zapt,
1859                                       const char *termz,
1860                                       oid_value attributeSet,
1861                                       NMEM stream,
1862                                       const char *rank_type, NMEM rset_nmem,
1863                                       RSET *rset,
1864                                       struct rset_key_control *kc)
1865 {
1866     RSFD rsfd;
1867     struct it_key key;
1868     int sys;
1869     *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1870                              res_get (zh->res, "setTmpDir"),0 );
1871     rsfd = rset_open(*rset, RSETF_WRITE);
1872     
1873     sys = atoi(termz);
1874     if (sys <= 0)
1875         sys = 1;
1876     key.mem[0] = sys;
1877     key.mem[1] = 1;
1878     key.len = 2;
1879     rset_write (rsfd, &key);
1880     rset_close (rsfd);
1881     return ZEBRA_OK;
1882 }
1883
1884 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1885                                oid_value attributeSet, NMEM stream,
1886                                Z_SortKeySpecList *sort_sequence,
1887                                const char *rank_type,
1888                                NMEM rset_nmem,
1889                                RSET *rset,
1890                                struct rset_key_control *kc)
1891 {
1892     int i;
1893     int sort_relation_value;
1894     AttrType sort_relation_type;
1895     Z_SortKeySpec *sks;
1896     Z_SortKey *sk;
1897     int oid[OID_SIZE];
1898     oident oe;
1899     char termz[20];
1900     
1901     attr_init_APT(&sort_relation_type, zapt, 7);
1902     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1903
1904     if (!sort_sequence->specs)
1905     {
1906         sort_sequence->num_specs = 10;
1907         sort_sequence->specs = (Z_SortKeySpec **)
1908             nmem_malloc(stream, sort_sequence->num_specs *
1909                          sizeof(*sort_sequence->specs));
1910         for (i = 0; i<sort_sequence->num_specs; i++)
1911             sort_sequence->specs[i] = 0;
1912     }
1913     if (zapt->term->which != Z_Term_general)
1914         i = 0;
1915     else
1916         i = atoi_n ((char *) zapt->term->u.general->buf,
1917                     zapt->term->u.general->len);
1918     if (i >= sort_sequence->num_specs)
1919         i = 0;
1920     sprintf(termz, "%d", i);
1921
1922     oe.proto = PROTO_Z3950;
1923     oe.oclass = CLASS_ATTSET;
1924     oe.value = attributeSet;
1925     if (!oid_ent_to_oid (&oe, oid))
1926         return ZEBRA_FAIL;
1927
1928     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1929     sks->sortElement = (Z_SortElement *)
1930         nmem_malloc(stream, sizeof(*sks->sortElement));
1931     sks->sortElement->which = Z_SortElement_generic;
1932     sk = sks->sortElement->u.generic = (Z_SortKey *)
1933         nmem_malloc(stream, sizeof(*sk));
1934     sk->which = Z_SortKey_sortAttributes;
1935     sk->u.sortAttributes = (Z_SortAttributes *)
1936         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1937
1938     sk->u.sortAttributes->id = oid;
1939     sk->u.sortAttributes->list = zapt->attributes;
1940
1941     sks->sortRelation = (int *)
1942         nmem_malloc(stream, sizeof(*sks->sortRelation));
1943     if (sort_relation_value == 1)
1944         *sks->sortRelation = Z_SortKeySpec_ascending;
1945     else if (sort_relation_value == 2)
1946         *sks->sortRelation = Z_SortKeySpec_descending;
1947     else 
1948         *sks->sortRelation = Z_SortKeySpec_ascending;
1949
1950     sks->caseSensitivity = (int *)
1951         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1952     *sks->caseSensitivity = 0;
1953
1954     sks->which = Z_SortKeySpec_null;
1955     sks->u.null = odr_nullval ();
1956     sort_sequence->specs[i] = sks;
1957     *rset = rset_create_null(rset_nmem, kc, 0);
1958     return ZEBRA_OK;
1959 }
1960
1961
1962 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1963                            oid_value attributeSet,
1964                            struct xpath_location_step *xpath, int max,
1965                            NMEM mem)
1966 {
1967     oid_value curAttributeSet = attributeSet;
1968     AttrType use;
1969     const char *use_string = 0;
1970     
1971     attr_init_APT(&use, zapt, 1);
1972     attr_find_ex(&use, &curAttributeSet, &use_string);
1973
1974     if (!use_string || *use_string != '/')
1975         return -1;
1976
1977     return zebra_parse_xpath_str(use_string, xpath, max, mem);
1978 }
1979  
1980                
1981
1982 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1983                         int reg_type, const char *term, 
1984                         const char *xpath_use,
1985                         NMEM rset_nmem,
1986                         struct rset_key_control *kc)
1987 {
1988     RSET rset;
1989     struct grep_info grep_info;
1990     char term_dict[2048];
1991     char ord_buf[32];
1992     int prefix_len = 0;
1993     int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
1994                                            zinfo_index_category_index,
1995                                            reg_type,
1996                                            xpath_use);
1997     int ord_len, i, r, max_pos;
1998     int term_type = Z_Term_characterString;
1999     const char *flags = "void";
2000
2001     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2002         return rset_create_null(rset_nmem, kc, 0);
2003     
2004     if (ord < 0)
2005         return rset_create_null(rset_nmem, kc, 0);
2006     if (prefix_len)
2007         term_dict[prefix_len++] = '|';
2008     else
2009         term_dict[prefix_len++] = '(';
2010     
2011     ord_len = key_SU_encode (ord, ord_buf);
2012     for (i = 0; i<ord_len; i++)
2013     {
2014         term_dict[prefix_len++] = 1;
2015         term_dict[prefix_len++] = ord_buf[i];
2016     }
2017     term_dict[prefix_len++] = ')';
2018     strcpy(term_dict+prefix_len, term);
2019     
2020     grep_info.isam_p_indx = 0;
2021     r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2022                           &grep_info, &max_pos, 0, grep_handle);
2023     yaz_log(YLOG_DEBUG, "%s %d positions", term,
2024              grep_info.isam_p_indx);
2025     rset = rset_trunc(zh, grep_info.isam_p_buf,
2026                       grep_info.isam_p_indx, term, strlen(term),
2027                       flags, 1, term_type,rset_nmem,
2028                       kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
2029                       0 /* term_ref_id_str */);
2030     grep_info_delete(&grep_info);
2031     return rset;
2032 }
2033
2034 static
2035 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2036                            int num_bases, char **basenames,
2037                            NMEM stream, const char *rank_type, RSET rset,
2038                            int xpath_len, struct xpath_location_step *xpath,
2039                            NMEM rset_nmem,
2040                            RSET *rset_out,
2041                            struct rset_key_control *kc)
2042 {
2043     int base_no;
2044     int i;
2045     int always_matches = rset ? 0 : 1;
2046
2047     if (xpath_len < 0)
2048     {
2049         *rset_out = rset;
2050         return ZEBRA_OK;
2051     }
2052
2053     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2054     for (i = 0; i<xpath_len; i++)
2055     {
2056         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2057
2058     }
2059
2060     /*
2061       //a    ->    a/.*
2062       //a/b  ->    b/a/.*
2063       /a     ->    a/
2064       /a/b   ->    b/a/
2065
2066       /      ->    none
2067
2068    a[@attr = value]/b[@other = othervalue]
2069
2070  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2071  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2072  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2073  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2074  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2075  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2076       
2077     */
2078
2079     dict_grep_cmap (zh->reg->dict, 0, 0);
2080
2081     for (base_no = 0; base_no < num_bases; base_no++)
2082     {
2083         int level = xpath_len;
2084         int first_path = 1;
2085         
2086         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2087         {
2088             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2089                            basenames[base_no]);
2090             *rset_out = rset;
2091             return ZEBRA_FAIL;
2092         }
2093         while (--level >= 0)
2094         {
2095             WRBUF xpath_rev = wrbuf_alloc();
2096             int i;
2097             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2098
2099             for (i = level; i >= 1; --i)
2100             {
2101                 const char *cp = xpath[i].part;
2102                 if (*cp)
2103                 {
2104                     for (; *cp; cp++)
2105                     {
2106                         if (*cp == '*')
2107                             wrbuf_puts(xpath_rev, "[^/]*");
2108                         else if (*cp == ' ')
2109                             wrbuf_puts(xpath_rev, "\001 ");
2110                         else
2111                             wrbuf_putc(xpath_rev, *cp);
2112
2113                         /* wrbuf_putc does not null-terminate , but
2114                            wrbuf_puts below ensures it does.. so xpath_rev
2115                            is OK iff length is > 0 */
2116                     }
2117                     wrbuf_puts(xpath_rev, "/");
2118                 }
2119                 else if (i == 1)  /* // case */
2120                     wrbuf_puts(xpath_rev, ".*");
2121             }
2122             if (xpath[level].predicate &&
2123                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2124                 xpath[level].predicate->u.relation.name[0])
2125             {
2126                 WRBUF wbuf = wrbuf_alloc();
2127                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2128                 if (xpath[level].predicate->u.relation.value)
2129                 {
2130                     const char *cp = xpath[level].predicate->u.relation.value;
2131                     wrbuf_putc(wbuf, '=');
2132                     
2133                     while (*cp)
2134                     {
2135                         if (strchr(REGEX_CHARS, *cp))
2136                             wrbuf_putc(wbuf, '\\');
2137                         wrbuf_putc(wbuf, *cp);
2138                         cp++;
2139                     }
2140                 }
2141                 wrbuf_puts(wbuf, "");
2142                 rset_attr = xpath_trunc(
2143                     zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME, 
2144                     rset_nmem, kc);
2145                 wrbuf_free(wbuf, 1);
2146             } 
2147             else 
2148             {
2149                 if (!first_path)
2150                 {
2151                     wrbuf_free(xpath_rev, 1);
2152                     continue;
2153                 }
2154             }
2155             yaz_log(log_level_rpn, "xpath_rev (%d) = %.*s", level, 
2156                     wrbuf_len(xpath_rev), wrbuf_buf(xpath_rev));
2157             if (wrbuf_len(xpath_rev))
2158             {
2159                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2160                                              wrbuf_buf(xpath_rev),
2161                                              ZEBRA_XPATH_ELM_BEGIN, 
2162                                              rset_nmem, kc);
2163                 if (always_matches)
2164                     rset = rset_start_tag;
2165                 else
2166                 {
2167                     rset_end_tag = xpath_trunc(zh, stream, '0', 
2168                                                wrbuf_buf(xpath_rev),
2169                                                ZEBRA_XPATH_ELM_END, 
2170                                                rset_nmem, kc);
2171                     
2172                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2173                                                rset_start_tag, rset,
2174                                                rset_end_tag, rset_attr);
2175                 }
2176             }
2177             wrbuf_free(xpath_rev, 1);
2178             first_path = 0;
2179         }
2180     }
2181     *rset_out = rset;
2182     return ZEBRA_OK;
2183 }
2184
2185 #define MAX_XPATH_STEPS 10
2186
2187 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2188                                 oid_value attributeSet, NMEM stream,
2189                                 Z_SortKeySpecList *sort_sequence,
2190                                 int num_bases, char **basenames, 
2191                                 NMEM rset_nmem,
2192                                 RSET *rset,
2193                                 struct rset_key_control *kc)
2194 {
2195     ZEBRA_RES res = ZEBRA_OK;
2196     unsigned reg_id;
2197     char *search_type = NULL;
2198     char rank_type[128];
2199     int complete_flag;
2200     int sort_flag;
2201     char termz[IT_MAX_WORD+1];
2202     int xpath_len;
2203     const char *xpath_use = 0;
2204     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2205
2206     if (!log_level_set)
2207     {
2208         log_level_rpn = yaz_log_module_level("rpn");
2209         log_level_set = 1;
2210     }
2211     zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2212                     rank_type, &complete_flag, &sort_flag);
2213     
2214     yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2215     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2216     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2217     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2218
2219     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2220         return ZEBRA_FAIL;
2221
2222     if (sort_flag)
2223         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2224                              rank_type, rset_nmem, rset, kc);
2225     /* consider if an X-Path query is used */
2226     xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
2227                                 xpath, MAX_XPATH_STEPS, stream);
2228     if (xpath_len >= 0)
2229     {
2230         if (xpath[xpath_len-1].part[0] == '@') 
2231             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2232         else
2233             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
2234
2235         if (1)
2236         {
2237             AttrType relation;
2238             int relation_value;
2239
2240             attr_init_APT(&relation, zapt, 2);
2241             relation_value = attr_find(&relation, NULL);
2242
2243             if (relation_value == 103) /* alwaysmatches */
2244             {
2245                 *rset = 0; /* signal no "term" set */
2246                 return rpn_search_xpath(zh, num_bases, basenames,
2247                                         stream, rank_type, *rset, 
2248                                         xpath_len, xpath, rset_nmem, rset, kc);
2249             }
2250         }
2251     }
2252
2253     /* search using one of the various search type strategies
2254        termz is our UTF-8 search term
2255        attributeSet is top-level default attribute set 
2256        stream is ODR for search
2257        reg_id is the register type
2258        complete_flag is 1 for complete subfield, 0 for incomplete
2259        xpath_use is use-attribute to be used for X-Path search, 0 for none
2260     */
2261     if (!strcmp(search_type, "phrase"))
2262     {
2263         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2264                                     reg_id, complete_flag, rank_type,
2265                                     xpath_use,
2266                                     num_bases, basenames, rset_nmem,
2267                                     rset, kc);
2268     }
2269     else if (!strcmp(search_type, "and-list"))
2270     {
2271         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2272                                       reg_id, complete_flag, rank_type,
2273                                       xpath_use,
2274                                       num_bases, basenames, rset_nmem,
2275                                       rset, kc);
2276     }
2277     else if (!strcmp(search_type, "or-list"))
2278     {
2279         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2280                                      reg_id, complete_flag, rank_type,
2281                                      xpath_use,
2282                                      num_bases, basenames, rset_nmem,
2283                                      rset, kc);
2284     }
2285     else if (!strcmp(search_type, "local"))
2286     {
2287         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2288                                    rank_type, rset_nmem, rset, kc);
2289     }
2290     else if (!strcmp(search_type, "numeric"))
2291     {
2292         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2293                                      reg_id, complete_flag, rank_type,
2294                                      xpath_use,
2295                                      num_bases, basenames, rset_nmem,
2296                                      rset, kc);
2297     }
2298     else
2299     {
2300         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2301         res = ZEBRA_FAIL;
2302     }
2303     if (res != ZEBRA_OK)
2304         return res;
2305     if (!*rset)
2306         return ZEBRA_FAIL;
2307     return rpn_search_xpath(zh, num_bases, basenames,
2308                             stream, rank_type, *rset, 
2309                             xpath_len, xpath, rset_nmem, rset, kc);
2310 }
2311
2312 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2313                                       oid_value attributeSet, 
2314                                       NMEM stream, NMEM rset_nmem,
2315                                       Z_SortKeySpecList *sort_sequence,
2316                                       int num_bases, char **basenames,
2317                                       RSET **result_sets, int *num_result_sets,
2318                                       Z_Operator *parent_op,
2319                                       struct rset_key_control *kc);
2320
2321 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2322                                    zint *approx_limit)
2323 {
2324     ZEBRA_RES res = ZEBRA_OK;
2325     if (zs->which == Z_RPNStructure_complex)
2326     {
2327         if (res == ZEBRA_OK)
2328             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2329                                            approx_limit);
2330         if (res == ZEBRA_OK)
2331             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2332                                            approx_limit);
2333     }
2334     else if (zs->which == Z_RPNStructure_simple)
2335     {
2336         if (zs->u.simple->which == Z_Operand_APT)
2337         {
2338             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2339             AttrType global_hits_limit_attr;
2340             int l;
2341             
2342             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2343             
2344             l = attr_find(&global_hits_limit_attr, NULL);
2345             if (l != -1)
2346                 *approx_limit = l;
2347         }
2348     }
2349     return res;
2350 }
2351
2352 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2353                          oid_value attributeSet, 
2354                          NMEM stream, NMEM rset_nmem,
2355                          Z_SortKeySpecList *sort_sequence,
2356                          int num_bases, char **basenames,
2357                          RSET *result_set)
2358 {
2359     RSET *result_sets = 0;
2360     int num_result_sets = 0;
2361     ZEBRA_RES res;
2362     struct rset_key_control *kc = zebra_key_control_create(zh);
2363
2364     res = rpn_search_structure(zh, zs, attributeSet,
2365                                stream, rset_nmem,
2366                                sort_sequence, 
2367                                num_bases, basenames,
2368                                &result_sets, &num_result_sets,
2369                                0 /* no parent op */,
2370                                kc);
2371     if (res != ZEBRA_OK)
2372     {
2373         int i;
2374         for (i = 0; i<num_result_sets; i++)
2375             rset_delete(result_sets[i]);
2376         *result_set = 0;
2377     }
2378     else
2379     {
2380         assert(num_result_sets == 1);
2381         assert(result_sets);
2382         assert(*result_sets);
2383         *result_set = *result_sets;
2384     }
2385     (*kc->dec)(kc);
2386     return res;
2387 }
2388
2389 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2390                                oid_value attributeSet, 
2391                                NMEM stream, NMEM rset_nmem,
2392                                Z_SortKeySpecList *sort_sequence,
2393                                int num_bases, char **basenames,
2394                                RSET **result_sets, int *num_result_sets,
2395                                Z_Operator *parent_op,
2396                                struct rset_key_control *kc)
2397 {
2398     *num_result_sets = 0;
2399     if (zs->which == Z_RPNStructure_complex)
2400     {
2401         ZEBRA_RES res;
2402         Z_Operator *zop = zs->u.complex->roperator;
2403         RSET *result_sets_l = 0;
2404         int num_result_sets_l = 0;
2405         RSET *result_sets_r = 0;
2406         int num_result_sets_r = 0;
2407
2408         res = rpn_search_structure(zh, zs->u.complex->s1,
2409                                    attributeSet, stream, rset_nmem,
2410                                    sort_sequence,
2411                                    num_bases, basenames,
2412                                    &result_sets_l, &num_result_sets_l,
2413                                    zop, kc);
2414         if (res != ZEBRA_OK)
2415         {
2416             int i;
2417             for (i = 0; i<num_result_sets_l; i++)
2418                 rset_delete(result_sets_l[i]);
2419             return res;
2420         }
2421         res = rpn_search_structure(zh, zs->u.complex->s2,
2422                                    attributeSet, stream, rset_nmem,
2423                                    sort_sequence,
2424                                    num_bases, basenames,
2425                                    &result_sets_r, &num_result_sets_r,
2426                                    zop, kc);
2427         if (res != ZEBRA_OK)
2428         {
2429             int i;
2430             for (i = 0; i<num_result_sets_l; i++)
2431                 rset_delete(result_sets_l[i]);
2432             for (i = 0; i<num_result_sets_r; i++)
2433                 rset_delete(result_sets_r[i]);
2434             return res;
2435         }
2436
2437         /* make a new list of result for all children */
2438         *num_result_sets = num_result_sets_l + num_result_sets_r;
2439         *result_sets = nmem_malloc(stream, *num_result_sets * 
2440                                    sizeof(**result_sets));
2441         memcpy(*result_sets, result_sets_l, 
2442                num_result_sets_l * sizeof(**result_sets));
2443         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2444                num_result_sets_r * sizeof(**result_sets));
2445
2446         if (!parent_op || parent_op->which != zop->which
2447             || (zop->which != Z_Operator_and &&
2448                 zop->which != Z_Operator_or))
2449         {
2450             /* parent node different from this one (or non-present) */
2451             /* we must combine result sets now */
2452             RSET rset;
2453             switch (zop->which)
2454             {
2455             case Z_Operator_and:
2456                 rset = rset_create_and(rset_nmem, kc,
2457                                        kc->scope,
2458                                        *num_result_sets, *result_sets);
2459                 break;
2460             case Z_Operator_or:
2461                 rset = rset_create_or(rset_nmem, kc,
2462                                       kc->scope, 0, /* termid */
2463                                       *num_result_sets, *result_sets);
2464                 break;
2465             case Z_Operator_and_not:
2466                 rset = rset_create_not(rset_nmem, kc,
2467                                        kc->scope,
2468                                        (*result_sets)[0],
2469                                        (*result_sets)[1]);
2470                 break;
2471             case Z_Operator_prox:
2472                 if (zop->u.prox->which != Z_ProximityOperator_known)
2473                 {
2474                     zebra_setError(zh, 
2475                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2476                                    0);
2477                     return ZEBRA_FAIL;
2478                 }
2479                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2480                 {
2481                     zebra_setError_zint(zh,
2482                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2483                                         *zop->u.prox->u.known);
2484                     return ZEBRA_FAIL;
2485                 }
2486                 else
2487                 {
2488                     rset = rset_create_prox(rset_nmem, kc,
2489                                             kc->scope,
2490                                             *num_result_sets, *result_sets, 
2491                                             *zop->u.prox->ordered,
2492                                             (!zop->u.prox->exclusion ? 
2493                                              0 : *zop->u.prox->exclusion),
2494                                             *zop->u.prox->relationType,
2495                                             *zop->u.prox->distance );
2496                 }
2497                 break;
2498             default:
2499                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2500                 return ZEBRA_FAIL;
2501             }
2502             *num_result_sets = 1;
2503             *result_sets = nmem_malloc(stream, *num_result_sets * 
2504                                        sizeof(**result_sets));
2505             (*result_sets)[0] = rset;
2506         }
2507     }
2508     else if (zs->which == Z_RPNStructure_simple)
2509     {
2510         RSET rset;
2511         ZEBRA_RES res;
2512
2513         if (zs->u.simple->which == Z_Operand_APT)
2514         {
2515             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2516             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2517                                  attributeSet, stream, sort_sequence,
2518                                  num_bases, basenames, rset_nmem, &rset,
2519                                  kc);
2520             if (res != ZEBRA_OK)
2521                 return res;
2522         }
2523         else if (zs->u.simple->which == Z_Operand_resultSetId)
2524         {
2525             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2526             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2527             if (!rset)
2528             {
2529                 zebra_setError(zh, 
2530                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2531                                zs->u.simple->u.resultSetId);
2532                 return ZEBRA_FAIL;
2533             }
2534             rset_dup(rset);
2535         }
2536         else
2537         {
2538             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2539             return ZEBRA_FAIL;
2540         }
2541         *num_result_sets = 1;
2542         *result_sets = nmem_malloc(stream, *num_result_sets * 
2543                                    sizeof(**result_sets));
2544         (*result_sets)[0] = rset;
2545     }
2546     else
2547     {
2548         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2549         return ZEBRA_FAIL;
2550     }
2551     return ZEBRA_OK;
2552 }
2553
2554
2555
2556 /*
2557  * Local variables:
2558  * c-basic-offset: 4
2559  * indent-tabs-mode: nil
2560  * End:
2561  * vim: shiftwidth=4 tabstop=8 expandtab
2562  */
2563